LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
61 }
62
63 void Select(SDNode *Node) override;
64 void PreprocessISelDAG() override;
65
66 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
67 /// inline asm expressions.
68 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
69 InlineAsm::ConstraintCode ConstraintID,
70 std::vector<SDValue> &OutOps) override;
71
72 template <signed Low, signed High, signed Scale>
73 bool SelectRDVLImm(SDValue N, SDValue &Imm);
74
75 template <signed Low, signed High>
76 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
77
78 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
83 return SelectShiftedRegister(N, false, Reg, Shift);
84 }
85 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
86 return SelectShiftedRegister(N, true, Reg, Shift);
87 }
88 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
93 }
94 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
96 }
97 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
102 }
103 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
105 }
106 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 1, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 2, Base, OffImm);
114 }
115 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeIndexed(N, 4, Base, OffImm);
117 }
118 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeIndexed(N, 8, Base, OffImm);
120 }
121 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeIndexed(N, 16, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
129 }
130 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
131 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
132 }
133 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
134 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
135 }
136 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
137 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
138 }
139 template <unsigned Size, unsigned Max>
140 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
141 // Test if there is an appropriate addressing mode and check if the
142 // immediate fits.
143 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
144 if (Found) {
145 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
146 int64_t C = CI->getSExtValue();
147 if (C <= Max)
148 return true;
149 }
150 }
151
152 // Otherwise, base only, materialize address in register.
153 Base = N;
154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
155 return true;
156 }
157
158 template<int Width>
159 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
160 SDValue &SignExtend, SDValue &DoShift) {
161 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
162 }
163
164 template<int Width>
165 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
166 SDValue &SignExtend, SDValue &DoShift) {
167 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
168 }
169
170 bool SelectExtractHigh(SDValue N, SDValue &Res) {
171 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
172 N = N->getOperand(0);
173 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
174 !isa<ConstantSDNode>(N->getOperand(1)))
175 return false;
176 EVT VT = N->getValueType(0);
177 EVT LVT = N->getOperand(0).getValueType();
178 unsigned Index = N->getConstantOperandVal(1);
179 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
180 Index != VT.getVectorNumElements())
181 return false;
182 Res = N->getOperand(0);
183 return true;
184 }
185
186 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
187 if (N.getOpcode() != AArch64ISD::VLSHR)
188 return false;
189 SDValue Op = N->getOperand(0);
190 EVT VT = Op.getValueType();
191 unsigned ShtAmt = N->getConstantOperandVal(1);
192 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
193 return false;
194
195 APInt Imm;
196 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0)
199 << Op.getOperand(1).getConstantOperandVal(1));
200 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
201 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
202 Imm = APInt(VT.getScalarSizeInBits(),
203 Op.getOperand(1).getConstantOperandVal(0));
204 else
205 return false;
206
207 if (Imm != 1ULL << (ShtAmt - 1))
208 return false;
209
210 Res1 = Op.getOperand(0);
211 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
212 return true;
213 }
214
215 bool SelectDupZeroOrUndef(SDValue N) {
216 switch(N->getOpcode()) {
217 case ISD::UNDEF:
218 return true;
219 case AArch64ISD::DUP:
220 case ISD::SPLAT_VECTOR: {
221 auto Opnd0 = N->getOperand(0);
222 if (isNullConstant(Opnd0))
223 return true;
224 if (isNullFPConstant(Opnd0))
225 return true;
226 break;
227 }
228 default:
229 break;
230 }
231
232 return false;
233 }
234
235 bool SelectAny(SDValue) { return true; }
236
237 bool SelectDupZero(SDValue N) {
238 switch(N->getOpcode()) {
239 case AArch64ISD::DUP:
240 case ISD::SPLAT_VECTOR: {
241 auto Opnd0 = N->getOperand(0);
242 if (isNullConstant(Opnd0))
243 return true;
244 if (isNullFPConstant(Opnd0))
245 return true;
246 break;
247 }
248 }
249
250 return false;
251 }
252
253 template <MVT::SimpleValueType VT, bool Negate>
254 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
255 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
256 }
257
258 template <MVT::SimpleValueType VT, bool Negate>
259 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
260 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
261 }
262
263 template <MVT::SimpleValueType VT>
264 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
265 return SelectSVECpyDupImm(N, VT, Imm, Shift);
266 }
267
268 template <MVT::SimpleValueType VT, bool Invert = false>
269 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
270 return SelectSVELogicalImm(N, VT, Imm, Invert);
271 }
272
273 template <MVT::SimpleValueType VT>
274 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
275 return SelectSVEArithImm(N, VT, Imm);
276 }
277
278 template <unsigned Low, unsigned High, bool AllowSaturation = false>
279 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
280 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
281 }
282
283 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
284 if (N->getOpcode() != ISD::SPLAT_VECTOR)
285 return false;
286
287 EVT EltVT = N->getValueType(0).getVectorElementType();
288 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
289 /* High */ EltVT.getFixedSizeInBits(),
290 /* AllowSaturation */ true, Imm);
291 }
292
293 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
294 template<signed Min, signed Max, signed Scale, bool Shift>
295 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 return false;
298
299 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
300 if (Shift)
301 MulImm = 1LL << MulImm;
302
303 if ((MulImm % std::abs(Scale)) != 0)
304 return false;
305
306 MulImm /= Scale;
307 if ((MulImm >= Min) && (MulImm <= Max)) {
308 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
309 return true;
310 }
311
312 return false;
313 }
314
315 template <signed Max, signed Scale>
316 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 return false;
319
320 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
321
322 if (MulImm >= 0 && MulImm <= Max) {
323 MulImm *= Scale;
324 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
325 return true;
326 }
327
328 return false;
329 }
330
331 template <unsigned BaseReg, unsigned Max>
332 bool ImmToReg(SDValue N, SDValue &Imm) {
333 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
334 uint64_t C = CI->getZExtValue();
335
336 if (C > Max)
337 return false;
338
339 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
340 return true;
341 }
342 return false;
343 }
344
345 /// Form sequences of consecutive 64/128-bit registers for use in NEON
346 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
347 /// between 1 and 4 elements. If it contains a single element that is returned
348 /// unchanged; otherwise a REG_SEQUENCE value is returned.
351 // Form a sequence of SVE registers for instructions using list of vectors,
352 // e.g. structured loads and stores (ldN, stN).
353 SDValue createZTuple(ArrayRef<SDValue> Vecs);
354
355 // Similar to above, except the register must start at a multiple of the
356 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
357 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
358
359 /// Generic helper for the createDTuple/createQTuple
360 /// functions. Those should almost always be called instead.
361 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
362 const unsigned SubRegs[]);
363
364 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
365
366 bool tryIndexedLoad(SDNode *N);
367
368 void SelectPtrauthAuth(SDNode *N);
369 void SelectPtrauthResign(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395 bool IsTupleInput, unsigned Opc);
396 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397
398 template <unsigned MaxIdx, unsigned Scale>
399 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400 unsigned Op);
401 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402 unsigned Op, unsigned MaxIdx, unsigned Scale,
403 unsigned BaseReg = 0);
404 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405 /// SVE Reg+Imm addressing mode.
406 template <int64_t Min, int64_t Max>
407 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408 SDValue &OffImm);
409 /// SVE Reg+Reg address mode.
410 template <unsigned Scale>
411 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413 }
414
415 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
416 unsigned Opc, uint32_t MaxImm);
417
418 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
419
420 template <unsigned MaxIdx, unsigned Scale>
421 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
422 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
423 }
424
425 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
430 unsigned Opc_rr, unsigned Opc_ri);
431 std::tuple<unsigned, SDValue, SDValue>
432 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
433 const SDValue &OldBase, const SDValue &OldOffset,
434 unsigned Scale);
435
436 bool tryBitfieldExtractOp(SDNode *N);
437 bool tryBitfieldExtractOpFromSExt(SDNode *N);
438 bool tryBitfieldInsertOp(SDNode *N);
439 bool tryBitfieldInsertInZeroOp(SDNode *N);
440 bool tryShiftAmountMod(SDNode *N);
441
442 bool tryReadRegister(SDNode *N);
443 bool tryWriteRegister(SDNode *N);
444
445 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
446 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
447
448 bool trySelectXAR(SDNode *N);
449
450 SDValue tryFoldCselToFMaxMin(SDNode &N);
451
452// Include the pieces autogenerated from the target description.
453#include "AArch64GenDAGISel.inc"
454
455private:
456 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
457 SDValue &Shift);
458 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
459 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm) {
461 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
462 }
463 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
464 unsigned Size, SDValue &Base,
465 SDValue &OffImm);
466 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &OffImm);
468 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
469 SDValue &OffImm);
470 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
474 SDValue &Offset, SDValue &SignExtend,
475 SDValue &DoShift);
476 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
477 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
478 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
479 SDValue &Offset, SDValue &SignExtend);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
484 }
485 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
486
487 template <unsigned RegWidth>
488 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
489 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
490 }
491 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
492
493 template<unsigned RegWidth>
494 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
495 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
496 }
497
498 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
499 unsigned Width);
500
501 template <unsigned FloatWidth>
502 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
503 return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
504 }
505
506 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
507 unsigned Width);
508
509 bool SelectCMP_SWAP(SDNode *N);
510
511 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
512 bool Negate);
513 bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
514 SDValue &Shift, bool Negate);
515 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
516 bool Negate);
517 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
518 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
519
520 // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
521 bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
522 bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
523 bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);
524
525 bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
526 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
527 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
528 bool AllowSaturation, SDValue &Imm);
529
530 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
531 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
532 SDValue &Offset);
533 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
534 SDValue &Offset, unsigned Scale = 1);
535
536 bool SelectAllActivePredicate(SDValue N);
537 bool SelectAnyPredicate(SDValue N);
538
539 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
540
541 template <bool MatchCBB>
542 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
543};
544
545class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
546public:
547 static char ID;
548 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
549 CodeGenOptLevel OptLevel)
551 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
552};
553} // end anonymous namespace
554
555char AArch64DAGToDAGISelLegacy::ID = 0;
556
557INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
558
559/// addBitcastHints - This method adds bitcast hints to the operands of a node
560/// to help instruction selector determine which operands are in Neon registers.
562 SDLoc DL(&N);
563 auto getFloatVT = [&](EVT VT) {
564 EVT ScalarVT = VT.getScalarType();
565 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
566 return VT.changeElementType(*(DAG.getContext()),
567 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
568 };
570 NewOps.reserve(N.getNumOperands());
571
572 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
573 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
574 N.getOperand(I));
575 NewOps.push_back(bitcasted);
576 }
577 EVT OrigVT = N.getValueType(0);
578 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
579 return DAG.getBitcast(OrigVT, OpNode);
580}
581
582/// isIntImmediate - This method tests to see if the node is a constant
583/// operand. If so Imm will receive the 64-bit value.
584static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
586 Imm = C->getZExtValue();
587 return true;
588 }
589 return false;
590}
591
592// isIntImmediate - This method tests to see if a constant operand.
593// If so Imm will receive the value.
594static bool isIntImmediate(SDValue N, uint64_t &Imm) {
595 return isIntImmediate(N.getNode(), Imm);
596}
597
598// isOpcWithIntImmediate - This method tests to see if the node is a specific
599// opcode and that it has a immediate integer right operand.
600// If so Imm will receive the 32 bit value.
601static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
602 uint64_t &Imm) {
603 return N->getOpcode() == Opc &&
604 isIntImmediate(N->getOperand(1).getNode(), Imm);
605}
606
607// isIntImmediateEq - This method tests to see if N is a constant operand that
608// is equivalent to 'ImmExpected'.
609#ifndef NDEBUG
610static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
611 uint64_t Imm;
612 if (!isIntImmediate(N.getNode(), Imm))
613 return false;
614 return Imm == ImmExpected;
615}
616#endif
617
618static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
619 assert(RegWidth == 32 || RegWidth == 64);
620 if (RegWidth == 32)
621 return APInt(RegWidth,
623 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
624}
625
626// Decodes the raw integer splat value from a NEON splat operation.
627static std::optional<APInt> DecodeNEONSplat(SDValue N) {
628 assert(N.getValueType().isInteger() && "Only integers are supported");
629 if (N->getOpcode() == AArch64ISD::NVCAST)
630 N = N->getOperand(0);
631 unsigned SplatWidth = N.getScalarValueSizeInBits();
632 if (N.getOpcode() == AArch64ISD::FMOV)
633 return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
634 if (N->getOpcode() == AArch64ISD::MOVI)
635 return APInt(SplatWidth, N.getConstantOperandVal(0));
636 if (N->getOpcode() == AArch64ISD::MOVIshift)
637 return APInt(SplatWidth, N.getConstantOperandVal(0)
638 << N.getConstantOperandVal(1));
639 if (N->getOpcode() == AArch64ISD::MVNIshift)
640 return ~APInt(SplatWidth, N.getConstantOperandVal(0)
641 << N.getConstantOperandVal(1));
642 if (N->getOpcode() == AArch64ISD::MOVIedit)
644 N.getConstantOperandVal(0)));
645 if (N->getOpcode() == AArch64ISD::DUP)
646 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
647 return Const->getAPIntValue().trunc(SplatWidth);
648 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
649 // in AArch64ISelLowering.
650 return std::nullopt;
651}
652
653// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
654// matching the element size of N.
655static std::optional<APInt> GetNEONSplatValue(SDValue N) {
656 unsigned SplatWidth = N.getScalarValueSizeInBits();
657 if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
658 if (SplatVal->getBitWidth() <= SplatWidth)
659 return APInt::getSplat(SplatWidth, *SplatVal);
660 if (SplatVal->isSplat(SplatWidth))
661 return SplatVal->trunc(SplatWidth);
662 }
663 return std::nullopt;
664}
665
666bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
667 SDValue &Imm) {
668 std::optional<APInt> ImmVal = GetNEONSplatValue(N);
669 if (!ImmVal)
670 return false;
671 uint64_t Encoding;
672 if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
673 ImmVal->getZExtValue(), Encoding))
674 return false;
675
676 Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
677 return true;
678}
679
680bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
681 SDValue &Shift) {
682 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
683 return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
684 N.getValueType().getScalarType().getSimpleVT(),
685 Imm, Shift,
686 /*Negate=*/false);
687 return false;
688}
689
690bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
691 SDValue &Imm) {
692 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
693 return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
694 return false;
695}
696
697bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
698 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
699 std::vector<SDValue> &OutOps) {
700 switch(ConstraintID) {
701 default:
702 llvm_unreachable("Unexpected asm memory constraint");
703 case InlineAsm::ConstraintCode::m:
704 case InlineAsm::ConstraintCode::o:
705 case InlineAsm::ConstraintCode::Q:
706 // We need to make sure that this one operand does not end up in XZR, thus
707 // require the address to be in a PointerRegClass register.
708 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
709 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
710 SDLoc dl(Op);
711 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
712 SDValue NewOp =
713 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
714 dl, Op.getValueType(),
715 Op, RC), 0);
716 OutOps.push_back(NewOp);
717 return false;
718 }
719 return true;
720}
721
722/// SelectArithImmed - Select an immediate value that can be represented as
723/// a 12-bit value shifted left by either 0 or 12. If so, return true with
724/// Val set to the 12-bit value and Shift set to the shifter operand.
725bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
726 SDValue &Shift) {
727 // This function is called from the addsub_shifted_imm ComplexPattern,
728 // which lists [imm] as the list of opcode it's interested in, however
729 // we still need to check whether the operand is actually an immediate
730 // here because the ComplexPattern opcode list is only used in
731 // root-level opcode matching.
732 if (!isa<ConstantSDNode>(N.getNode()))
733 return false;
734
735 uint64_t Immed = N.getNode()->getAsZExtVal();
736 unsigned ShiftAmt;
737
738 if (Immed >> 12 == 0) {
739 ShiftAmt = 0;
740 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
741 ShiftAmt = 12;
742 Immed = Immed >> 12;
743 } else
744 return false;
745
746 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
747 SDLoc dl(N);
748 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
749 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
750 return true;
751}
752
753/// SelectNegArithImmed - As above, but negates the value before trying to
754/// select it.
755bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
756 SDValue &Shift) {
757 // This function is called from the addsub_shifted_imm ComplexPattern,
758 // which lists [imm] as the list of opcode it's interested in, however
759 // we still need to check whether the operand is actually an immediate
760 // here because the ComplexPattern opcode list is only used in
761 // root-level opcode matching.
762 if (!isa<ConstantSDNode>(N.getNode()))
763 return false;
764
765 // The immediate operand must be a 24-bit zero-extended immediate.
766 uint64_t Immed = N.getNode()->getAsZExtVal();
767
768 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
769 // have the opposite effect on the C flag, so this pattern mustn't match under
770 // those circumstances.
771 if (Immed == 0)
772 return false;
773
774 if (N.getValueType() == MVT::i32)
775 Immed = ~((uint32_t)Immed) + 1;
776 else
777 Immed = ~Immed + 1ULL;
778 if (Immed & 0xFFFFFFFFFF000000ULL)
779 return false;
780
781 Immed &= 0xFFFFFFULL;
782 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
783 Shift);
784}
785
786/// getShiftTypeForNode - Translate a shift node to the corresponding
787/// ShiftType value.
789 switch (N.getOpcode()) {
790 default:
792 case ISD::SHL:
793 return AArch64_AM::LSL;
794 case ISD::SRL:
795 return AArch64_AM::LSR;
796 case ISD::SRA:
797 return AArch64_AM::ASR;
798 case ISD::ROTR:
799 return AArch64_AM::ROR;
800 }
801}
802
804 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
805}
806
807/// Determine whether it is worth it to fold SHL into the addressing
808/// mode.
810 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
811 // It is worth folding logical shift of up to three places.
812 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
813 if (!CSD)
814 return false;
815 unsigned ShiftVal = CSD->getZExtValue();
816 if (ShiftVal > 3)
817 return false;
818
819 // Check if this particular node is reused in any non-memory related
820 // operation. If yes, do not try to fold this node into the address
821 // computation, since the computation will be kept.
822 const SDNode *Node = V.getNode();
823 for (SDNode *UI : Node->users())
824 if (!isMemOpOrPrefetch(UI))
825 for (SDNode *UII : UI->users())
826 if (!isMemOpOrPrefetch(UII))
827 return false;
828 return true;
829}
830
831/// Determine whether it is worth to fold V into an extended register addressing
832/// mode.
833bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
834 // Trivial if we are optimizing for code size or if there is only
835 // one use of the value.
836 if (CurDAG->shouldOptForSize() || V.hasOneUse())
837 return true;
838
839 // If a subtarget has a slow shift, folding a shift into multiple loads
840 // costs additional micro-ops.
841 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
842 return false;
843
844 // Check whether we're going to emit the address arithmetic anyway because
845 // it's used by a non-address operation.
846 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
847 return true;
848 if (V.getOpcode() == ISD::ADD) {
849 const SDValue LHS = V.getOperand(0);
850 const SDValue RHS = V.getOperand(1);
851 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
852 return true;
853 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
854 return true;
855 }
856
857 // It hurts otherwise, since the value will be reused.
858 return false;
859}
860
861/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
862/// to select more shifted register
863bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
864 SDValue &Shift) {
865 EVT VT = N.getValueType();
866 if (VT != MVT::i32 && VT != MVT::i64)
867 return false;
868
869 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
870 return false;
871 SDValue LHS = N.getOperand(0);
872 if (!LHS->hasOneUse())
873 return false;
874
875 unsigned LHSOpcode = LHS->getOpcode();
876 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
877 return false;
878
879 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
880 if (!ShiftAmtNode)
881 return false;
882
883 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
884 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
885 if (!RHSC)
886 return false;
887
888 APInt AndMask = RHSC->getAPIntValue();
889 unsigned LowZBits, MaskLen;
890 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
891 return false;
892
893 unsigned BitWidth = N.getValueSizeInBits();
894 SDLoc DL(LHS);
895 uint64_t NewShiftC;
896 unsigned NewShiftOp;
897 if (LHSOpcode == ISD::SHL) {
898 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
899 // BitWidth != LowZBits + MaskLen doesn't match the pattern
900 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
901 return false;
902
903 NewShiftC = LowZBits - ShiftAmtC;
904 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
905 } else {
906 if (LowZBits == 0)
907 return false;
908
909 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
910 NewShiftC = LowZBits + ShiftAmtC;
911 if (NewShiftC >= BitWidth)
912 return false;
913
914 // SRA need all high bits
915 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
916 return false;
917
918 // SRL high bits can be 0 or 1
919 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
920 return false;
921
922 if (LHSOpcode == ISD::SRL)
923 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
924 else
925 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
926 }
927
928 assert(NewShiftC < BitWidth && "Invalid shift amount");
929 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
930 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
931 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
932 NewShiftAmt, BitWidthMinus1),
933 0);
934 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
935 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
936 return true;
937}
938
939/// getExtendTypeForNode - Translate an extend-like node to the corresponding
940/// AArch64_AM extend kind.
///
/// Handles SIGN_EXTEND / SIGN_EXTEND_INREG (SXTB, SXTH, SXTW), ZERO_EXTEND /
/// ANY_EXTEND (UXTB, UXTH, UXTW) and AND with a constant mask of 0xFF, 0xFFFF
/// or 0xFFFFFFFF (UXTB, UXTH, UXTW). The kind is chosen from the source type
/// of the extend, or from the mask value for AND.
///
/// \param N            The candidate node.
/// \param IsLoadStore  When true, the byte and halfword kinds are never
///                     produced; only the 32-bit kinds (SXTW / UXTW) are.
// NOTE(review): this listing appears to have dropped some lines of this
// function (the return-type line and the fall-through returns); confirm
// against the upstream file.
942getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
943 if (N.getOpcode() == ISD::SIGN_EXTEND ||
944 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
945 EVT SrcVT;
// SIGN_EXTEND_INREG carries the source type in its VT operand; a plain
// SIGN_EXTEND takes it from the type of the value being extended.
946 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
947 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
948 else
949 SrcVT = N.getOperand(0).getValueType();
950
951 if (!IsLoadStore && SrcVT == MVT::i8)
952 return AArch64_AM::SXTB;
953 else if (!IsLoadStore && SrcVT == MVT::i16)
954 return AArch64_AM::SXTH;
955 else if (SrcVT == MVT::i32)
956 return AArch64_AM::SXTW;
957 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
958
960 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
961 N.getOpcode() == ISD::ANY_EXTEND) {
// ANY_EXTEND is mapped to the same unsigned kinds as ZERO_EXTEND.
962 EVT SrcVT = N.getOperand(0).getValueType();
963 if (!IsLoadStore && SrcVT == MVT::i8)
964 return AArch64_AM::UXTB;
965 else if (!IsLoadStore && SrcVT == MVT::i16)
966 return AArch64_AM::UXTH;
967 else if (SrcVT == MVT::i32)
968 return AArch64_AM::UXTW;
969 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
970
972 } else if (N.getOpcode() == ISD::AND) {
// An AND with a constant low-bits mask is mapped to an unsigned extend kind.
973 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
974 if (!CSD)
976 uint64_t AndMask = CSD->getZExtValue();
977
978 switch (AndMask) {
979 default:
981 case 0xFF:
982 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
983 case 0xFFFF:
984 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
985 case 0xFFFFFFFF:
986 return AArch64_AM::UXTW;
987 }
988 }
989
991}
992
993/// Determine whether it is worth folding V into an extended register of an
994/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
995/// instruction, in which case a small constant shift may be treated as worth
996/// folding even if it has multiple uses.
///
/// \param V    The value that would be folded into the ALU operand.
/// \param LSL  True when the fold is a plain logical shift left.
/// \returns true when the fold should be performed.
997bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
998 // Trivial if we are optimizing for code size or if there is only
999 // one use of the value.
1000 if (CurDAG->shouldOptForSize() || V.hasOneUse())
1001 return true;
1002
1003 // If a subtarget has a fastpath LSL we can fold a logical shift into
1004 // the add/sub and save a cycle. Only shift amounts of at most 4 qualify.
// NOTE(review): the final clause of this condition appears to be missing from
// this listing; confirm against the upstream file.
1005 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
1006 V.getConstantOperandVal(1) <= 4 &&
1008 return true;
1009
1010 // It hurts otherwise, since the value will be reused.
1011 return false;
1012}
1013
1014/// SelectShiftedRegister - Select a "shifted register" operand: a register
1015/// combined with a constant shift that the instruction can apply itself. The
1016/// logical instructions allow the shifted register to be rotated, but the
1017/// arithmetic instructions do not. The AllowROR parameter specifies whether
1018/// ROR is supported.
///
/// \param N         The candidate operand.
/// \param AllowROR  Whether a rotate-right shift may be matched.
/// \param Reg       Receives the register being shifted.
/// \param Shift     Receives the encoded shifter immediate (an i32 target
///                  constant).
/// \returns true when N was matched and folding it is considered worthwhile.
1019bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
1020 SDValue &Reg, SDValue &Shift) {
// First try the `and (shift x, c), mask` form, which is rewritten into a
// bitfield operation followed by an LSL.
1021 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
1022 return true;
1023
// NOTE(review): the line that declares and computes ShType appears to be
// missing from this listing; confirm against the upstream file.
1025 if (ShType == AArch64_AM::InvalidShiftExtend)
1026 return false;
1027 if (!AllowROR && ShType == AArch64_AM::ROR)
1028 return false;
1029
// Only constant shift amounts can be encoded. The amount is reduced modulo the
// operand width before encoding.
1030 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1031 unsigned BitSize = N.getValueSizeInBits();
1032 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1033 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
1034
1035 Reg = N.getOperand(0);
1036 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
1037 return isWorthFoldingALU(N, true);
1038 }
1039
1040 return false;
1041}
1042
1043/// Instructions that accept extend modifiers like UXTW expect the register
1044/// being extended to be a GPR32, but the incoming DAG might be acting on a
1045/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1046/// this is the case.
///
/// An i32 value is returned unchanged; any other value is replaced by an
/// extract of its sub_32 subregister, typed i32.
// NOTE(review): the signature line of this helper appears to be missing from
// this listing; call sites in this file pass (CurDAG, value).
1048 if (N.getValueType() == MVT::i32)
1049 return N;
1050
1051 SDLoc dl(N);
1052 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
1053}
1054
1055// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1056template<signed Low, signed High, signed Scale>
1057bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1058 if (!isa<ConstantSDNode>(N))
1059 return false;
1060
1061 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
1062 if ((MulImm % std::abs(Scale)) == 0) {
1063 int64_t RDVLImm = MulImm / Scale;
1064 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1065 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
1066 return true;
1067 }
1068 }
1069
1070 return false;
1071}
1072
1073// Returns a suitable RDSVL multiplier from a left shift.
1074template <signed Low, signed High>
1075bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1076 if (!isa<ConstantSDNode>(N))
1077 return false;
1078
1079 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
1080 if (MulImm >= Low && MulImm <= High) {
1081 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
1082 return true;
1083 }
1084
1085 return false;
1086}
1087
1088/// SelectArithExtendedRegister - Select an "extended register" operand. This
1089/// operand folds in an extend followed by an optional left shift.
///
/// \param N      The candidate operand: either an extend-like node, or a SHL
///               by a constant of at most 4 whose input is an extend-like
///               node.
/// \param Reg    Receives the value being extended, narrowed to i32 when
///               needed.
/// \param Shift  Receives the encoded extend-plus-shift immediate (an i32
///               target constant).
/// \returns true when N was matched and folding it is considered worthwhile.
// NOTE(review): several lines of this function (the declaration of Ext, the
// checks on the result of getExtendTypeForNode, and part of the isDef32
// predicate) appear to be missing from this listing; confirm against the
// upstream file.
1090bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
1091 SDValue &Shift) {
1092 unsigned ShiftVal = 0;
1094
1095 if (N.getOpcode() == ISD::SHL) {
// Shifted form: the shift amount must be a constant no larger than 4.
1096 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1097 if (!CSD)
1098 return false;
1099 ShiftVal = CSD->getZExtValue();
1100 if (ShiftVal > 4)
1101 return false;
1102
1103 Ext = getExtendTypeForNode(N.getOperand(0));
1105 return false;
1106
// Look through both the shift and the extend to reach the underlying value.
1107 Reg = N.getOperand(0).getOperand(0);
1108 } else {
1109 Ext = getExtendTypeForNode(N);
1111 return false;
1112
1113 // Don't match sext of vector extracts. These can use SMOV, but if we match
1114 // this as an extended register, we'll always fold the extend into an ALU op
1115 // user of the extend (which results in a UMOV).
1117 SDValue Op = N.getOperand(0);
1118 if (Op->getOpcode() == ISD::ANY_EXTEND)
1119 Op = Op->getOperand(0);
1120 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1121 Op.getOperand(0).getValueType().isFixedLengthVector())
1122 return false;
1123 }
1124
1125 Reg = N.getOperand(0);
1126
1127 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1128 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1129 auto isDef32 = [](SDValue N) {
1130 unsigned Opc = N.getOpcode();
1131 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1134 Opc != ISD::FREEZE;
1135 };
1136 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1137 isDef32(Reg))
1138 return false;
1139 }
1140
1141 // AArch64 mandates that the RHS of the operation must use the smallest
1142 // register class that could contain the size being extended from. Thus,
1143 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1144 // there might not be an actual 32-bit value in the program. We can
1145 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1146 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1147 Reg = narrowIfNeeded(CurDAG, Reg);
1148 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1149 MVT::i32);
1150 return isWorthFoldingALU(N);
1151}
1152
1153/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1154/// operand is used by the instructions that have an SP operand.
///
/// Only a SHL by a constant of at most 4 is matched.
///
/// \param N      The candidate operand.
/// \param Reg    Receives the value being shifted.
/// \param Shift  Receives the encoded UXTX-plus-shift immediate (an i32 target
///               constant).
/// \returns true when N was matched and folding it is considered worthwhile.
1155bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1156 SDValue &Shift) {
1157 unsigned ShiftVal = 0;
// NOTE(review): the declaration of Ext appears to be missing from this
// listing; confirm against the upstream file.
1159
1160 if (N.getOpcode() != ISD::SHL)
1161 return false;
1162
1163 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1164 if (!CSD)
1165 return false;
1166 ShiftVal = CSD->getZExtValue();
1167 if (ShiftVal > 4)
1168 return false;
1169
1170 Ext = AArch64_AM::UXTX;
1171 Reg = N.getOperand(0);
1172 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1173 MVT::i32);
1174 return isWorthFoldingALU(N);
1175}
1176
1177/// If there's a use of this ADDlow that's not itself a load/store then we'll
1178/// need to create a real ADD instruction from it anyway and there's no point in
1179/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1180/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1181/// leads to duplicated ADRP instructions.
///
/// \returns true only when every user is a LOAD, STORE, ATOMIC_LOAD or
/// ATOMIC_STORE whose ordering is not stronger than monotonic.
// NOTE(review): the signature line of this helper appears to be missing from
// this listing; the call site in this file passes the ADDlow value.
1183 for (auto *User : N->users()) {
1184 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1185 User->getOpcode() != ISD::ATOMIC_LOAD &&
1186 User->getOpcode() != ISD::ATOMIC_STORE)
1187 return false;
1188
1189 // ldar and stlr have much more restrictive addressing modes (just a
1190 // register).
1191 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1192 return false;
1193 }
1194
1195 return true;
1196}
1197
1198/// Check if the immediate offset is valid as a scaled immediate.
1199static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1200 unsigned Size) {
1201 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1202 Offset < (Range << Log2_32(Size)))
1203 return true;
1204 return false;
1205}
1206
1207/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1208/// immediate" address. The "Size" argument is the size in bytes of the memory
1209/// reference, which determines the scale.
1210bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1211 unsigned BW, unsigned Size,
1212 SDValue &Base,
1213 SDValue &OffImm) {
1214 SDLoc dl(N);
1215 const DataLayout &DL = CurDAG->getDataLayout();
1216 const TargetLowering *TLI = getTargetLowering();
1217 if (N.getOpcode() == ISD::FrameIndex) {
1218 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1219 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1220 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1221 return true;
1222 }
1223
1224 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1225 // selected here doesn't support labels/immediates, only base+offset.
1226 if (CurDAG->isBaseWithConstantOffset(N)) {
1227 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1228 if (IsSignedImm) {
1229 int64_t RHSC = RHS->getSExtValue();
1230 unsigned Scale = Log2_32(Size);
1231 int64_t Range = 0x1LL << (BW - 1);
1232
1233 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1234 RHSC < (Range << Scale)) {
1235 Base = N.getOperand(0);
1236 if (Base.getOpcode() == ISD::FrameIndex) {
1237 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1238 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1239 }
1240 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1241 return true;
1242 }
1243 } else {
1244 // unsigned Immediate
1245 uint64_t RHSC = RHS->getZExtValue();
1246 unsigned Scale = Log2_32(Size);
1247 uint64_t Range = 0x1ULL << BW;
1248
1249 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1250 Base = N.getOperand(0);
1251 if (Base.getOpcode() == ISD::FrameIndex) {
1252 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1254 }
1255 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1256 return true;
1257 }
1258 }
1259 }
1260 }
1261 // Base only. The address will be materialized into a register before
1262 // the memory is accessed.
1263 // add x0, Xbase, #offset
1264 // stp x1, x2, [x0]
1265 Base = N;
1266 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1267 return true;
1268}
1269
1270/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1271/// immediate" address. The "Size" argument is the size in bytes of the memory
1272/// reference, which determines the scale.
///
/// \param N       The address to match.
/// \param Size    Access size in bytes.
/// \param Base    Receives the base (frame indices are converted to target
///                frame indices).
/// \param OffImm  Receives the scaled offset as an i64 target constant, or the
///                second ADDlow operand when an ADDlow is folded.
/// \returns false only when the unscaled addressing mode can handle the
/// address instead; otherwise true, falling back to "base only" with a zero
/// offset.
1273bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1274 SDValue &Base, SDValue &OffImm) {
1275 SDLoc dl(N);
1276 const DataLayout &DL = CurDAG->getDataLayout();
1277 const TargetLowering *TLI = getTargetLowering();
1278 if (N.getOpcode() == ISD::FrameIndex) {
1279 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1280 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1281 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1282 return true;
1283 }
1284
// Fold an ADDlow into the memory operation when all of its users allow it.
// For a global address the offset must additionally be a multiple of Size.
// NOTE(review): the second half of that condition appears to be missing from
// this listing; confirm against the upstream file.
1285 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1286 GlobalAddressSDNode *GAN =
1287 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1288 Base = N.getOperand(0);
1289 OffImm = N.getOperand(1);
1290 if (!GAN)
1291 return true;
1292
1293 if (GAN->getOffset() % Size == 0 &&
1295 return true;
1296 }
1297
// Base plus a constant that fits the scaled unsigned 12-bit field.
1298 if (CurDAG->isBaseWithConstantOffset(N)) {
1299 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1300 int64_t RHSC = (int64_t)RHS->getZExtValue();
1301 unsigned Scale = Log2_32(Size);
1302 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1303 Base = N.getOperand(0);
1304 if (Base.getOpcode() == ISD::FrameIndex) {
1305 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1306 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1307 }
1308 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1309 return true;
1310 }
1311 }
1312 }
1313
1314 // Before falling back to our general case, check if the unscaled
1315 // instructions can handle this. If so, that's preferable.
// Note that this call may overwrite Base and OffImm even though this function
// then reports failure.
1316 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1317 return false;
1318
1319 // Base only. The address will be materialized into a register before
1320 // the memory is accessed.
1321 // add x0, Xbase, #offset
1322 // ldr x0, [x0]
1323 Base = N;
1324 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1325 return true;
1326}
1327
1328/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1329/// immediate" address. This should only match when there is an offset that
1330/// is not valid for a scaled immediate addressing mode. The "Size" argument
1331/// is the size in bytes of the memory reference, which is needed here to know
1332/// what is valid for a scaled immediate.
1333bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1334 SDValue &Base,
1335 SDValue &OffImm) {
1336 if (!CurDAG->isBaseWithConstantOffset(N))
1337 return false;
1338 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1339 int64_t RHSC = RHS->getSExtValue();
1340 if (RHSC >= -256 && RHSC < 256) {
1341 Base = N.getOperand(0);
1342 if (Base.getOpcode() == ISD::FrameIndex) {
1343 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1344 const TargetLowering *TLI = getTargetLowering();
1345 Base = CurDAG->getTargetFrameIndex(
1346 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1347 }
1348 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1349 return true;
1350 }
1351 }
1352 return false;
1353}
1354
// Builds an i64 value that holds N in its sub_32 subregister: an IMPLICIT_DEF
// of type i64 is created and N is inserted into it. The remaining bits come
// from the IMPLICIT_DEF and are therefore unspecified.
// NOTE(review): the signature line of this helper appears to be missing from
// this listing; confirm its name and parameters against the upstream file.
1356 SDLoc dl(N);
1357 SDValue ImpDef = SDValue(
1358 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1359 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1360 N);
1361}
1362
1363/// Check if the given SHL node (\p N), can be used to form an
1364/// extended register for an addressing mode.
///
/// \param N           The SHL node; asserted to have opcode ISD::SHL.
/// \param Size        Access size in bytes; a non-zero shift amount must equal
///                    log2(Size).
/// \param WantExtend  When true, the shifted value must itself be an extend
///                    recognised by getExtendTypeForNode in load/store mode.
/// \param Offset      Receives the offset register.
/// \param SignExtend  Receives an i32 target constant: 1 for SXTW, else 0.
/// \returns true when the shift is legal for the access and folding it is
/// considered worthwhile. Offset and SignExtend may have been written even
/// when false is returned.
1365bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1366 bool WantExtend, SDValue &Offset,
1367 SDValue &SignExtend) {
1368 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
// The shift amount must be a constant in the range [0, 7].
1369 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1370 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1371 return false;
1372
1373 SDLoc dl(N);
1374 if (WantExtend) {
// NOTE(review): the declaration of Ext and the check on its value appear to
// be missing from this listing; confirm against the upstream file.
1376 getExtendTypeForNode(N.getOperand(0), true);
1378 return false;
1379
// Look through the extend and narrow its input to i32 if needed.
1380 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1381 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1382 MVT::i32);
1383 } else {
1384 Offset = N.getOperand(0);
1385 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1386 }
1387
1388 unsigned LegalShiftVal = Log2_32(Size);
1389 unsigned ShiftVal = CSD->getZExtValue();
1390
// Only "no shift" or a shift by log2(Size) is accepted.
1391 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1392 return false;
1393
1394 return isWorthFoldingAddr(N, Size);
1395}
1396
/// Select a "base register plus extended 32-bit register offset" address from
/// an ADD, optionally with the offset shifted left.
///
/// The candidates are tried in this order: shifted extend on the RHS, shifted
/// extend on the LHS, unshifted extend on the LHS, unshifted extend on the
/// RHS.
///
/// \param N           The address to match; must be an ISD::ADD.
/// \param Size        Access size in bytes.
/// \param SignExtend  Receives an i32 target constant: 1 for SXTW, else 0.
/// \param DoShift     Receives an i32 target constant telling whether the
///                    offset is shifted.
/// \returns true when a form was matched. Output operands may have been
/// written even when false is returned.
// NOTE(review): some lines of this function (the Base/Offset parameter line,
// the immediate-add check, the declaration of Ext and the InvalidShiftExtend
// comparisons) appear to be missing from this listing; confirm against the
// upstream file.
1397bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1399 SDValue &SignExtend,
1400 SDValue &DoShift) {
1401 if (N.getOpcode() != ISD::ADD)
1402 return false;
1403 SDValue LHS = N.getOperand(0);
1404 SDValue RHS = N.getOperand(1);
1405 SDLoc dl(N);
1406
1407 // We don't want to match immediate adds here, because they are better lowered
1408 // to the register-immediate addressing modes.
1410 return false;
1411
1412 // Check if this particular node is reused in any non-memory related
1413 // operation. If yes, do not try to fold this node into the address
1414 // computation, since the computation will be kept.
1415 const SDNode *Node = N.getNode();
1416 for (SDNode *UI : Node->users()) {
1417 if (!isMemOpOrPrefetch(UI))
1418 return false;
1419 }
1420
1421 // Remember if it is worth folding N when it produces extended register.
1422 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1423
1424 // Try to match a shifted extend on the RHS.
1425 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1426 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1427 Base = LHS;
1428 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1429 return true;
1430 }
1431
1432 // Try to match a shifted extend on the LHS.
1433 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1434 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1435 Base = RHS;
1436 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1437 return true;
1438 }
1439
1440 // There was no shift, whatever else we find.
1441 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1442
1444 // Try to match an unshifted extend on the LHS.
1445 if (IsExtendedRegisterWorthFolding &&
1446 (Ext = getExtendTypeForNode(LHS, true)) !=
1448 Base = RHS;
1449 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1450 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1451 MVT::i32);
1452 if (isWorthFoldingAddr(LHS, Size))
1453 return true;
1454 }
1455
1456 // Try to match an unshifted extend on the RHS.
1457 if (IsExtendedRegisterWorthFolding &&
1458 (Ext = getExtendTypeForNode(RHS, true)) !=
1460 Base = LHS;
1461 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1462 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1463 MVT::i32);
1464 if (isWorthFoldingAddr(RHS, Size))
1465 return true;
1466 }
1467
1468 return false;
1469}
1470
// Check if the given immediate is preferred by ADD. Returns true when the
// immediate can be encoded in an ADD, or when it can be encoded in an
// "ADD LSL #12" and can not be encoded by one MOVZ.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  const bool FitsImm12 = (ImmOff & 0xfffffffffffff000LL) == 0x0LL;
  if (FitsImm12)
    return true;

  // Check if it can be encoded in an "ADD LSL #12": only bits 12..23 set.
  const bool FitsImm12Lsl12 = (ImmOff & 0xffffffffff000fffLL) == 0x0LL;
  if (!FitsImm12Lsl12)
    return false;

  // As a single MOVZ is faster than an "ADD of LSL #12", ignore constants
  // whose set bits all lie in bits 16..23 or all lie in bits 12..15.
  const bool OnlyBits16To23 = (ImmOff & 0xffffffffff00ffffLL) == 0x0LL;
  const bool OnlyBits12To15 = (ImmOff & 0xffffffffffff0fffLL) == 0x0LL;
  return !OnlyBits16To23 && !OnlyBits12To15;
}
1485
/// Select a "base register plus 64-bit register offset" address from an ADD,
/// optionally with the offset shifted left.
///
/// A constant RHS that fits neither the scaled-immediate addressing mode nor a
/// single ADD/SUB is materialized with a MOVi64imm so that it can be used as
/// the register offset.
///
/// \param N           The address to match; must be an ISD::ADD.
/// \param Size        Access size in bytes.
/// \param SignExtend  Receives an i32 target constant; 0 on every path set
///                    directly in this function.
/// \param DoShift     Receives an i32 target constant telling whether the
///                    offset is shifted.
/// \returns true when a form was matched.
// NOTE(review): the parameter line declaring Base and Offset appears to be
// missing from this listing; confirm against the upstream file.
1486bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1488 SDValue &SignExtend,
1489 SDValue &DoShift) {
1490 if (N.getOpcode() != ISD::ADD)
1491 return false;
1492 SDValue LHS = N.getOperand(0);
1493 SDValue RHS = N.getOperand(1);
1494 SDLoc DL(N);
1495
1496 // Check if this particular node is reused in any non-memory related
1497 // operation. If yes, do not try to fold this node into the address
1498 // computation, since the computation will be kept.
1499 const SDNode *Node = N.getNode();
1500 for (SDNode *UI : Node->users()) {
1501 if (!isMemOpOrPrefetch(UI))
1502 return false;
1503 }
1504
1505 // Watch out if RHS is a wide immediate, it can not be selected into
1506 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1507 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1508 // instructions like:
1509 // MOV X0, WideImmediate
1510 // ADD X1, BaseReg, X0
1511 // LDR X2, [X1, 0]
1512 // For such situation, using [BaseReg, XReg] addressing mode can save one
1513 // ADD/SUB:
1514 // MOV X0, WideImmediate
1515 // LDR X2, [BaseReg, X0]
1516 if (isa<ConstantSDNode>(RHS)) {
1517 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1518 // Skip immediates that can be selected by the load/store addressing mode.
1519 // Also skip immediates that can be encoded by a single ADD (SUB is also
1520 // checked by using -ImmOff).
1521 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1522 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1523 return false;
1524
1525 SDValue Ops[] = { RHS };
1526 SDNode *MOVI =
1527 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1528 SDValue MOVIV = SDValue(MOVI, 0);
1529 // This ADD of two X register will be selected into [Reg+Reg] mode.
1530 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1531 }
1532
1533 // Remember if it is worth folding N when it produces extended register.
1534 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1535
1536 // Try to match a shifted extend on the RHS.
1537 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1538 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1539 Base = LHS;
1540 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1541 return true;
1542 }
1543
1544 // Try to match a shifted extend on the LHS.
1545 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1546 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1547 Base = RHS;
1548 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1549 return true;
1550 }
1551
1552 // Match any non-shifted, non-extend, non-immediate add expression.
1553 Base = LHS;
1554 Offset = RHS;
1555 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1556 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1557 // Reg1 + Reg2 is free: no check needed.
1558 return true;
1559}
1560
1561SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1562 static const unsigned RegClassIDs[] = {
1563 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1564 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1565 AArch64::dsub2, AArch64::dsub3};
1566
1567 return createTuple(Regs, RegClassIDs, SubRegs);
1568}
1569
1570SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1571 static const unsigned RegClassIDs[] = {
1572 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1573 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1574 AArch64::qsub2, AArch64::qsub3};
1575
1576 return createTuple(Regs, RegClassIDs, SubRegs);
1577}
1578
1579SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1580 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1581 AArch64::ZPR3RegClassID,
1582 AArch64::ZPR4RegClassID};
1583 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1584 AArch64::zsub2, AArch64::zsub3};
1585
1586 return createTuple(Regs, RegClassIDs, SubRegs);
1587}
1588
1589SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1590 assert(Regs.size() == 2 || Regs.size() == 4);
1591
1592 // The createTuple interface requires 3 RegClassIDs for each possible
1593 // tuple type even though we only have them for ZPR2 and ZPR4.
1594 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1595 AArch64::ZPR4Mul4RegClassID};
1596 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1597 AArch64::zsub2, AArch64::zsub3};
1598 return createTuple(Regs, RegClassIDs, SubRegs);
1599}
1600
/// Combine \p Regs into one tuple value by emitting a REG_SEQUENCE.
///
/// \param Regs         The registers to combine; a single register is
///                     returned unchanged, otherwise 2 to 4 are expected.
/// \param RegClassIDs  Register class IDs indexed by (Regs.size() - 2).
/// \param SubRegs      Subregister index for each position in the tuple.
/// \returns result 0 of the new REG_SEQUENCE node (typed MVT::Untyped), or
/// Regs[0] for a single register.
1601SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1602 const unsigned RegClassIDs[],
1603 const unsigned SubRegs[]) {
1604 // There's no special register-class for a vector-list of 1 element: it's just
1605 // a vector.
1606 if (Regs.size() == 1)
1607 return Regs[0];
1608
1609 assert(Regs.size() >= 2 && Regs.size() <= 4);
1610
1611 SDLoc DL(Regs[0]);
1612
// NOTE(review): the declaration of Ops appears to be missing from this
// listing; confirm against the upstream file.
1614
1615 // First operand of REG_SEQUENCE is the desired RegClass.
1616 Ops.push_back(
1617 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1618
1619 // Then we get pairs of source & subregister-position for the components.
1620 for (unsigned i = 0; i < Regs.size(); ++i) {
1621 Ops.push_back(Regs[i]);
1622 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1623 }
1624
1625 SDNode *N =
1626 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1627 return SDValue(N, 0);
1628}
1629
/// Replace \p N with machine node \p Opc, passing its \p NumVecs vector
/// operands as one Q-register tuple.
///
/// The operands of N are read as: operand 0 (not used here), then operand 1
/// only when \p isExt is true, then NumVecs vectors, then one trailing
/// operand.
///
/// \param N        The node to replace.
/// \param NumVecs  Number of vector operands to combine into the tuple.
/// \param Opc      Machine opcode of the replacement node.
/// \param isExt    Whether N carries the extra operand 1, which is passed
///                 first to the new node.
1630void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1631 bool isExt) {
1632 SDLoc dl(N);
1633 EVT VT = N->getValueType(0);
1634
// The extra operand, when present, shifts every later operand index by one.
1635 unsigned ExtOff = isExt;
1636
1637 // Form a REG_SEQUENCE to force register allocation.
1638 unsigned Vec0Off = ExtOff + 1;
1639 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1640 SDValue RegSeq = createQTuple(Regs);
1641
// NOTE(review): the declaration of Ops appears to be missing from this
// listing; confirm against the upstream file.
1643 if (isExt)
1644 Ops.push_back(N->getOperand(1));
1645 Ops.push_back(RegSeq);
1646 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1647 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1648}
1649
/// Split a pointer-authentication discriminator into a constant part and an
/// address part.
///
/// \returns a (constant discriminator, address discriminator) tuple:
///   - for a ptrauth_blend whose second argument is a 16-bit constant: that
///     constant and the blend's first argument;
///   - for a plain 16-bit constant: that constant and the XZR register;
///   - otherwise: a zero constant and the original discriminator unchanged.
/// The constant part is always an i64 target constant.
// NOTE(review): the line carrying the function name and parameters appears to
// be missing from this listing; call sites in this file pass
// (discriminator, CurDAG).
1650static std::tuple<SDValue, SDValue>
1652 SDLoc DL(Disc);
1653 SDValue AddrDisc;
1654 SDValue ConstDisc;
1655
1656 // If this is a blend, remember the constant and address discriminators.
1657 // Otherwise, it's either a constant discriminator, or a non-blended
1658 // address discriminator.
1659 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1660 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1661 AddrDisc = Disc->getOperand(1);
1662 ConstDisc = Disc->getOperand(2);
1663 } else {
1664 ConstDisc = Disc;
1665 }
1666
1667 // If the constant discriminator (either the blend RHS, or the entire
1668 // discriminator value) isn't a 16-bit constant, bail out, and let the
1669 // discriminator be computed separately.
1670 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1671 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1672 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1673
1674 // If there's no address discriminator, use XZR directly.
1675 if (!AddrDisc)
1676 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1677
1678 return std::make_tuple(
1679 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1680 AddrDisc);
1681}
1682
1683void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1684 SDLoc DL(N);
1685 // IntrinsicID is operand #0
1686 SDValue Val = N->getOperand(1);
1687 SDValue AUTKey = N->getOperand(2);
1688 SDValue AUTDisc = N->getOperand(3);
1689
1690 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1691 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1692
1693 SDValue AUTAddrDisc, AUTConstDisc;
1694 std::tie(AUTConstDisc, AUTAddrDisc) =
1695 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1696
1697 if (!Subtarget->isX16X17Safer()) {
1698 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1699 // Copy deactivation symbol if present.
1700 if (N->getNumOperands() > 4)
1701 Ops.push_back(N->getOperand(4));
1702
1703 SDNode *AUT =
1704 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1705 ReplaceNode(N, AUT);
1706 } else {
1707 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1708 AArch64::X16, Val, SDValue());
1709 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1710
1711 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1712 ReplaceNode(N, AUT);
1713 }
1714}
1715
1716void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1717 SDLoc DL(N);
1718 // IntrinsicID is operand #0, if W_CHAIN it is #1
1719 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1720 SDValue Val = N->getOperand(OffsetBase + 1);
1721 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1722 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1723 SDValue PACKey = N->getOperand(OffsetBase + 4);
1724 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1725 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1726 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1727
1728 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1729 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1730
1731 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1732 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1733
1734 SDValue AUTAddrDisc, AUTConstDisc;
1735 std::tie(AUTConstDisc, AUTAddrDisc) =
1736 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1737
1738 SDValue PACAddrDisc, PACConstDisc;
1739 std::tie(PACConstDisc, PACAddrDisc) =
1740 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1741
1742 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1743 AArch64::X16, Val, SDValue());
1744
1745 if (HasLoad) {
1746 SDValue Addend = N->getOperand(OffsetBase + 6);
1747 SDValue IncomingChain = N->getOperand(0);
1748 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1749 PACKey, PACConstDisc, PACAddrDisc,
1750 Addend, IncomingChain, X16Copy.getValue(1)};
1751
1752 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1753 MVT::i64, MVT::Other, Ops);
1754 ReplaceNode(N, AUTRELLOADPAC);
1755 } else {
1756 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1757 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1758
1759 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1760 ReplaceNode(N, AUTPAC);
1761 }
1762}
1763
1764bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1765 LoadSDNode *LD = cast<LoadSDNode>(N);
1766 if (LD->isUnindexed())
1767 return false;
1768 EVT VT = LD->getMemoryVT();
1769 EVT DstVT = N->getValueType(0);
1770 ISD::MemIndexedMode AM = LD->getAddressingMode();
1771 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1772 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1773 int OffsetVal = (int)OffsetOp->getZExtValue();
1774
1775 // We're not doing validity checking here. That was done when checking
1776 // if we should mark the load as indexed or not. We're just selecting
1777 // the right instruction.
1778 unsigned Opcode = 0;
1779
1780 ISD::LoadExtType ExtType = LD->getExtensionType();
1781 bool InsertTo64 = false;
1782 if (VT == MVT::i64)
1783 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1784 else if (VT == MVT::i32) {
1785 if (ExtType == ISD::NON_EXTLOAD)
1786 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1787 else if (ExtType == ISD::SEXTLOAD)
1788 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1789 else {
1790 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1791 InsertTo64 = true;
1792 // The result of the load is only i32. It's the subreg_to_reg that makes
1793 // it into an i64.
1794 DstVT = MVT::i32;
1795 }
1796 } else if (VT == MVT::i16) {
1797 if (ExtType == ISD::SEXTLOAD) {
1798 if (DstVT == MVT::i64)
1799 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1800 else
1801 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1802 } else {
1803 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1804 InsertTo64 = DstVT == MVT::i64;
1805 // The result of the load is only i32. It's the subreg_to_reg that makes
1806 // it into an i64.
1807 DstVT = MVT::i32;
1808 }
1809 } else if (VT == MVT::i8) {
1810 if (ExtType == ISD::SEXTLOAD) {
1811 if (DstVT == MVT::i64)
1812 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1813 else
1814 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1815 } else {
1816 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1817 InsertTo64 = DstVT == MVT::i64;
1818 // The result of the load is only i32. It's the subreg_to_reg that makes
1819 // it into an i64.
1820 DstVT = MVT::i32;
1821 }
1822 } else if (VT == MVT::f16) {
1823 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1824 } else if (VT == MVT::bf16) {
1825 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1826 } else if (VT == MVT::f32) {
1827 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1828 } else if (VT == MVT::f64 ||
1829 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1830 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1831 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1832 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1833 } else if (VT.is64BitVector()) {
1834 if (IsPre || OffsetVal != 8)
1835 return false;
1836 switch (VT.getScalarSizeInBits()) {
1837 case 8:
1838 Opcode = AArch64::LD1Onev8b_POST;
1839 break;
1840 case 16:
1841 Opcode = AArch64::LD1Onev4h_POST;
1842 break;
1843 case 32:
1844 Opcode = AArch64::LD1Onev2s_POST;
1845 break;
1846 case 64:
1847 Opcode = AArch64::LD1Onev1d_POST;
1848 break;
1849 default:
1850 llvm_unreachable("Expected vector element to be a power of 2");
1851 }
1852 } else if (VT.is128BitVector()) {
1853 if (IsPre || OffsetVal != 16)
1854 return false;
1855 switch (VT.getScalarSizeInBits()) {
1856 case 8:
1857 Opcode = AArch64::LD1Onev16b_POST;
1858 break;
1859 case 16:
1860 Opcode = AArch64::LD1Onev8h_POST;
1861 break;
1862 case 32:
1863 Opcode = AArch64::LD1Onev4s_POST;
1864 break;
1865 case 64:
1866 Opcode = AArch64::LD1Onev2d_POST;
1867 break;
1868 default:
1869 llvm_unreachable("Expected vector element to be a power of 2");
1870 }
1871 } else
1872 return false;
1873 SDValue Chain = LD->getChain();
1874 SDValue Base = LD->getBasePtr();
1875 SDLoc dl(N);
1876 // LD1 encodes an immediate offset by using XZR as the offset register.
1877 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1878 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1879 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1880 SDValue Ops[] = { Base, Offset, Chain };
1881 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1882 MVT::Other, Ops);
1883
1884 // Transfer memoperands.
1885 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1886 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1887
1888 // Either way, we're replacing the node, so tell the caller that.
1889 SDValue LoadedVal = SDValue(Res, 1);
1890 if (InsertTo64) {
1891 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1892 LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
1893 MVT::i64, LoadedVal, SubReg),
1894 0);
1895 }
1896
1897 ReplaceUses(SDValue(N, 0), LoadedVal);
1898 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1899 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1900 CurDAG->RemoveDeadNode(N);
1901 return true;
1902}
1903
1904void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1905 unsigned SubRegIdx) {
1906 SDLoc dl(N);
1907 EVT VT = N->getValueType(0);
1908 SDValue Chain = N->getOperand(0);
1909
1910 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1911 Chain};
1912
1913 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1914
1915 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1916 SDValue SuperReg = SDValue(Ld, 0);
1917 for (unsigned i = 0; i < NumVecs; ++i)
1918 ReplaceUses(SDValue(N, i),
1919 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1920
1921 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1922
1923 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1924 // because it's too simple to have needed special treatment during lowering.
1925 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1926 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1927 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1928 }
1929
1930 CurDAG->RemoveDeadNode(N);
1931}
1932
1933void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1934 unsigned Opc, unsigned SubRegIdx) {
1935 SDLoc dl(N);
1936 EVT VT = N->getValueType(0);
1937 SDValue Chain = N->getOperand(0);
1938
1939 SDValue Ops[] = {N->getOperand(1), // Mem operand
1940 N->getOperand(2), // Incremental
1941 Chain};
1942
1943 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1944 MVT::Untyped, MVT::Other};
1945
1946 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1947
1948 // Update uses of write back register
1949 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1950
1951 // Update uses of vector list
1952 SDValue SuperReg = SDValue(Ld, 1);
1953 if (NumVecs == 1)
1954 ReplaceUses(SDValue(N, 0), SuperReg);
1955 else
1956 for (unsigned i = 0; i < NumVecs; ++i)
1957 ReplaceUses(SDValue(N, i),
1958 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1959
1960 // Update the chain
1961 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1962 CurDAG->RemoveDeadNode(N);
1963}
1964
1965/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1966/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1967/// new Base and an SDValue representing the new offset.
1968std::tuple<unsigned, SDValue, SDValue>
1969AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1970 unsigned Opc_ri,
1971 const SDValue &OldBase,
1972 const SDValue &OldOffset,
1973 unsigned Scale) {
1974 SDValue NewBase = OldBase;
1975 SDValue NewOffset = OldOffset;
1976 // Detect a possible Reg+Imm addressing mode.
1977 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1978 N, OldBase, NewBase, NewOffset);
1979
1980 // Detect a possible reg+reg addressing mode, but only if we haven't already
1981 // detected a Reg+Imm one.
1982 const bool IsRegReg =
1983 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1984
1985 // Select the instruction.
1986 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1987}
1988
1989enum class SelectTypeKind {
1990 Int1 = 0,
1991 Int = 1,
1992 FP = 2,
1994};
1995
1996/// This function selects an opcode from a list of opcodes, which is
1997/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1998/// element types, in this order.
1999template <SelectTypeKind Kind>
2000static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
2001 // Only match scalable vector VTs
2002 if (!VT.isScalableVector())
2003 return 0;
2004
2005 EVT EltVT = VT.getVectorElementType();
2006 unsigned Key = VT.getVectorMinNumElements();
2007 switch (Kind) {
2009 break;
2011 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2012 EltVT != MVT::i64)
2013 return 0;
2014 break;
2016 if (EltVT != MVT::i1)
2017 return 0;
2018 break;
2019 case SelectTypeKind::FP:
2020 if (EltVT == MVT::bf16)
2021 Key = 16;
2022 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2023 EltVT != MVT::f64)
2024 return 0;
2025 break;
2026 }
2027
2028 unsigned Offset;
2029 switch (Key) {
2030 case 16: // 8-bit or bf16
2031 Offset = 0;
2032 break;
2033 case 8: // 16-bit
2034 Offset = 1;
2035 break;
2036 case 4: // 32-bit
2037 Offset = 2;
2038 break;
2039 case 2: // 64-bit
2040 Offset = 3;
2041 break;
2042 default:
2043 return 0;
2044 }
2045
2046 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2047}
2048
2049// This function is almost identical to SelectWhilePair, but has an
2050// extra check on the range of the immediate operand.
2051// TODO: Merge these two functions together at some point?
2052void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2053 // Immediate can be either 0 or 1.
2054 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2055 if (Imm->getZExtValue() > 1)
2056 return;
2057
2058 SDLoc DL(N);
2059 EVT VT = N->getValueType(0);
2060 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2061 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2062 SDValue SuperReg = SDValue(WhilePair, 0);
2063
2064 for (unsigned I = 0; I < 2; ++I)
2065 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2066 AArch64::psub0 + I, DL, VT, SuperReg));
2067
2068 CurDAG->RemoveDeadNode(N);
2069}
2070
2071void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2072 SDLoc DL(N);
2073 EVT VT = N->getValueType(0);
2074
2075 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2076
2077 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2078 SDValue SuperReg = SDValue(WhilePair, 0);
2079
2080 for (unsigned I = 0; I < 2; ++I)
2081 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2082 AArch64::psub0 + I, DL, VT, SuperReg));
2083
2084 CurDAG->RemoveDeadNode(N);
2085}
2086
2087void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2088 unsigned Opcode) {
2089 EVT VT = N->getValueType(0);
2090 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2091 SDValue Ops = createZTuple(Regs);
2092 SDLoc DL(N);
2093 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2094 SDValue SuperReg = SDValue(Intrinsic, 0);
2095 for (unsigned i = 0; i < NumVecs; ++i)
2096 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2097 AArch64::zsub0 + i, DL, VT, SuperReg));
2098
2099 CurDAG->RemoveDeadNode(N);
2100}
2101
2102void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2103 unsigned Opcode) {
2104 SDLoc DL(N);
2105 EVT VT = N->getValueType(0);
2106 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2107 Ops.push_back(/*Chain*/ N->getOperand(0));
2108
2109 SDNode *Instruction =
2110 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2111 SDValue SuperReg = SDValue(Instruction, 0);
2112
2113 for (unsigned i = 0; i < NumVecs; ++i)
2114 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2115 AArch64::zsub0 + i, DL, VT, SuperReg));
2116
2117 // Copy chain
2118 unsigned ChainIdx = NumVecs;
2119 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2120 CurDAG->RemoveDeadNode(N);
2121}
2122
2123void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2124 unsigned NumVecs,
2125 bool IsZmMulti,
2126 unsigned Opcode,
2127 bool HasPred) {
2128 assert(Opcode != 0 && "Unexpected opcode");
2129
2130 SDLoc DL(N);
2131 EVT VT = N->getValueType(0);
2132 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2134
2135 auto GetMultiVecOperand = [&]() {
2136 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2137 OpsIter += NumVecs;
2138 return createZMulTuple(Regs);
2139 };
2140
2141 if (HasPred)
2142 Ops.push_back(*OpsIter++);
2143
2144 Ops.push_back(GetMultiVecOperand());
2145 if (IsZmMulti)
2146 Ops.push_back(GetMultiVecOperand());
2147 else
2148 Ops.push_back(*OpsIter++);
2149
2150 // Append any remaining operands.
2151 Ops.append(OpsIter, N->op_end());
2152 SDNode *Intrinsic;
2153 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2154 SDValue SuperReg = SDValue(Intrinsic, 0);
2155 for (unsigned i = 0; i < NumVecs; ++i)
2156 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2157 AArch64::zsub0 + i, DL, VT, SuperReg));
2158
2159 CurDAG->RemoveDeadNode(N);
2160}
2161
2162void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2163 unsigned Scale, unsigned Opc_ri,
2164 unsigned Opc_rr, bool IsIntr) {
2165 assert(Scale < 5 && "Invalid scaling value.");
2166 SDLoc DL(N);
2167 EVT VT = N->getValueType(0);
2168 SDValue Chain = N->getOperand(0);
2169
2170 // Optimize addressing mode.
2172 unsigned Opc;
2173 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2174 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2175 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2176
2177 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2178 Base, // Memory operand
2179 Offset, Chain};
2180
2181 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2182
2183 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2184 SDValue SuperReg = SDValue(Load, 0);
2185 for (unsigned i = 0; i < NumVecs; ++i)
2186 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2187 AArch64::zsub0 + i, DL, VT, SuperReg));
2188
2189 // Copy chain
2190 unsigned ChainIdx = NumVecs;
2191 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2192 CurDAG->RemoveDeadNode(N);
2193}
2194
2195void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2196 unsigned NumVecs,
2197 unsigned Scale,
2198 unsigned Opc_ri,
2199 unsigned Opc_rr) {
2200 assert(Scale < 4 && "Invalid scaling value.");
2201 SDLoc DL(N);
2202 EVT VT = N->getValueType(0);
2203 SDValue Chain = N->getOperand(0);
2204
2205 SDValue PNg = N->getOperand(2);
2206 SDValue Base = N->getOperand(3);
2207 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2208 unsigned Opc;
2209 std::tie(Opc, Base, Offset) =
2210 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2211
2212 SDValue Ops[] = {PNg, // Predicate-as-counter
2213 Base, // Memory operand
2214 Offset, Chain};
2215
2216 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2217
2218 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2219 SDValue SuperReg = SDValue(Load, 0);
2220 for (unsigned i = 0; i < NumVecs; ++i)
2221 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2222 AArch64::zsub0 + i, DL, VT, SuperReg));
2223
2224 // Copy chain
2225 unsigned ChainIdx = NumVecs;
2226 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2227 CurDAG->RemoveDeadNode(N);
2228}
2229
2230void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2231 unsigned Opcode) {
2232 if (N->getValueType(0) != MVT::nxv4f32)
2233 return;
2234 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2235}
2236
2237void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2238 unsigned NumOutVecs,
2239 unsigned Opc,
2240 uint32_t MaxImm) {
2241 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2242 if (Imm->getZExtValue() > MaxImm)
2243 return;
2244
2245 SDValue ZtValue;
2246 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2247 return;
2248
2249 SDValue Chain = Node->getOperand(0);
2250 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2251 SDLoc DL(Node);
2252 EVT VT = Node->getValueType(0);
2253
2254 SDNode *Instruction =
2255 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2256 SDValue SuperReg = SDValue(Instruction, 0);
2257
2258 for (unsigned I = 0; I < NumOutVecs; ++I)
2259 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2260 AArch64::zsub0 + I, DL, VT, SuperReg));
2261
2262 // Copy chain
2263 unsigned ChainIdx = NumOutVecs;
2264 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2265 CurDAG->RemoveDeadNode(Node);
2266}
2267
2268void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2269 unsigned NumOutVecs,
2270 unsigned Opc) {
2271 SDValue ZtValue;
2272 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2273 return;
2274
2275 SDValue Chain = Node->getOperand(0);
2276 SDValue Ops[] = {ZtValue,
2277 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2278 Chain};
2279
2280 SDLoc DL(Node);
2281 EVT VT = Node->getValueType(0);
2282
2283 SDNode *Instruction =
2284 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2285 SDValue SuperReg = SDValue(Instruction, 0);
2286
2287 for (unsigned I = 0; I < NumOutVecs; ++I)
2288 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2289 AArch64::zsub0 + I, DL, VT, SuperReg));
2290
2291 // Copy chain
2292 unsigned ChainIdx = NumOutVecs;
2293 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2294 CurDAG->RemoveDeadNode(Node);
2295}
2296
2297void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2298 unsigned Op) {
2299 SDLoc DL(N);
2300 EVT VT = N->getValueType(0);
2301
2302 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2303 SDValue Zd = createZMulTuple(Regs);
2304 SDValue Zn = N->getOperand(1 + NumVecs);
2305 SDValue Zm = N->getOperand(2 + NumVecs);
2306
2307 SDValue Ops[] = {Zd, Zn, Zm};
2308
2309 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2310 SDValue SuperReg = SDValue(Intrinsic, 0);
2311 for (unsigned i = 0; i < NumVecs; ++i)
2312 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2313 AArch64::zsub0 + i, DL, VT, SuperReg));
2314
2315 CurDAG->RemoveDeadNode(N);
2316}
2317
2318bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2319 switch (BaseReg) {
2320 default:
2321 return false;
2322 case AArch64::ZA:
2323 case AArch64::ZAB0:
2324 if (TileNum == 0)
2325 break;
2326 return false;
2327 case AArch64::ZAH0:
2328 if (TileNum <= 1)
2329 break;
2330 return false;
2331 case AArch64::ZAS0:
2332 if (TileNum <= 3)
2333 break;
2334 return false;
2335 case AArch64::ZAD0:
2336 if (TileNum <= 7)
2337 break;
2338 return false;
2339 }
2340
2341 BaseReg += TileNum;
2342 return true;
2343}
2344
2345template <unsigned MaxIdx, unsigned Scale>
2346void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2347 unsigned BaseReg, unsigned Op) {
2348 unsigned TileNum = 0;
2349 if (BaseReg != AArch64::ZA)
2350 TileNum = N->getConstantOperandVal(2);
2351
2352 if (!SelectSMETile(BaseReg, TileNum))
2353 return;
2354
2355 SDValue SliceBase, Base, Offset;
2356 if (BaseReg == AArch64::ZA)
2357 SliceBase = N->getOperand(2);
2358 else
2359 SliceBase = N->getOperand(3);
2360
2361 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2362 return;
2363
2364 SDLoc DL(N);
2365 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2366 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2367 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2368
2369 EVT VT = N->getValueType(0);
2370 for (unsigned I = 0; I < NumVecs; ++I)
2371 ReplaceUses(SDValue(N, I),
2372 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2373 SDValue(Mov, 0)));
2374 // Copy chain
2375 unsigned ChainIdx = NumVecs;
2376 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2377 CurDAG->RemoveDeadNode(N);
2378}
2379
2380void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2381 unsigned Op, unsigned MaxIdx,
2382 unsigned Scale, unsigned BaseReg) {
2383 // Slice can be in different positions
2384 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2385 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2386 SDValue SliceBase = N->getOperand(2);
2387 if (BaseReg != AArch64::ZA)
2388 SliceBase = N->getOperand(3);
2389
2391 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2392 return;
2393 // The correct Za tile number is computed in Machine Instruction
2394 // See EmitZAInstr
2395 // DAG cannot select Za tile as an output register with ZReg
2396 SDLoc DL(N);
2398 if (BaseReg != AArch64::ZA )
2399 Ops.push_back(N->getOperand(2));
2400 Ops.push_back(Base);
2401 Ops.push_back(Offset);
2402 Ops.push_back(N->getOperand(0)); //Chain
2403 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2404
2405 EVT VT = N->getValueType(0);
2406 for (unsigned I = 0; I < NumVecs; ++I)
2407 ReplaceUses(SDValue(N, I),
2408 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2409 SDValue(Mov, 0)));
2410
2411 // Copy chain
2412 unsigned ChainIdx = NumVecs;
2413 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2414 CurDAG->RemoveDeadNode(N);
2415}
2416
2417void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2418 unsigned NumOutVecs,
2419 bool IsTupleInput,
2420 unsigned Opc) {
2421 SDLoc DL(N);
2422 EVT VT = N->getValueType(0);
2423 unsigned NumInVecs = N->getNumOperands() - 1;
2424
2426 if (IsTupleInput) {
2427 assert((NumInVecs == 2 || NumInVecs == 4) &&
2428 "Don't know how to handle multi-register input!");
2429 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2430 Ops.push_back(createZMulTuple(Regs));
2431 } else {
2432 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2433 for (unsigned I = 0; I < NumInVecs; I++)
2434 Ops.push_back(N->getOperand(1 + I));
2435 }
2436
2437 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2438 SDValue SuperReg = SDValue(Res, 0);
2439
2440 for (unsigned I = 0; I < NumOutVecs; I++)
2441 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2442 AArch64::zsub0 + I, DL, VT, SuperReg));
2443 CurDAG->RemoveDeadNode(N);
2444}
2445
2446void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2447 unsigned Opc) {
2448 SDLoc dl(N);
2449 EVT VT = N->getOperand(2)->getValueType(0);
2450
2451 // Form a REG_SEQUENCE to force register allocation.
2452 bool Is128Bit = VT.getSizeInBits() == 128;
2453 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2454 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2455
2456 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2457 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2458
2459 // Transfer memoperands.
2460 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2461 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2462
2463 ReplaceNode(N, St);
2464}
2465
2466void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2467 unsigned Scale, unsigned Opc_rr,
2468 unsigned Opc_ri) {
2469 SDLoc dl(N);
2470
2471 // Form a REG_SEQUENCE to force register allocation.
2472 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2473 SDValue RegSeq = createZTuple(Regs);
2474
2475 // Optimize addressing mode.
2476 unsigned Opc;
2478 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2479 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2480 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2481
2482 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2483 Base, // address
2484 Offset, // offset
2485 N->getOperand(0)}; // chain
2486 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2487
2488 ReplaceNode(N, St);
2489}
2490
2491bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2492 SDValue &OffImm) {
2493 SDLoc dl(N);
2494 const DataLayout &DL = CurDAG->getDataLayout();
2495 const TargetLowering *TLI = getTargetLowering();
2496
2497 // Try to match it for the frame address
2498 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2499 int FI = FINode->getIndex();
2500 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2501 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2502 return true;
2503 }
2504
2505 return false;
2506}
2507
2508void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2509 unsigned Opc) {
2510 SDLoc dl(N);
2511 EVT VT = N->getOperand(2)->getValueType(0);
2512 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2513 MVT::Other}; // Type for the Chain
2514
2515 // Form a REG_SEQUENCE to force register allocation.
2516 bool Is128Bit = VT.getSizeInBits() == 128;
2517 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2518 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2519
2520 SDValue Ops[] = {RegSeq,
2521 N->getOperand(NumVecs + 1), // base register
2522 N->getOperand(NumVecs + 2), // Incremental
2523 N->getOperand(0)}; // Chain
2524 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2525
2526 ReplaceNode(N, St);
2527}
2528
2529namespace {
2530/// WidenVector - Given a value in the V64 register class, produce the
2531/// equivalent value in the V128 register class.
2532class WidenVector {
2533 SelectionDAG &DAG;
2534
2535public:
2536 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2537
2538 SDValue operator()(SDValue V64Reg) {
2539 EVT VT = V64Reg.getValueType();
2540 unsigned NarrowSize = VT.getVectorNumElements();
2541 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2542 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2543 SDLoc DL(V64Reg);
2544
2545 SDValue Undef =
2546 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2547 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2548 }
2549};
2550} // namespace
2551
2552/// NarrowVector - Given a value in the V128 register class, produce the
2553/// equivalent value in the V64 register class.
2555 EVT VT = V128Reg.getValueType();
2556 unsigned WideSize = VT.getVectorNumElements();
2557 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2558 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2559
2560 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2561 V128Reg);
2562}
2563
2564void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2565 unsigned Opc) {
2566 SDLoc dl(N);
2567 EVT VT = N->getValueType(0);
2568 bool Narrow = VT.getSizeInBits() == 64;
2569
2570 // Form a REG_SEQUENCE to force register allocation.
2571 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2572
2573 if (Narrow)
2574 transform(Regs, Regs.begin(),
2575 WidenVector(*CurDAG));
2576
2577 SDValue RegSeq = createQTuple(Regs);
2578
2579 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2580
2581 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2582
2583 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2584 N->getOperand(NumVecs + 3), N->getOperand(0)};
2585 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2586 SDValue SuperReg = SDValue(Ld, 0);
2587
2588 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2589 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2590 AArch64::qsub2, AArch64::qsub3 };
2591 for (unsigned i = 0; i < NumVecs; ++i) {
2592 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2593 if (Narrow)
2594 NV = NarrowVector(NV, *CurDAG);
2595 ReplaceUses(SDValue(N, i), NV);
2596 }
2597
2598 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2599 CurDAG->RemoveDeadNode(N);
2600}
2601
2602void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2603 unsigned Opc) {
2604 SDLoc dl(N);
2605 EVT VT = N->getValueType(0);
2606 bool Narrow = VT.getSizeInBits() == 64;
2607
2608 // Form a REG_SEQUENCE to force register allocation.
2609 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2610
2611 if (Narrow)
2612 transform(Regs, Regs.begin(),
2613 WidenVector(*CurDAG));
2614
2615 SDValue RegSeq = createQTuple(Regs);
2616
2617 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2618 RegSeq->getValueType(0), MVT::Other};
2619
2620 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2621
2622 SDValue Ops[] = {RegSeq,
2623 CurDAG->getTargetConstant(LaneNo, dl,
2624 MVT::i64), // Lane Number
2625 N->getOperand(NumVecs + 2), // Base register
2626 N->getOperand(NumVecs + 3), // Incremental
2627 N->getOperand(0)};
2628 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2629
2630 // Update uses of the write back register
2631 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2632
2633 // Update uses of the vector list
2634 SDValue SuperReg = SDValue(Ld, 1);
2635 if (NumVecs == 1) {
2636 ReplaceUses(SDValue(N, 0),
2637 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2638 } else {
2639 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2640 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2641 AArch64::qsub2, AArch64::qsub3 };
2642 for (unsigned i = 0; i < NumVecs; ++i) {
2643 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2644 SuperReg);
2645 if (Narrow)
2646 NV = NarrowVector(NV, *CurDAG);
2647 ReplaceUses(SDValue(N, i), NV);
2648 }
2649 }
2650
2651 // Update the Chain
2652 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2653 CurDAG->RemoveDeadNode(N);
2654}
2655
2656void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2657 unsigned Opc) {
2658 SDLoc dl(N);
2659 EVT VT = N->getOperand(2)->getValueType(0);
2660 bool Narrow = VT.getSizeInBits() == 64;
2661
2662 // Form a REG_SEQUENCE to force register allocation.
2663 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2664
2665 if (Narrow)
2666 transform(Regs, Regs.begin(),
2667 WidenVector(*CurDAG));
2668
2669 SDValue RegSeq = createQTuple(Regs);
2670
2671 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2672
2673 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2674 N->getOperand(NumVecs + 3), N->getOperand(0)};
2675 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2676
2677 // Transfer memoperands.
2678 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2679 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2680
2681 ReplaceNode(N, St);
2682}
2683
2684void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2685 unsigned Opc) {
2686 SDLoc dl(N);
2687 EVT VT = N->getOperand(2)->getValueType(0);
2688 bool Narrow = VT.getSizeInBits() == 64;
2689
2690 // Form a REG_SEQUENCE to force register allocation.
2691 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2692
2693 if (Narrow)
2694 transform(Regs, Regs.begin(),
2695 WidenVector(*CurDAG));
2696
2697 SDValue RegSeq = createQTuple(Regs);
2698
2699 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2700 MVT::Other};
2701
2702 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2703
2704 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2705 N->getOperand(NumVecs + 2), // Base Register
2706 N->getOperand(NumVecs + 3), // Incremental
2707 N->getOperand(0)};
2708 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2709
2710 // Transfer memoperands.
2711 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2712 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2713
2714 ReplaceNode(N, St);
2715}
2716
2718 unsigned &Opc, SDValue &Opd0,
2719 unsigned &LSB, unsigned &MSB,
2720 unsigned NumberOfIgnoredLowBits,
2721 bool BiggerPattern) {
2722 assert(N->getOpcode() == ISD::AND &&
2723 "N must be a AND operation to call this function");
2724
2725 EVT VT = N->getValueType(0);
2726
2727 // Here we can test the type of VT and return false when the type does not
2728 // match, but since it is done prior to that call in the current context
2729 // we turned that into an assert to avoid redundant code.
2730 assert((VT == MVT::i32 || VT == MVT::i64) &&
2731 "Type checking must have been done before calling this function");
2732
2733 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2734 // changed the AND node to a 32-bit mask operation. We'll have to
2735 // undo that as part of the transform here if we want to catch all
2736 // the opportunities.
2737 // Currently the NumberOfIgnoredLowBits argument helps to recover
2738 // from these situations when matching bigger pattern (bitfield insert).
2739
2740 // For unsigned extracts, check for a shift right and mask
2741 uint64_t AndImm = 0;
2742 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2743 return false;
2744
2745 const SDNode *Op0 = N->getOperand(0).getNode();
2746
2747 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2748 // simplified. Try to undo that
2749 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2750
2751 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2752 if (AndImm & (AndImm + 1))
2753 return false;
2754
2755 bool ClampMSB = false;
2756 uint64_t SrlImm = 0;
2757 // Handle the SRL + ANY_EXTEND case.
2758 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2759 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2760 // Extend the incoming operand of the SRL to 64-bit.
2761 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2762 // Make sure to clamp the MSB so that we preserve the semantics of the
2763 // original operations.
2764 ClampMSB = true;
2765 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2767 SrlImm)) {
2768 // If the shift result was truncated, we can still combine them.
2769 Opd0 = Op0->getOperand(0).getOperand(0);
2770
2771 // Use the type of SRL node.
2772 VT = Opd0->getValueType(0);
2773 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2774 Opd0 = Op0->getOperand(0);
2775 ClampMSB = (VT == MVT::i32);
2776 } else if (BiggerPattern) {
2777 // Let's pretend a 0 shift right has been performed.
2778 // The resulting code will be at least as good as the original one
2779 // plus it may expose more opportunities for bitfield insert pattern.
2780 // FIXME: Currently we limit this to the bigger pattern, because
2781 // some optimizations expect AND and not UBFM.
2782 Opd0 = N->getOperand(0);
2783 } else
2784 return false;
2785
2786 // Bail out on large immediates. This happens when no proper
2787 // combining/constant folding was performed.
2788 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2789 LLVM_DEBUG(
2790 (dbgs() << N
2791 << ": Found large shift immediate, this should not happen\n"));
2792 return false;
2793 }
2794
2795 LSB = SrlImm;
2796 MSB = SrlImm +
2797 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2798 : llvm::countr_one<uint64_t>(AndImm)) -
2799 1;
2800 if (ClampMSB)
2801 // Since we're moving the extend before the right shift operation, we need
2802 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2803 // the zeros which would get shifted in with the original right shift
2804 // operation.
2805 MSB = MSB > 31 ? 31 : MSB;
2806
2807 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2808 return true;
2809}
2810
2812 SDValue &Opd0, unsigned &Immr,
2813 unsigned &Imms) {
2814 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2815
2816 EVT VT = N->getValueType(0);
2817 unsigned BitWidth = VT.getSizeInBits();
2818 assert((VT == MVT::i32 || VT == MVT::i64) &&
2819 "Type checking must have been done before calling this function");
2820
2821 SDValue Op = N->getOperand(0);
2822 if (Op->getOpcode() == ISD::TRUNCATE) {
2823 Op = Op->getOperand(0);
2824 VT = Op->getValueType(0);
2825 BitWidth = VT.getSizeInBits();
2826 }
2827
2828 uint64_t ShiftImm;
2829 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2830 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2831 return false;
2832
2833 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2834 if (ShiftImm + Width > BitWidth)
2835 return false;
2836
2837 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2838 Opd0 = Op.getOperand(0);
2839 Immr = ShiftImm;
2840 Imms = ShiftImm + Width - 1;
2841 return true;
2842}
2843
2845 SDValue &Opd0, unsigned &LSB,
2846 unsigned &MSB) {
2847 // We are looking for the following pattern which basically extracts several
2848 // continuous bits from the source value and places it from the LSB of the
2849 // destination value, all other bits of the destination value or set to zero:
2850 //
2851 // Value2 = AND Value, MaskImm
2852 // SRL Value2, ShiftImm
2853 //
2854 // with MaskImm >> ShiftImm to search for the bit width.
2855 //
2856 // This gets selected into a single UBFM:
2857 //
2858 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2859 //
2860
2861 if (N->getOpcode() != ISD::SRL)
2862 return false;
2863
2864 uint64_t AndMask = 0;
2865 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2866 return false;
2867
2868 Opd0 = N->getOperand(0).getOperand(0);
2869
2870 uint64_t SrlImm = 0;
2871 if (!isIntImmediate(N->getOperand(1), SrlImm))
2872 return false;
2873
2874 // Check whether we really have several bits extract here.
2875 if (!isMask_64(AndMask >> SrlImm))
2876 return false;
2877
2878 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2879 LSB = SrlImm;
2880 MSB = llvm::Log2_64(AndMask);
2881 return true;
2882}
2883
2884static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2885 unsigned &Immr, unsigned &Imms,
2886 bool BiggerPattern) {
2887 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2888 "N must be a SHR/SRA operation to call this function");
2889
2890 EVT VT = N->getValueType(0);
2891
2892 // Here we can test the type of VT and return false when the type does not
2893 // match, but since it is done prior to that call in the current context
2894 // we turned that into an assert to avoid redundant code.
2895 assert((VT == MVT::i32 || VT == MVT::i64) &&
2896 "Type checking must have been done before calling this function");
2897
2898 // Check for AND + SRL doing several bits extract.
2899 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2900 return true;
2901
2902 // We're looking for a shift of a shift.
2903 uint64_t ShlImm = 0;
2904 uint64_t TruncBits = 0;
2905 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2906 Opd0 = N->getOperand(0).getOperand(0);
2907 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2908 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2909 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2910 // be considered as setting high 32 bits as zero. Our strategy here is to
2911 // always generate 64bit UBFM. This consistency will help the CSE pass
2912 // later find more redundancy.
2913 Opd0 = N->getOperand(0).getOperand(0);
2914 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2915 VT = Opd0.getValueType();
2916 assert(VT == MVT::i64 && "the promoted type should be i64");
2917 } else if (BiggerPattern) {
2918 // Let's pretend a 0 shift left has been performed.
2919 // FIXME: Currently we limit this to the bigger pattern case,
2920 // because some optimizations expect AND and not UBFM
2921 Opd0 = N->getOperand(0);
2922 } else
2923 return false;
2924
2925 // Missing combines/constant folding may have left us with strange
2926 // constants.
2927 if (ShlImm >= VT.getSizeInBits()) {
2928 LLVM_DEBUG(
2929 (dbgs() << N
2930 << ": Found large shift immediate, this should not happen\n"));
2931 return false;
2932 }
2933
2934 uint64_t SrlImm = 0;
2935 if (!isIntImmediate(N->getOperand(1), SrlImm))
2936 return false;
2937
2938 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2939 "bad amount in shift node!");
2940 int immr = SrlImm - ShlImm;
2941 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2942 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2943 // SRA requires a signed extraction
2944 if (VT == MVT::i32)
2945 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2946 else
2947 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2948 return true;
2949}
2950
2951bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2952 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2953
2954 EVT VT = N->getValueType(0);
2955 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2956 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2957 return false;
2958
2959 uint64_t ShiftImm;
2960 SDValue Op = N->getOperand(0);
2961 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2962 return false;
2963
2964 SDLoc dl(N);
2965 // Extend the incoming operand of the shift to 64-bits.
2966 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2967 unsigned Immr = ShiftImm;
2968 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2969 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2970 CurDAG->getTargetConstant(Imms, dl, VT)};
2971 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2972 return true;
2973}
2974
2975static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2976 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2977 unsigned NumberOfIgnoredLowBits = 0,
2978 bool BiggerPattern = false) {
2979 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2980 return false;
2981
2982 switch (N->getOpcode()) {
2983 default:
2984 if (!N->isMachineOpcode())
2985 return false;
2986 break;
2987 case ISD::AND:
2988 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2989 NumberOfIgnoredLowBits, BiggerPattern);
2990 case ISD::SRL:
2991 case ISD::SRA:
2992 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2993
2995 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2996 }
2997
2998 unsigned NOpc = N->getMachineOpcode();
2999 switch (NOpc) {
3000 default:
3001 return false;
3002 case AArch64::SBFMWri:
3003 case AArch64::UBFMWri:
3004 case AArch64::SBFMXri:
3005 case AArch64::UBFMXri:
3006 Opc = NOpc;
3007 Opd0 = N->getOperand(0);
3008 Immr = N->getConstantOperandVal(1);
3009 Imms = N->getConstantOperandVal(2);
3010 return true;
3011 }
3012 // Unreachable
3013 return false;
3014}
3015
3016bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
3017 unsigned Opc, Immr, Imms;
3018 SDValue Opd0;
3019 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
3020 return false;
3021
3022 EVT VT = N->getValueType(0);
3023 SDLoc dl(N);
3024
3025 // If the bit extract operation is 64bit but the original type is 32bit, we
3026 // need to add one EXTRACT_SUBREG.
3027 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3028 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3029 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3030
3031 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3032 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3033 MVT::i32, SDValue(BFM, 0));
3034 ReplaceNode(N, Inner.getNode());
3035 return true;
3036 }
3037
3038 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3039 CurDAG->getTargetConstant(Imms, dl, VT)};
3040 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3041 return true;
3042}
3043
3044/// Does DstMask form a complementary pair with the mask provided by
3045/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3046/// this asks whether DstMask zeroes precisely those bits that will be set by
3047/// the other half.
3048static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3049 unsigned NumberOfIgnoredHighBits, EVT VT) {
3050 assert((VT == MVT::i32 || VT == MVT::i64) &&
3051 "i32 or i64 mask type expected!");
3052 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3053
3054 // Enable implicitTrunc as we're intentionally ignoring high bits.
3055 APInt SignificantDstMask =
3056 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3057 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3058
3059 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3060 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3061}
3062
3063// Look for bits that will be useful for later uses.
3064// A bit is considered useless as soon as it is dropped and never used
3065// before it has been dropped.
3066// E.g., looking for the useful bits of x
3067// 1. y = x & 0x7
3068// 2. z = y >> 2
3069// After #1, the useful bits of x are 0x7; those bits of x live on through
3070// y.
3071// After #2, the useful bits of x are 0x4.
3072// However, if x is used on an unpredictable instruction, then all its bits
3073// are useful.
3074// E.g.
3075// 1. y = x & 0x7
3076// 2. z = y >> 2
3077// 3. str x, [@x]
3078static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3079
3081 unsigned Depth) {
3082 uint64_t Imm =
3083 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3084 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3085 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3086 getUsefulBits(Op, UsefulBits, Depth + 1);
3087}
3088
3090 uint64_t Imm, uint64_t MSB,
3091 unsigned Depth) {
3092 // inherit the bitwidth value
3093 APInt OpUsefulBits(UsefulBits);
3094 OpUsefulBits = 1;
3095
3096 if (MSB >= Imm) {
3097 OpUsefulBits <<= MSB - Imm + 1;
3098 --OpUsefulBits;
3099 // The interesting part will be in the lower part of the result
3100 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3101 // The interesting part was starting at Imm in the argument
3102 OpUsefulBits <<= Imm;
3103 } else {
3104 OpUsefulBits <<= MSB + 1;
3105 --OpUsefulBits;
3106 // The interesting part will be shifted in the result
3107 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3108 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3109 // The interesting part was at zero in the argument
3110 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3111 }
3112
3113 UsefulBits &= OpUsefulBits;
3114}
3115
3116static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3117 unsigned Depth) {
3118 uint64_t Imm =
3119 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3120 uint64_t MSB =
3121 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3122
3123 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3124}
3125
3127 unsigned Depth) {
3128 uint64_t ShiftTypeAndValue =
3129 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3130 APInt Mask(UsefulBits);
3131 Mask.clearAllBits();
3132 Mask.flipAllBits();
3133
3134 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3135 // Shift Left
3136 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3137 Mask <<= ShiftAmt;
3138 getUsefulBits(Op, Mask, Depth + 1);
3139 Mask.lshrInPlace(ShiftAmt);
3140 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3141 // Shift Right
3142 // We do not handle AArch64_AM::ASR, because the sign will change the
3143 // number of useful bits
3144 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3145 Mask.lshrInPlace(ShiftAmt);
3146 getUsefulBits(Op, Mask, Depth + 1);
3147 Mask <<= ShiftAmt;
3148 } else
3149 return;
3150
3151 UsefulBits &= Mask;
3152}
3153
3154static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3155 unsigned Depth) {
3156 uint64_t Imm =
3157 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3158 uint64_t MSB =
3159 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3160
3161 APInt OpUsefulBits(UsefulBits);
3162 OpUsefulBits = 1;
3163
3164 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3165 ResultUsefulBits.flipAllBits();
3166 APInt Mask(UsefulBits.getBitWidth(), 0);
3167
3168 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3169
3170 if (MSB >= Imm) {
3171 // The instruction is a BFXIL.
3172 uint64_t Width = MSB - Imm + 1;
3173 uint64_t LSB = Imm;
3174
3175 OpUsefulBits <<= Width;
3176 --OpUsefulBits;
3177
3178 if (Op.getOperand(1) == Orig) {
3179 // Copy the low bits from the result to bits starting from LSB.
3180 Mask = ResultUsefulBits & OpUsefulBits;
3181 Mask <<= LSB;
3182 }
3183
3184 if (Op.getOperand(0) == Orig)
3185 // Bits starting from LSB in the input contribute to the result.
3186 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3187 } else {
3188 // The instruction is a BFI.
3189 uint64_t Width = MSB + 1;
3190 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3191
3192 OpUsefulBits <<= Width;
3193 --OpUsefulBits;
3194 OpUsefulBits <<= LSB;
3195
3196 if (Op.getOperand(1) == Orig) {
3197 // Copy the bits from the result to the zero bits.
3198 Mask = ResultUsefulBits & OpUsefulBits;
3199 Mask.lshrInPlace(LSB);
3200 }
3201
3202 if (Op.getOperand(0) == Orig)
3203 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3204 }
3205
3206 UsefulBits &= Mask;
3207}
3208
3209static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3210 SDValue Orig, unsigned Depth) {
3211
3212 // Users of this node should have already been instruction selected
3213 // FIXME: Can we turn that into an assert?
3214 if (!UserNode->isMachineOpcode())
3215 return;
3216
3217 switch (UserNode->getMachineOpcode()) {
3218 default:
3219 return;
3220 case AArch64::ANDSWri:
3221 case AArch64::ANDSXri:
3222 case AArch64::ANDWri:
3223 case AArch64::ANDXri:
3224 // We increment Depth only when we call the getUsefulBits
3225 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3226 Depth);
3227 case AArch64::UBFMWri:
3228 case AArch64::UBFMXri:
3229 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3230
3231 case AArch64::ORRWrs:
3232 case AArch64::ORRXrs:
3233 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3234 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3235 Depth);
3236 return;
3237 case AArch64::BFMWri:
3238 case AArch64::BFMXri:
3239 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3240
3241 case AArch64::STRBBui:
3242 case AArch64::STURBBi:
3243 if (UserNode->getOperand(0) != Orig)
3244 return;
3245 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3246 return;
3247
3248 case AArch64::STRHHui:
3249 case AArch64::STURHHi:
3250 if (UserNode->getOperand(0) != Orig)
3251 return;
3252 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3253 return;
3254 }
3255}
3256
3257static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3259 return;
3260 // Initialize UsefulBits
3261 if (!Depth) {
3262 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3263 // At the beginning, assume every produced bits is useful
3264 UsefulBits = APInt(Bitwidth, 0);
3265 UsefulBits.flipAllBits();
3266 }
3267 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3268
3269 for (SDNode *Node : Op.getNode()->users()) {
3270 // A use cannot produce useful bits
3271 APInt UsefulBitsForUse = APInt(UsefulBits);
3272 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3273 UsersUsefulBits |= UsefulBitsForUse;
3274 }
3275 // UsefulBits contains the produced bits that are meaningful for the
3276 // current definition, thus a user cannot make a bit meaningful at
3277 // this point
3278 UsefulBits &= UsersUsefulBits;
3279}
3280
3281/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3282/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3283/// 0, return Op unchanged.
3284static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3285 if (ShlAmount == 0)
3286 return Op;
3287
3288 EVT VT = Op.getValueType();
3289 SDLoc dl(Op);
3290 unsigned BitWidth = VT.getSizeInBits();
3291 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3292
3293 SDNode *ShiftNode;
3294 if (ShlAmount > 0) {
3295 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3296 ShiftNode = CurDAG->getMachineNode(
3297 UBFMOpc, dl, VT, Op,
3298 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3299 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3300 } else {
3301 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3302 assert(ShlAmount < 0 && "expected right shift");
3303 int ShrAmount = -ShlAmount;
3304 ShiftNode = CurDAG->getMachineNode(
3305 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3306 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3307 }
3308
3309 return SDValue(ShiftNode, 0);
3310}
3311
3312// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3313static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3314 bool BiggerPattern,
3315 const uint64_t NonZeroBits,
3316 SDValue &Src, int &DstLSB,
3317 int &Width);
3318
3319// For bit-field-positioning pattern "(shl VAL, N)".
3320static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3321 bool BiggerPattern,
3322 const uint64_t NonZeroBits,
3323 SDValue &Src, int &DstLSB,
3324 int &Width);
3325
3326/// Does this tree qualify as an attempt to move a bitfield into position,
3327/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3329 bool BiggerPattern, SDValue &Src,
3330 int &DstLSB, int &Width) {
3331 EVT VT = Op.getValueType();
3332 unsigned BitWidth = VT.getSizeInBits();
3333 (void)BitWidth;
3334 assert(BitWidth == 32 || BitWidth == 64);
3335
3336 KnownBits Known = CurDAG->computeKnownBits(Op);
3337
3338 // Non-zero in the sense that they're not provably zero, which is the key
3339 // point if we want to use this value
3340 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3341 if (!isShiftedMask_64(NonZeroBits))
3342 return false;
3343
3344 switch (Op.getOpcode()) {
3345 default:
3346 break;
3347 case ISD::AND:
3348 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3349 NonZeroBits, Src, DstLSB, Width);
3350 case ISD::SHL:
3351 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3352 NonZeroBits, Src, DstLSB, Width);
3353 }
3354
3355 return false;
3356}
3357
3359 bool BiggerPattern,
3360 const uint64_t NonZeroBits,
3361 SDValue &Src, int &DstLSB,
3362 int &Width) {
3363 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3364
3365 EVT VT = Op.getValueType();
3366 assert((VT == MVT::i32 || VT == MVT::i64) &&
3367 "Caller guarantees VT is one of i32 or i64");
3368 (void)VT;
3369
3370 uint64_t AndImm;
3371 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3372 return false;
3373
3374 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3375 // 1) (AndImm & (1 << POS) == 0)
3376 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3377 //
3378 // 1) and 2) don't agree so something must be wrong (e.g., in
3379 // 'SelectionDAG::computeKnownBits')
3380 assert((~AndImm & NonZeroBits) == 0 &&
3381 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3382
3383 SDValue AndOp0 = Op.getOperand(0);
3384
3385 uint64_t ShlImm;
3386 SDValue ShlOp0;
3387 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3388 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3389 ShlOp0 = AndOp0.getOperand(0);
3390 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3392 ShlImm)) {
3393 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3394
3395 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3396 SDValue ShlVal = AndOp0.getOperand(0);
3397
3398 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3399 // expect VT to be MVT::i32.
3400 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3401
3402 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3403 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3404 } else
3405 return false;
3406
3407 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3408 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3409 // AndOp0+AND.
3410 if (!BiggerPattern && !AndOp0.hasOneUse())
3411 return false;
3412
3413 DstLSB = llvm::countr_zero(NonZeroBits);
3414 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3415
3416 // Bail out on large Width. This happens when no proper combining / constant
3417 // folding was performed.
3418 if (Width >= (int)VT.getSizeInBits()) {
3419 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3420 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3421 // "val".
3422 // If VT is i32, what Width >= 32 means:
3423 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3424 // demands at least 'Width' bits (after dag-combiner). This together with
3425 // `any_extend` Op (undefined higher bits) indicates missed combination
3426 // when lowering the 'and' IR instruction to an machine IR instruction.
3427 LLVM_DEBUG(
3428 dbgs()
3429 << "Found large Width in bit-field-positioning -- this indicates no "
3430 "proper combining / constant folding was performed\n");
3431 return false;
3432 }
3433
3434 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3435 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3436 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3437 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3438 // which case it is not profitable to insert an extra shift.
3439 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3440 return false;
3441
3442 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3443 return true;
3444}
3445
3446// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3447// UBFIZ.
3449 SDValue &Src, int &DstLSB,
3450 int &Width) {
3451 // Caller should have verified that N is a left shift with constant shift
3452 // amount; asserts that.
3453 assert(Op.getOpcode() == ISD::SHL &&
3454 "Op.getNode() should be a SHL node to call this function");
3455 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3456 "Op.getNode() should shift ShlImm to call this function");
3457
3458 uint64_t AndImm = 0;
3459 SDValue Op0 = Op.getOperand(0);
3460 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3461 return false;
3462
3463 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3464 if (isMask_64(ShiftedAndImm)) {
3465 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3466 // should end with Mask, and could be prefixed with random bits if those
3467 // bits are shifted out.
3468 //
3469 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3470 // the AND result corresponding to those bits are shifted out, so it's fine
3471 // to not extract them.
3472 Width = llvm::countr_one(ShiftedAndImm);
3473 DstLSB = ShlImm;
3474 Src = Op0.getOperand(0);
3475 return true;
3476 }
3477 return false;
3478}
3479
3481 bool BiggerPattern,
3482 const uint64_t NonZeroBits,
3483 SDValue &Src, int &DstLSB,
3484 int &Width) {
3485 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3486
3487 EVT VT = Op.getValueType();
3488 assert((VT == MVT::i32 || VT == MVT::i64) &&
3489 "Caller guarantees that type is i32 or i64");
3490 (void)VT;
3491
3492 uint64_t ShlImm;
3493 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3494 return false;
3495
3496 if (!BiggerPattern && !Op.hasOneUse())
3497 return false;
3498
3499 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3500 return true;
3501
3502 DstLSB = llvm::countr_zero(NonZeroBits);
3503 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3504
3505 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3506 return false;
3507
3508 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3509 return true;
3510}
3511
3512static bool isShiftedMask(uint64_t Mask, EVT VT) {
3513 assert(VT == MVT::i32 || VT == MVT::i64);
3514 if (VT == MVT::i32)
3515 return isShiftedMask_32(Mask);
3516 return isShiftedMask_64(Mask);
3517}
3518
3519// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3520// inserted only sets known zero bits.
3522 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3523
3524 EVT VT = N->getValueType(0);
3525 if (VT != MVT::i32 && VT != MVT::i64)
3526 return false;
3527
3528 unsigned BitWidth = VT.getSizeInBits();
3529
3530 uint64_t OrImm;
3531 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3532 return false;
3533
3534 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3535 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3536 // performance neutral.
3538 return false;
3539
3540 uint64_t MaskImm;
3541 SDValue And = N->getOperand(0);
3542 // Must be a single use AND with an immediate operand.
3543 if (!And.hasOneUse() ||
3544 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3545 return false;
3546
3547 // Compute the Known Zero for the AND as this allows us to catch more general
3548 // cases than just looking for AND with imm.
3549 KnownBits Known = CurDAG->computeKnownBits(And);
3550
3551 // Non-zero in the sense that they're not provably zero, which is the key
3552 // point if we want to use this value.
3553 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3554
3555 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3556 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3557 return false;
3558
3559 // The bits being inserted must only set those bits that are known to be zero.
3560 if ((OrImm & NotKnownZero) != 0) {
3561 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3562 // currently handle this case.
3563 return false;
3564 }
3565
3566 // BFI/BFXIL dst, src, #lsb, #width.
3567 int LSB = llvm::countr_one(NotKnownZero);
3568 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3569
3570 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3571 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3572 unsigned ImmS = Width - 1;
3573
3574 // If we're creating a BFI instruction avoid cases where we need more
3575 // instructions to materialize the BFI constant as compared to the original
3576 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3577 // should be no worse in this case.
3578 bool IsBFI = LSB != 0;
3579 uint64_t BFIImm = OrImm >> LSB;
3580 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3581 // We have a BFI instruction and we know the constant can't be materialized
3582 // with a ORR-immediate with the zero register.
3583 unsigned OrChunks = 0, BFIChunks = 0;
3584 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3585 if (((OrImm >> Shift) & 0xFFFF) != 0)
3586 ++OrChunks;
3587 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3588 ++BFIChunks;
3589 }
3590 if (BFIChunks > OrChunks)
3591 return false;
3592 }
3593
3594 // Materialize the constant to be inserted.
3595 SDLoc DL(N);
3596 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3597 SDNode *MOVI = CurDAG->getMachineNode(
3598 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3599
3600 // Create the BFI/BFXIL instruction.
3601 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3602 CurDAG->getTargetConstant(ImmR, DL, VT),
3603 CurDAG->getTargetConstant(ImmS, DL, VT)};
3604 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3605 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3606 return true;
3607}
3608
3610 SDValue &ShiftedOperand,
3611 uint64_t &EncodedShiftImm) {
3612 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3613 if (!Dst.hasOneUse())
3614 return false;
3615
3616 EVT VT = Dst.getValueType();
3617 assert((VT == MVT::i32 || VT == MVT::i64) &&
3618 "Caller should guarantee that VT is one of i32 or i64");
3619 const unsigned SizeInBits = VT.getSizeInBits();
3620
3621 SDLoc DL(Dst.getNode());
3622 uint64_t AndImm, ShlImm;
3623 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3624 isShiftedMask_64(AndImm)) {
3625 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3626 SDValue DstOp0 = Dst.getOperand(0);
3627 if (!DstOp0.hasOneUse())
3628 return false;
3629
3630 // An example to illustrate the transformation
3631 // From:
3632 // lsr x8, x1, #1
3633 // and x8, x8, #0x3f80
3634 // bfxil x8, x1, #0, #7
3635 // To:
3636 // and x8, x23, #0x7f
3637 // ubfx x9, x23, #8, #7
3638 // orr x23, x8, x9, lsl #7
3639 //
3640 // The number of instructions remains the same, but ORR is faster than BFXIL
3641 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3642 // the dependency chain is improved after the transformation.
3643 uint64_t SrlImm;
3644 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3645 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3646 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3647 unsigned MaskWidth =
3648 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3649 unsigned UBFMOpc =
3650 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3651 SDNode *UBFMNode = CurDAG->getMachineNode(
3652 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3653 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3654 VT),
3655 CurDAG->getTargetConstant(
3656 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3657 ShiftedOperand = SDValue(UBFMNode, 0);
3658 EncodedShiftImm = AArch64_AM::getShifterImm(
3659 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3660 return true;
3661 }
3662 }
3663 return false;
3664 }
3665
3666 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3667 ShiftedOperand = Dst.getOperand(0);
3668 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3669 return true;
3670 }
3671
3672 uint64_t SrlImm;
3673 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3674 ShiftedOperand = Dst.getOperand(0);
3675 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3676 return true;
3677 }
3678 return false;
3679}
3680
3681// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3682// the operands and select it to AArch64::ORR with shifted registers if
3683// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3684static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3685 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3686 const bool BiggerPattern) {
3687 EVT VT = N->getValueType(0);
3688 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3689 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3690 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3691 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3692 assert((VT == MVT::i32 || VT == MVT::i64) &&
3693 "Expect result type to be i32 or i64 since N is combinable to BFM");
3694 SDLoc DL(N);
3695
3696 // Bail out if BFM simplifies away one node in BFM Dst.
3697 if (OrOpd1 != Dst)
3698 return false;
3699
3700 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3701 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3702 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3703 if (BiggerPattern) {
3704 uint64_t SrcAndImm;
3705 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3706 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3707 // OrOpd0 = AND Src, #Mask
3708 // So BFM simplifies away one AND node from Src and doesn't simplify away
3709 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3710 // one node (from Rd), ORR is better since it has higher throughput and
3711 // smaller latency than BFM on many AArch64 processors (and for the rest
3712 // ORR is at least as good as BFM).
3713 SDValue ShiftedOperand;
3714 uint64_t EncodedShiftImm;
3715 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3716 EncodedShiftImm)) {
3717 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3718 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3719 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3720 return true;
3721 }
3722 }
3723 return false;
3724 }
3725
3726 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3727
3728 uint64_t ShlImm;
3729 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3730 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3731 SDValue Ops[] = {
3732 Dst, Src,
3733 CurDAG->getTargetConstant(
3735 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3736 return true;
3737 }
3738
3739 // Select the following pattern to left-shifted operand rather than BFI.
3740 // %val1 = op ..
3741 // %val2 = shl %val1, #imm
3742 // %res = or %val1, %val2
3743 //
3744 // If N is selected to be BFI, we know that
3745 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3746 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3747 //
3748 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3749 if (OrOpd0.getOperand(0) == OrOpd1) {
3750 SDValue Ops[] = {
3751 OrOpd1, OrOpd1,
3752 CurDAG->getTargetConstant(
3754 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3755 return true;
3756 }
3757 }
3758
3759 uint64_t SrlImm;
3760 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3761 // Select the following pattern to right-shifted operand rather than BFXIL.
3762 // %val1 = op ..
3763 // %val2 = lshr %val1, #imm
3764 // %res = or %val1, %val2
3765 //
3766 // If N is selected to be BFXIL, we know that
3767 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3768 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3769 //
3770 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3771 if (OrOpd0.getOperand(0) == OrOpd1) {
3772 SDValue Ops[] = {
3773 OrOpd1, OrOpd1,
3774 CurDAG->getTargetConstant(
3776 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3777 return true;
3778 }
3779 }
3780
3781 return false;
3782}
3783
3784static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3785 SelectionDAG *CurDAG) {
3786 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3787
3788 EVT VT = N->getValueType(0);
3789 if (VT != MVT::i32 && VT != MVT::i64)
3790 return false;
3791
3792 unsigned BitWidth = VT.getSizeInBits();
3793
3794 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3795 // have the expected shape. Try to undo that.
3796
3797 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3798 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3799
3800 // Given a OR operation, check if we have the following pattern
3801 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3802 // isBitfieldExtractOp)
3803 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3804 // countTrailingZeros(mask2) == imm2 - imm + 1
3805 // f = d | c
3806 // if yes, replace the OR instruction with:
3807 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3808
3809 // OR is commutative, check all combinations of operand order and values of
3810 // BiggerPattern, i.e.
3811 // Opd0, Opd1, BiggerPattern=false
3812 // Opd1, Opd0, BiggerPattern=false
3813 // Opd0, Opd1, BiggerPattern=true
3814 // Opd1, Opd0, BiggerPattern=true
3815 // Several of these combinations may match, so check with BiggerPattern=false
3816 // first since that will produce better results by matching more instructions
3817 // and/or inserting fewer extra instructions.
3818 for (int I = 0; I < 4; ++I) {
3819
3820 SDValue Dst, Src;
3821 unsigned ImmR, ImmS;
3822 bool BiggerPattern = I / 2;
3823 SDValue OrOpd0Val = N->getOperand(I % 2);
3824 SDNode *OrOpd0 = OrOpd0Val.getNode();
3825 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3826 SDNode *OrOpd1 = OrOpd1Val.getNode();
3827
3828 unsigned BFXOpc;
3829 int DstLSB, Width;
3830 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3831 NumberOfIgnoredLowBits, BiggerPattern)) {
3832 // Check that the returned opcode is compatible with the pattern,
3833 // i.e., same type and zero extended (U and not S)
3834 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3835 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3836 continue;
3837
3838 // Compute the width of the bitfield insertion
3839 DstLSB = 0;
3840 Width = ImmS - ImmR + 1;
3841 // FIXME: This constraint is to catch bitfield insertion we may
3842 // want to widen the pattern if we want to grab general bitfield
3843 // move case
3844 if (Width <= 0)
3845 continue;
3846
3847 // If the mask on the insertee is correct, we have a BFXIL operation. We
3848 // can share the ImmR and ImmS values from the already-computed UBFM.
3849 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3850 BiggerPattern,
3851 Src, DstLSB, Width)) {
3852 ImmR = (BitWidth - DstLSB) % BitWidth;
3853 ImmS = Width - 1;
3854 } else
3855 continue;
3856
3857 // Check the second part of the pattern
3858 EVT VT = OrOpd1Val.getValueType();
3859 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3860
3861 // Compute the Known Zero for the candidate of the first operand.
3862 // This allows to catch more general case than just looking for
3863 // AND with imm. Indeed, simplify-demanded-bits may have removed
3864 // the AND instruction because it proves it was useless.
3865 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3866
3867 // Check if there is enough room for the second operand to appear
3868 // in the first one
3869 APInt BitsToBeInserted =
3870 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3871
3872 if ((BitsToBeInserted & ~Known.Zero) != 0)
3873 continue;
3874
3875 // Set the first operand
3876 uint64_t Imm;
3877 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3878 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3879 // In that case, we can eliminate the AND
3880 Dst = OrOpd1->getOperand(0);
3881 else
3882 // Maybe the AND has been removed by simplify-demanded-bits
3883 // or is useful because it discards more bits
3884 Dst = OrOpd1Val;
3885
3886 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3887 // with shifted operand is more efficient.
3888 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3889 BiggerPattern))
3890 return true;
3891
3892 // both parts match
3893 SDLoc DL(N);
3894 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3895 CurDAG->getTargetConstant(ImmS, DL, VT)};
3896 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3897 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3898 return true;
3899 }
3900
3901 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3902 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3903 // mask (e.g., 0x000ffff0).
3904 uint64_t Mask0Imm, Mask1Imm;
3905 SDValue And0 = N->getOperand(0);
3906 SDValue And1 = N->getOperand(1);
3907 if (And0.hasOneUse() && And1.hasOneUse() &&
3908 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3909 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3910 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3911 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3912
3913 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3914 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3915 // bits to be inserted.
3916 if (isShiftedMask(Mask0Imm, VT)) {
3917 std::swap(And0, And1);
3918 std::swap(Mask0Imm, Mask1Imm);
3919 }
3920
3921 SDValue Src = And1->getOperand(0);
3922 SDValue Dst = And0->getOperand(0);
3923 unsigned LSB = llvm::countr_zero(Mask1Imm);
3924 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3925
3926 // The BFXIL inserts the low-order bits from a source register, so right
3927 // shift the needed bits into place.
3928 SDLoc DL(N);
3929 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3930 uint64_t LsrImm = LSB;
3931 if (Src->hasOneUse() &&
3932 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3933 (LsrImm + LSB) < BitWidth) {
3934 Src = Src->getOperand(0);
3935 LsrImm += LSB;
3936 }
3937
3938 SDNode *LSR = CurDAG->getMachineNode(
3939 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3940 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3941
3942 // BFXIL is an alias of BFM, so translate to BFM operands.
3943 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3944 unsigned ImmS = Width - 1;
3945
3946 // Create the BFXIL instruction.
3947 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3948 CurDAG->getTargetConstant(ImmR, DL, VT),
3949 CurDAG->getTargetConstant(ImmS, DL, VT)};
3950 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3951 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3952 return true;
3953 }
3954
3955 return false;
3956}
3957
3958bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3959 if (N->getOpcode() != ISD::OR)
3960 return false;
3961
3962 APInt NUsefulBits;
3963 getUsefulBits(SDValue(N, 0), NUsefulBits);
3964
3965 // If all bits are not useful, just return UNDEF.
3966 if (!NUsefulBits) {
3967 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3968 return true;
3969 }
3970
3971 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3972 return true;
3973
3974 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3975}
3976
3977/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3978/// equivalent of a left shift by a constant amount followed by an and masking
3979/// out a contiguous set of bits.
3980bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3981 if (N->getOpcode() != ISD::AND)
3982 return false;
3983
3984 EVT VT = N->getValueType(0);
3985 if (VT != MVT::i32 && VT != MVT::i64)
3986 return false;
3987
3988 SDValue Op0;
3989 int DstLSB, Width;
3990 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3991 Op0, DstLSB, Width))
3992 return false;
3993
3994 // ImmR is the rotate right amount.
3995 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3996 // ImmS is the most significant bit of the source to be moved.
3997 unsigned ImmS = Width - 1;
3998
3999 SDLoc DL(N);
4000 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
4001 CurDAG->getTargetConstant(ImmS, DL, VT)};
4002 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
4003 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4004 return true;
4005}
4006
4007/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
4008/// variable shift/rotate instructions.
4009bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4010 EVT VT = N->getValueType(0);
4011
4012 unsigned Opc;
4013 switch (N->getOpcode()) {
4014 case ISD::ROTR:
4015 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
4016 break;
4017 case ISD::SHL:
4018 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
4019 break;
4020 case ISD::SRL:
4021 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
4022 break;
4023 case ISD::SRA:
4024 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
4025 break;
4026 default:
4027 return false;
4028 }
4029
4030 uint64_t Size;
4031 uint64_t Bits;
4032 if (VT == MVT::i32) {
4033 Bits = 5;
4034 Size = 32;
4035 } else if (VT == MVT::i64) {
4036 Bits = 6;
4037 Size = 64;
4038 } else
4039 return false;
4040
4041 SDValue ShiftAmt = N->getOperand(1);
4042 SDLoc DL(N);
4043 SDValue NewShiftAmt;
4044
4045 // Skip over an extend of the shift amount.
4046 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
4047 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
4048 ShiftAmt = ShiftAmt->getOperand(0);
4049
4050 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
4051 SDValue Add0 = ShiftAmt->getOperand(0);
4052 SDValue Add1 = ShiftAmt->getOperand(1);
4053 uint64_t Add0Imm;
4054 uint64_t Add1Imm;
4055 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
4056 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
4057 // to avoid the ADD/SUB.
4058 NewShiftAmt = Add0;
4059 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4060 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
4061 (Add0Imm % Size == 0)) {
4062 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
4063 // to generate a NEG instead of a SUB from a constant.
4064 unsigned NegOpc;
4065 unsigned ZeroReg;
4066 EVT SubVT = ShiftAmt->getValueType(0);
4067 if (SubVT == MVT::i32) {
4068 NegOpc = AArch64::SUBWrr;
4069 ZeroReg = AArch64::WZR;
4070 } else {
4071 assert(SubVT == MVT::i64);
4072 NegOpc = AArch64::SUBXrr;
4073 ZeroReg = AArch64::XZR;
4074 }
4075 SDValue Zero =
4076 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4077 MachineSDNode *Neg =
4078 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
4079 NewShiftAmt = SDValue(Neg, 0);
4080 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4081 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
4082 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
4083 // to generate a NOT instead of a SUB from a constant.
4084 unsigned NotOpc;
4085 unsigned ZeroReg;
4086 EVT SubVT = ShiftAmt->getValueType(0);
4087 if (SubVT == MVT::i32) {
4088 NotOpc = AArch64::ORNWrr;
4089 ZeroReg = AArch64::WZR;
4090 } else {
4091 assert(SubVT == MVT::i64);
4092 NotOpc = AArch64::ORNXrr;
4093 ZeroReg = AArch64::XZR;
4094 }
4095 SDValue Zero =
4096 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4097 MachineSDNode *Not =
4098 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
4099 NewShiftAmt = SDValue(Not, 0);
4100 } else
4101 return false;
4102 } else {
4103 // If the shift amount is masked with an AND, check that the mask covers the
4104 // bits that are implicitly ANDed off by the above opcodes and if so, skip
4105 // the AND.
4106 uint64_t MaskImm;
4107 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
4108 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
4109 return false;
4110
4111 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
4112 return false;
4113
4114 NewShiftAmt = ShiftAmt->getOperand(0);
4115 }
4116
4117 // Narrow/widen the shift amount to match the size of the shift operation.
4118 if (VT == MVT::i32)
4119 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
4120 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
4121 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
4122 MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
4123 NewShiftAmt, SubReg);
4124 NewShiftAmt = SDValue(Ext, 0);
4125 }
4126
4127 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
4128 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4129 return true;
4130}
4131
4133 SDValue &FixedPos,
4134 unsigned RegWidth,
4135 bool isReciprocal) {
4136 APFloat FVal(0.0);
4138 FVal = CN->getValueAPF();
4139 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4140 // Some otherwise illegal constants are allowed in this case.
4141 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4142 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4143 return false;
4144
4145 ConstantPoolSDNode *CN =
4146 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4147 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4148 } else
4149 return false;
4150
4151 if (unsigned FBits =
4152 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4153 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4154 return true;
4155 }
4156
4157 return false;
4158}
4159
// NOTE(review): the line that opens this definition (return type, function
// name and first parameter) is missing from this listing; only the trailing
// parameters are visible. Restore it from the upstream file before building.
//
// Vector counterpart of the scalar fixed-point operand check: recognises a
// constant built by MOVIshift, FMOV or DUP, reinterprets its bits as a float of
// RegWidth bits, and on success writes the fractional-bit count to FixedPos.
4161 SDValue N,
4162 SDValue &FixedPos,
4163 unsigned RegWidth,
4164 bool isReciprocal) {
// Look through a cast that keeps the scalar element size unchanged.
4165 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4166 N.getValueType().getScalarSizeInBits() ==
4167 N.getOperand(0).getValueType().getScalarSizeInBits())
4168 N = N.getOperand(0);
4169
// Reinterpret an integer bit pattern as half/single/double per RegWidth.
4170 auto ImmToFloat = [RegWidth](APInt Imm) {
4171 switch (RegWidth) {
4172 case 16:
4173 return APFloat(APFloat::IEEEhalf(), Imm);
4174 case 32:
4175 return APFloat(APFloat::IEEEsingle(), Imm);
4176 case 64:
4177 return APFloat(APFloat::IEEEdouble(), Imm);
4178 default:
4179 llvm_unreachable("Unexpected RegWidth!");
4180 };
4181 };
4182
4183 APFloat FVal(0.0);
4184 switch (N->getOpcode()) {
// Immediate (operand 0) shifted left by operand 1.
4185 case AArch64ISD::MOVIshift:
4186 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
4187 << N.getConstantOperandVal(1)));
4188 break;
// Encoded FMOV immediate, expanded by DecodeFMOVImm.
4189 case AArch64ISD::FMOV:
4190 FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
4191 break;
// Splat of an integer constant; anything else being duplicated is rejected.
4192 case AArch64ISD::DUP:
4193 if (isa<ConstantSDNode>(N.getOperand(0)))
4194 FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
4195 else
4196 return false;
4197 break;
4198 default:
4199 return false;
4200 }
4201
// A zero result from the checker means the constant is not usable.
4202 if (unsigned FBits =
4203 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4204 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4205 return true;
4206 }
4207
4208 return false;
4209}
4210
4211bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4212 unsigned RegWidth) {
4213 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4214 /*isReciprocal*/ false);
4215}
4216
// Complex-pattern hook for the vector fixed-point operand, non-reciprocal form.
// NOTE(review): the line holding "return <helper>(" is missing from this
// listing, so the body below is only the argument list of that call; the
// callee is presumably the vector fixed-point checker defined above. Confirm
// against the upstream file.
4217bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4218 unsigned RegWidth) {
4220 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ false);
4221}
4222
// Complex-pattern hook for the vector fixed-point operand, reciprocal form.
// NOTE(review): the line holding "return <helper>(" is missing from this
// listing, so the body below is only the argument list of that call; the
// callee is presumably the vector fixed-point checker defined above. Confirm
// against the upstream file.
4223bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(SDValue N,
4224 SDValue &FixedPos,
4225 unsigned RegWidth) {
4227 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ true);
4228}
4229
4230bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4231 SDValue &FixedPos,
4232 unsigned RegWidth) {
4233 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4234 /*isReciprocal*/ true);
4235}
4236
4237// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4238// of the string and obtains the integer values from them and combines these
4239// into a single value to be used in the MRS/MSR instruction.
4242 RegString.split(Fields, ':');
4243
4244 if (Fields.size() == 1)
4245 return -1;
4246
4247 assert(Fields.size() == 5
4248 && "Invalid number of fields in read register string");
4249
4251 bool AllIntFields = true;
4252
4253 for (StringRef Field : Fields) {
4254 unsigned IntField;
4255 AllIntFields &= !Field.getAsInteger(10, IntField);
4256 Ops.push_back(IntField);
4257 }
4258
4259 assert(AllIntFields &&
4260 "Unexpected non-integer value in special register string.");
4261 (void)AllIntFields;
4262
4263 // Need to combine the integer fields of the string into a single value
4264 // based on the bit encoding of MRS/MSR instruction.
4265 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4266 (Ops[3] << 3) | (Ops[4]);
4267}
4268
4269// Lower the read_register intrinsic to an MRS instruction node if the special
4270// register string argument is either of the form detailed in the ALCE (the
4271// form described in getIntOperandsFromRegisterString) or is a named register
4272// known by the MRS SysReg mapper.
4273bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4274 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4275 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4276 SDLoc DL(N);
4277
4278 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4279
4280 unsigned Opcode64Bit = AArch64::MRS;
4281 int Imm = getIntOperandFromRegisterString(RegString->getString());
4282 if (Imm == -1) {
4283 // No match, Use the sysreg mapper to map the remaining possible strings to
4284 // the value for the register to be used for the instruction operand.
4285 const auto *TheReg =
4286 AArch64SysReg::lookupSysRegByName(RegString->getString());
4287 if (TheReg && TheReg->Readable &&
4288 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4289 Imm = TheReg->Encoding;
4290 else
4291 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4292
4293 if (Imm == -1) {
4294 // Still no match, see if this is "pc" or give up.
4295 if (!ReadIs128Bit && RegString->getString() == "pc") {
4296 Opcode64Bit = AArch64::ADR;
4297 Imm = 0;
4298 } else {
4299 return false;
4300 }
4301 }
4302 }
4303
4304 SDValue InChain = N->getOperand(0);
4305 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4306 if (!ReadIs128Bit) {
4307 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4308 {SysRegImm, InChain});
4309 } else {
4310 SDNode *MRRS = CurDAG->getMachineNode(
4311 AArch64::MRRS, DL,
4312 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4313 {SysRegImm, InChain});
4314
4315 // Sysregs are not endian. The even register always contains the low half
4316 // of the register.
4317 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4318 SDValue(MRRS, 0));
4319 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4320 SDValue(MRRS, 0));
4321 SDValue OutChain = SDValue(MRRS, 1);
4322
4323 ReplaceUses(SDValue(N, 0), Lo);
4324 ReplaceUses(SDValue(N, 1), Hi);
4325 ReplaceUses(SDValue(N, 2), OutChain);
4326 };
4327 return true;
4328}
4329
4330// Lower the write_register intrinsic to an MSR instruction node if the special
4331// register string argument is either of the form detailed in the ALCE (the
4332// form described in getIntOperandsFromRegisterString) or is a named register
4333// known by the MSR SysReg mapper.
4334bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4335 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4336 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4337 SDLoc DL(N);
4338
4339 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4340
4341 if (!WriteIs128Bit) {
4342 // Check if the register was one of those allowed as the pstatefield value
4343 // in the MSR (immediate) instruction. To accept the values allowed in the
4344 // pstatefield for the MSR (immediate) instruction, we also require that an
4345 // immediate value has been provided as an argument, we know that this is
4346 // the case as it has been ensured by semantic checking.
4347 auto trySelectPState = [&](auto PMapper, unsigned State) {
4348 if (PMapper) {
4349 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4350 "Expected a constant integer expression.");
4351 unsigned Reg = PMapper->Encoding;
4352 uint64_t Immed = N->getConstantOperandVal(2);
4353 CurDAG->SelectNodeTo(
4354 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4355 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4356 return true;
4357 }
4358 return false;
4359 };
4360
4361 if (trySelectPState(
4362 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4363 AArch64::MSRpstateImm4))
4364 return true;
4365 if (trySelectPState(
4366 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4367 AArch64::MSRpstateImm1))
4368 return true;
4369 }
4370
4371 int Imm = getIntOperandFromRegisterString(RegString->getString());
4372 if (Imm == -1) {
4373 // Use the sysreg mapper to attempt to map the remaining possible strings
4374 // to the value for the register to be used for the MSR (register)
4375 // instruction operand.
4376 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4377 if (TheReg && TheReg->Writeable &&
4378 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4379 Imm = TheReg->Encoding;
4380 else
4381 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4382
4383 if (Imm == -1)
4384 return false;
4385 }
4386
4387 SDValue InChain = N->getOperand(0);
4388 if (!WriteIs128Bit) {
4389 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4390 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4391 N->getOperand(2), InChain);
4392 } else {
4393 // No endian swap. The lower half always goes into the even subreg, and the
4394 // higher half always into the odd supreg.
4395 SDNode *Pair = CurDAG->getMachineNode(
4396 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4397 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4398 MVT::i32),
4399 N->getOperand(2),
4400 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4401 N->getOperand(3),
4402 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4403
4404 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4405 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4406 SDValue(Pair, 0), InChain);
4407 }
4408
4409 return true;
4410}
4411
4412/// We've got special pseudo-instructions for these
4413bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4414 unsigned Opcode;
4415 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4416
4417 // Leave IR for LSE if subtarget supports it.
4418 if (Subtarget->hasLSE()) return false;
4419
4420 if (MemTy == MVT::i8)
4421 Opcode = AArch64::CMP_SWAP_8;
4422 else if (MemTy == MVT::i16)
4423 Opcode = AArch64::CMP_SWAP_16;
4424 else if (MemTy == MVT::i32)
4425 Opcode = AArch64::CMP_SWAP_32;
4426 else if (MemTy == MVT::i64)
4427 Opcode = AArch64::CMP_SWAP_64;
4428 else
4429 llvm_unreachable("Unknown AtomicCmpSwap type");
4430
4431 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4432 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4433 N->getOperand(0)};
4434 SDNode *CmpSwap = CurDAG->getMachineNode(
4435 Opcode, SDLoc(N),
4436 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4437
4438 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4439 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4440
4441 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4442 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4443 CurDAG->RemoveDeadNode(N);
4444
4445 return true;
4446}
4447
4448bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4449 SDValue &Shift, bool Negate) {
4450 if (!isa<ConstantSDNode>(N))
4451 return false;
4452
4453 APInt Val =
4454 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4455
4456 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4457}
4458
4459bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
4460 SDValue &Imm, SDValue &Shift,
4461 bool Negate) {
4462 if (Negate)
4463 Val = -Val;
4464
4465 switch (VT.SimpleTy) {
4466 case MVT::i8:
4467 // All immediates are supported.
4468 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4469 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4470 return true;
4471 case MVT::i16:
4472 case MVT::i32:
4473 case MVT::i64:
4474 // Support 8bit unsigned immediates.
4475 if ((Val & ~0xff) == 0) {
4476 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4477 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4478 return true;
4479 }
4480 // Support 16bit unsigned immediates that are a multiple of 256.
4481 if ((Val & ~0xff00) == 0) {
4482 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4483 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4484 return true;
4485 }
4486 break;
4487 default:
4488 break;
4489 }
4490
4491 return false;
4492}
4493
4494bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4495 SDValue &Imm, SDValue &Shift,
4496 bool Negate) {
4497 if (!isa<ConstantSDNode>(N))
4498 return false;
4499
4500 SDLoc DL(N);
4501 int64_t Val = cast<ConstantSDNode>(N)
4502 ->getAPIntValue()
4504 .getSExtValue();
4505
4506 if (Negate)
4507 Val = -Val;
4508
4509 // Signed saturating instructions treat their immediate operand as unsigned,
4510 // whereas the related intrinsics define their operands to be signed. This
4511 // means we can only use the immediate form when the operand is non-negative.
4512 if (Val < 0)
4513 return false;
4514
4515 switch (VT.SimpleTy) {
4516 case MVT::i8:
4517 // All positive immediates are supported.
4518 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4519 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4520 return true;
4521 case MVT::i16:
4522 case MVT::i32:
4523 case MVT::i64:
4524 // Support 8bit positive immediates.
4525 if (Val <= 255) {
4526 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4527 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4528 return true;
4529 }
4530 // Support 16bit positive immediates that are a multiple of 256.
4531 if (Val <= 65280 && Val % 256 == 0) {
4532 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4533 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4534 return true;
4535 }
4536 break;
4537 default:
4538 break;
4539 }
4540
4541 return false;
4542}
4543
4544bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4545 SDValue &Shift) {
4546 if (!isa<ConstantSDNode>(N))
4547 return false;
4548
4549 SDLoc DL(N);
4550 int64_t Val = cast<ConstantSDNode>(N)
4551 ->getAPIntValue()
4552 .trunc(VT.getFixedSizeInBits())
4553 .getSExtValue();
4554 int32_t ImmVal, ShiftVal;
4555 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4556 ShiftVal))
4557 return false;
4558
4559 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4560 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4561 return true;
4562}
4563
4564bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4565 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4566 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4567 return false;
4568}
4569
4570bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4571 SDValue &Imm) {
4572 int64_t ImmVal = Val.getSExtValue();
4573 if (ImmVal >= -128 && ImmVal < 128) {
4574 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4575 return true;
4576 }
4577 return false;
4578}
4579
4580bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4581 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4582 uint64_t ImmVal = CNode->getZExtValue();
4583
4584 switch (VT.SimpleTy) {
4585 case MVT::i8:
4586 ImmVal &= 0xFF;
4587 break;
4588 case MVT::i16:
4589 ImmVal &= 0xFFFF;
4590 break;
4591 case MVT::i32:
4592 ImmVal &= 0xFFFFFFFF;
4593 break;
4594 case MVT::i64:
4595 break;
4596 default:
4597 llvm_unreachable("Unexpected type");
4598 }
4599
4600 if (ImmVal < 256) {
4601 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4602 return true;
4603 }
4604 }
4605 return false;
4606}
4607
4608bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4609 bool Invert) {
4610 uint64_t ImmVal;
4611 if (auto CI = dyn_cast<ConstantSDNode>(N))
4612 ImmVal = CI->getZExtValue();
4613 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4614 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4615 else
4616 return false;
4617
4618 if (Invert)
4619 ImmVal = ~ImmVal;
4620
4621 uint64_t encoding;
4622 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4623 return false;
4624
4625 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4626 return true;
4627}
4628
4629// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4630// Rather than attempt to normalise everything we can sometimes saturate the
4631// shift amount during selection. This function also allows for consistent
4632// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4633// required by the instructions.
4634bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4635 uint64_t High, bool AllowSaturation,
4636 SDValue &Imm) {
4637 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4638 uint64_t ImmVal = CN->getZExtValue();
4639
4640 // Reject shift amounts that are too small.
4641 if (ImmVal < Low)
4642 return false;
4643
4644 // Reject or saturate shift amounts that are too big.
4645 if (ImmVal > High) {
4646 if (!AllowSaturation)
4647 return false;
4648 ImmVal = High;
4649 }
4650
4651 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4652 return true;
4653 }
4654
4655 return false;
4656}
4657
4658bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4659 // tagp(FrameIndex, IRGstack, tag_offset):
4660 // since the offset between FrameIndex and IRGstack is a compile-time
4661 // constant, this can be lowered to a single ADDG instruction.
4662 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4663 return false;
4664 }
4665
4666 SDValue IRG_SP = N->getOperand(2);
4667 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4668 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4669 return false;
4670 }
4671
4672 const TargetLowering *TLI = getTargetLowering();
4673 SDLoc DL(N);
4674 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4675 SDValue FiOp = CurDAG->getTargetFrameIndex(
4676 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4677 int TagOffset = N->getConstantOperandVal(3);
4678
4679 SDNode *Out = CurDAG->getMachineNode(
4680 AArch64::TAGPstack, DL, MVT::i64,
4681 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4682 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4683 ReplaceNode(N, Out);
4684 return true;
4685}
4686
4687void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4688 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4689 "llvm.aarch64.tagp third argument must be an immediate");
4690 if (trySelectStackSlotTagP(N))
4691 return;
4692 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4693 // compile-time constant, not just for stack allocations.
4694
4695 // General case for unrelated pointers in Op1 and Op2.
4696 SDLoc DL(N);
4697 int TagOffset = N->getConstantOperandVal(3);
4698 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4699 {N->getOperand(1), N->getOperand(2)});
4700 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4701 {SDValue(N1, 0), N->getOperand(2)});
4702 SDNode *N3 = CurDAG->getMachineNode(
4703 AArch64::ADDG, DL, MVT::i64,
4704 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4705 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4706 ReplaceNode(N, N3);
4707}
4708
4709bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4710 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4711
4712 // Bail when not a "cast" like insert_subvector.
4713 if (N->getConstantOperandVal(2) != 0)
4714 return false;
4715 if (!N->getOperand(0).isUndef())
4716 return false;
4717
4718 // Bail when normal isel should do the job.
4719 EVT VT = N->getValueType(0);
4720 EVT InVT = N->getOperand(1).getValueType();
4721 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4722 return false;
4723 if (InVT.getSizeInBits() <= 128)
4724 return false;
4725
4726 // NOTE: We can only get here when doing fixed length SVE code generation.
4727 // We do manual selection because the types involved are not linked to real
4728 // registers (despite being legal) and must be coerced into SVE registers.
4729
4731 "Expected to insert into a packed scalable vector!");
4732
4733 SDLoc DL(N);
4734 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4735 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4736 N->getOperand(1), RC));
4737 return true;
4738}
4739
4740bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4741 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4742
4743 // Bail when not a "cast" like extract_subvector.
4744 if (N->getConstantOperandVal(1) != 0)
4745 return false;
4746
4747 // Bail when normal isel can do the job.
4748 EVT VT = N->getValueType(0);
4749 EVT InVT = N->getOperand(0).getValueType();
4750 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4751 return false;
4752 if (VT.getSizeInBits() <= 128)
4753 return false;
4754
4755 // NOTE: We can only get here when doing fixed length SVE code generation.
4756 // We do manual selection because the types involved are not linked to real
4757 // registers (despite being legal) and must be coerced into SVE registers.
4758
4760 "Expected to extract from a packed scalable vector!");
4761
4762 SDLoc DL(N);
4763 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4764 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4765 N->getOperand(0), RC));
4766 return true;
4767}
4768
4769bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4770 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4771
4772 SDValue N0 = N->getOperand(0);
4773 SDValue N1 = N->getOperand(1);
4774
4775 EVT VT = N->getValueType(0);
4776 SDLoc DL(N);
4777
4778 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4779 // Rotate by a constant is a funnel shift in IR which is expanded to
4780 // an OR with shifted operands.
4781 // We do the following transform:
4782 // OR N0, N1 -> xar (x, y, imm)
4783 // Where:
4784 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4785 // N0 = SHL_PRED true, V, splat(bits-imm)
4786 // V = (xor x, y)
4787 if (VT.isScalableVector() &&
4788 (Subtarget->hasSVE2() ||
4789 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4790 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4791 N1.getOpcode() != AArch64ISD::SRL_PRED)
4792 std::swap(N0, N1);
4793 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4794 N1.getOpcode() != AArch64ISD::SRL_PRED)
4795 return false;
4796
4797 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4798 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4799 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4800 return false;
4801
4802 if (N0.getOperand(1) != N1.getOperand(1))
4803 return false;
4804
4805 SDValue R1, R2;
4806 bool IsXOROperand = true;
4807 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4808 IsXOROperand = false;
4809 } else {
4810 R1 = N0.getOperand(1).getOperand(0);
4811 R2 = N1.getOperand(1).getOperand(1);
4812 }
4813
4814 APInt ShlAmt, ShrAmt;
4815 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4817 return false;
4818
4819 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4820 return false;
4821
4822 if (!IsXOROperand) {
4823 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4824 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4825 SDValue MOVIV = SDValue(MOV, 0);
4826
4827 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4828 SDNode *SubRegToReg =
4829 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4830
4831 R1 = N1->getOperand(1);
4832 R2 = SDValue(SubRegToReg, 0);
4833 }
4834
4835 SDValue Imm =
4836 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4837
4838 SDValue Ops[] = {R1, R2, Imm};
4840 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4841 AArch64::XAR_ZZZI_D})) {
4842 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4843 return true;
4844 }
4845 return false;
4846 }
4847
4848 // We have Neon SHA3 XAR operation for v2i64 but for types
4849 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4850 // is available.
4851 EVT SVT;
4852 switch (VT.getSimpleVT().SimpleTy) {
4853 case MVT::v4i32:
4854 case MVT::v2i32:
4855 SVT = MVT::nxv4i32;
4856 break;
4857 case MVT::v8i16:
4858 case MVT::v4i16:
4859 SVT = MVT::nxv8i16;
4860 break;
4861 case MVT::v16i8:
4862 case MVT::v8i8:
4863 SVT = MVT::nxv16i8;
4864 break;
4865 case MVT::v2i64:
4866 case MVT::v1i64:
4867 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4868 break;
4869 default:
4870 return false;
4871 }
4872
4873 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4874 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4875 return false;
4876
4877 if (N0->getOpcode() != AArch64ISD::VSHL ||
4878 N1->getOpcode() != AArch64ISD::VLSHR)
4879 return false;
4880
4881 if (N0->getOperand(0) != N1->getOperand(0))
4882 return false;
4883
4884 SDValue R1, R2;
4885 bool IsXOROperand = true;
4886 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4887 IsXOROperand = false;
4888 } else {
4889 SDValue XOR = N0.getOperand(0);
4890 R1 = XOR.getOperand(0);
4891 R2 = XOR.getOperand(1);
4892 }
4893
4894 unsigned HsAmt = N0.getConstantOperandVal(1);
4895 unsigned ShAmt = N1.getConstantOperandVal(1);
4896
4897 SDValue Imm = CurDAG->getTargetConstant(
4898 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4899
4900 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4901 if (ShAmt + HsAmt != VTSizeInBits)
4902 return false;
4903
4904 if (!IsXOROperand) {
4905 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4906 SDNode *MOV =
4907 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4908 SDValue MOVIV = SDValue(MOV, 0);
4909
4910 R1 = N1->getOperand(0);
4911 R2 = MOVIV;
4912 }
4913
4914 if (SVT != VT) {
4915 SDValue Undef =
4916 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4917
4918 if (SVT.isScalableVector() && VT.is64BitVector()) {
4919 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4920
4921 SDValue UndefQ = SDValue(
4922 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4923 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4924
4925 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4926 UndefQ, R1, DSub),
4927 0);
4928 if (R2.getValueType() == VT)
4929 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4930 UndefQ, R2, DSub),
4931 0);
4932 }
4933
4934 SDValue SubReg = CurDAG->getTargetConstant(
4935 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4936
4937 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4938 R1, SubReg),
4939 0);
4940
4941 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4942 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4943 Undef, R2, SubReg),
4944 0);
4945 }
4946
4947 SDValue Ops[] = {R1, R2, Imm};
4948 SDNode *XAR = nullptr;
4949
4950 if (SVT.isScalableVector()) {
4952 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4953 AArch64::XAR_ZZZI_D}))
4954 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4955 } else {
4956 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4957 }
4958
4959 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4960
4961 if (SVT != VT) {
4962 if (VT.is64BitVector() && SVT.isScalableVector()) {
4963 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4964
4965 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4966 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4967 SDValue(XAR, 0), ZSub);
4968
4969 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4970 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4971 SDValue(Q, 0), DSub);
4972 } else {
4973 SDValue SubReg = CurDAG->getTargetConstant(
4974 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4975 MVT::i32);
4976 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4977 SDValue(XAR, 0), SubReg);
4978 }
4979 }
4980 ReplaceNode(N, XAR);
4981 return true;
4982}
4983
4984void AArch64DAGToDAGISel::Select(SDNode *Node) {
4985 // If we have a custom node, we already have selected!
4986 if (Node->isMachineOpcode()) {
4987 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4988 Node->setNodeId(-1);
4989 return;
4990 }
4991
4992 // Few custom selection stuff.
4993 EVT VT = Node->getValueType(0);
4994
4995 switch (Node->getOpcode()) {
4996 default:
4997 break;
4998
5000 if (SelectCMP_SWAP(Node))
5001 return;
5002 break;
5003
5004 case ISD::READ_REGISTER:
5005 case AArch64ISD::MRRS:
5006 if (tryReadRegister(Node))
5007 return;
5008 break;
5009
5011 case AArch64ISD::MSRR:
5012 if (tryWriteRegister(Node))
5013 return;
5014 break;
5015
5016 case ISD::LOAD: {
5017 // Try to select as an indexed load. Fall through to normal processing
5018 // if we can't.
5019 if (tryIndexedLoad(Node))
5020 return;
5021 break;
5022 }
5023
5024 case ISD::SRL:
5025 case ISD::AND:
5026 case ISD::SRA:
5028 if (tryBitfieldExtractOp(Node))
5029 return;
5030 if (tryBitfieldInsertInZeroOp(Node))
5031 return;
5032 [[fallthrough]];
5033 case ISD::ROTR:
5034 case ISD::SHL:
5035 if (tryShiftAmountMod(Node))
5036 return;
5037 break;
5038
5039 case ISD::SIGN_EXTEND:
5040 if (tryBitfieldExtractOpFromSExt(Node))
5041 return;
5042 break;
5043
5044 case ISD::OR:
5045 if (tryBitfieldInsertOp(Node))
5046 return;
5047 if (trySelectXAR(Node))
5048 return;
5049 break;
5050
5052 if (trySelectCastScalableToFixedLengthVector(Node))
5053 return;
5054 break;
5055 }
5056
5057 case ISD::INSERT_SUBVECTOR: {
5058 if (trySelectCastFixedLengthToScalableVector(Node))
5059 return;
5060 break;
5061 }
5062
5063 case ISD::Constant: {
5064 // Materialize zero constants as copies from WZR/XZR. This allows
5065 // the coalescer to propagate these into other instructions.
5066 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5067 if (ConstNode->isZero()) {
5068 if (VT == MVT::i32) {
5069 SDValue New = CurDAG->getCopyFromReg(
5070 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
5071 ReplaceNode(Node, New.getNode());
5072 return;
5073 } else if (VT == MVT::i64) {
5074 SDValue New = CurDAG->getCopyFromReg(
5075 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
5076 ReplaceNode(Node, New.getNode());
5077 return;
5078 }
5079 }
5080 break;
5081 }
5082
5083 case ISD::FrameIndex: {
5084 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5085 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5086 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5087 const TargetLowering *TLI = getTargetLowering();
5088 SDValue TFI = CurDAG->getTargetFrameIndex(
5089 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5090 SDLoc DL(Node);
5091 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5092 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5093 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5094 return;
5095 }
5097 unsigned IntNo = Node->getConstantOperandVal(1);
5098 switch (IntNo) {
5099 default:
5100 break;
5101 case Intrinsic::aarch64_gcsss: {
5102 SDLoc DL(Node);
5103 SDValue Chain = Node->getOperand(0);
5104 SDValue Val = Node->getOperand(2);
5105 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
5106 SDNode *SS1 =
5107 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5108 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5109 MVT::Other, Zero, SDValue(SS1, 0));
5110 ReplaceNode(Node, SS2);
5111 return;
5112 }
5113 case Intrinsic::aarch64_ldaxp:
5114 case Intrinsic::aarch64_ldxp: {
5115 unsigned Op =
5116 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5117 SDValue MemAddr = Node->getOperand(2);
5118 SDLoc DL(Node);
5119 SDValue Chain = Node->getOperand(0);
5120
5121 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5122 MVT::Other, MemAddr, Chain);
5123
5124 // Transfer memoperands.
5125 MachineMemOperand *MemOp =
5126 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5127 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5128 ReplaceNode(Node, Ld);
5129 return;
5130 }
5131 case Intrinsic::aarch64_stlxp:
5132 case Intrinsic::aarch64_stxp: {
5133 unsigned Op =
5134 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5135 SDLoc DL(Node);
5136 SDValue Chain = Node->getOperand(0);
5137 SDValue ValLo = Node->getOperand(2);
5138 SDValue ValHi = Node->getOperand(3);
5139 SDValue MemAddr = Node->getOperand(4);
5140
5141 // Place arguments in the right order.
5142 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5143
5144 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5145 // Transfer memoperands.
5146 MachineMemOperand *MemOp =
5147 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5148 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5149
5150 ReplaceNode(Node, St);
5151 return;
5152 }
5153 case Intrinsic::aarch64_neon_ld1x2:
5154 if (VT == MVT::v8i8) {
5155 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5156 return;
5157 } else if (VT == MVT::v16i8) {
5158 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5159 return;
5160 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5161 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5162 return;
5163 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5164 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5165 return;
5166 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5167 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5168 return;
5169 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5170 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5171 return;
5172 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5173 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5174 return;
5175 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5176 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5177 return;
5178 }
5179 break;
5180 case Intrinsic::aarch64_neon_ld1x3:
5181 if (VT == MVT::v8i8) {
5182 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5183 return;
5184 } else if (VT == MVT::v16i8) {
5185 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5186 return;
5187 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5188 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5189 return;
5190 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5191 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5192 return;
5193 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5194 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5195 return;
5196 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5197 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5198 return;
5199 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5200 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5201 return;
5202 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5203 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5204 return;
5205 }
5206 break;
5207 case Intrinsic::aarch64_neon_ld1x4:
5208 if (VT == MVT::v8i8) {
5209 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5210 return;
5211 } else if (VT == MVT::v16i8) {
5212 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5213 return;
5214 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5215 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5216 return;
5217 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5218 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5219 return;
5220 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5221 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5222 return;
5223 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5224 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5225 return;
5226 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5227 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5228 return;
5229 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5230 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5231 return;
5232 }
5233 break;
5234 case Intrinsic::aarch64_neon_ld2:
5235 if (VT == MVT::v8i8) {
5236 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5237 return;
5238 } else if (VT == MVT::v16i8) {
5239 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5240 return;
5241 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5242 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5243 return;
5244 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5245 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5246 return;
5247 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5248 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5249 return;
5250 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5251 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5252 return;
5253 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5254 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5255 return;
5256 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5257 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5258 return;
5259 }
5260 break;
5261 case Intrinsic::aarch64_neon_ld3:
5262 if (VT == MVT::v8i8) {
5263 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5264 return;
5265 } else if (VT == MVT::v16i8) {
5266 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5267 return;
5268 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5269 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5270 return;
5271 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5272 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5273 return;
5274 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5275 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5276 return;
5277 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5278 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5279 return;
5280 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5281 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5282 return;
5283 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5284 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5285 return;
5286 }
5287 break;
5288 case Intrinsic::aarch64_neon_ld4:
5289 if (VT == MVT::v8i8) {
5290 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5291 return;
5292 } else if (VT == MVT::v16i8) {
5293 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5294 return;
5295 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5296 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5297 return;
5298 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5299 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5300 return;
5301 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5302 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5303 return;
5304 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5305 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5306 return;
5307 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5308 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5309 return;
5310 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5311 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5312 return;
5313 }
5314 break;
5315 case Intrinsic::aarch64_neon_ld2r:
5316 if (VT == MVT::v8i8) {
5317 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5318 return;
5319 } else if (VT == MVT::v16i8) {
5320 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5321 return;
5322 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5323 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5324 return;
5325 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5326 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5327 return;
5328 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5329 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5330 return;
5331 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5332 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5333 return;
5334 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5335 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5336 return;
5337 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5338 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5339 return;
5340 }
5341 break;
5342 case Intrinsic::aarch64_neon_ld3r:
5343 if (VT == MVT::v8i8) {
5344 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5345 return;
5346 } else if (VT == MVT::v16i8) {
5347 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5348 return;
5349 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5350 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5351 return;
5352 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5353 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5354 return;
5355 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5356 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5357 return;
5358 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5359 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5360 return;
5361 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5362 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5363 return;
5364 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5365 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5366 return;
5367 }
5368 break;
5369 case Intrinsic::aarch64_neon_ld4r:
5370 if (VT == MVT::v8i8) {
5371 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5372 return;
5373 } else if (VT == MVT::v16i8) {
5374 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5375 return;
5376 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5377 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5378 return;
5379 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5380 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5381 return;
5382 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5383 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5384 return;
5385 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5386 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5387 return;
5388 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5389 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5390 return;
5391 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5392 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5393 return;
5394 }
5395 break;
5396 case Intrinsic::aarch64_neon_ld2lane:
5397 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5398 SelectLoadLane(Node, 2, AArch64::LD2i8);
5399 return;
5400 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5401 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5402 SelectLoadLane(Node, 2, AArch64::LD2i16);
5403 return;
5404 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5405 VT == MVT::v2f32) {
5406 SelectLoadLane(Node, 2, AArch64::LD2i32);
5407 return;
5408 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5409 VT == MVT::v1f64) {
5410 SelectLoadLane(Node, 2, AArch64::LD2i64);
5411 return;
5412 }
5413 break;
5414 case Intrinsic::aarch64_neon_ld3lane:
5415 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5416 SelectLoadLane(Node, 3, AArch64::LD3i8);
5417 return;
5418 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5419 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5420 SelectLoadLane(Node, 3, AArch64::LD3i16);
5421 return;
5422 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5423 VT == MVT::v2f32) {
5424 SelectLoadLane(Node, 3, AArch64::LD3i32);
5425 return;
5426 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5427 VT == MVT::v1f64) {
5428 SelectLoadLane(Node, 3, AArch64::LD3i64);
5429 return;
5430 }
5431 break;
5432 case Intrinsic::aarch64_neon_ld4lane:
5433 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5434 SelectLoadLane(Node, 4, AArch64::LD4i8);
5435 return;
5436 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5437 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5438 SelectLoadLane(Node, 4, AArch64::LD4i16);
5439 return;
5440 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5441 VT == MVT::v2f32) {
5442 SelectLoadLane(Node, 4, AArch64::LD4i32);
5443 return;
5444 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5445 VT == MVT::v1f64) {
5446 SelectLoadLane(Node, 4, AArch64::LD4i64);
5447 return;
5448 }
5449 break;
5450 case Intrinsic::aarch64_ld64b:
5451 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5452 return;
5453 case Intrinsic::aarch64_sve_ld2q_sret: {
5454 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5455 return;
5456 }
5457 case Intrinsic::aarch64_sve_ld3q_sret: {
5458 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5459 return;
5460 }
5461 case Intrinsic::aarch64_sve_ld4q_sret: {
5462 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5463 return;
5464 }
5465 case Intrinsic::aarch64_sve_ld2_sret: {
5466 if (VT == MVT::nxv16i8) {
5467 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5468 true);
5469 return;
5470 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5471 VT == MVT::nxv8bf16) {
5472 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5473 true);
5474 return;
5475 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5476 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5477 true);
5478 return;
5479 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5480 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5481 true);
5482 return;
5483 }
5484 break;
5485 }
5486 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5487 if (VT == MVT::nxv16i8) {
5488 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5489 SelectContiguousMultiVectorLoad(
5490 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5491 else if (Subtarget->hasSVE2p1())
5492 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5493 AArch64::LD1B_2Z);
5494 else
5495 break;
5496 return;
5497 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5498 VT == MVT::nxv8bf16) {
5499 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5500 SelectContiguousMultiVectorLoad(
5501 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5502 else if (Subtarget->hasSVE2p1())
5503 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5504 AArch64::LD1H_2Z);
5505 else
5506 break;
5507 return;
5508 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5509 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5510 SelectContiguousMultiVectorLoad(
5511 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5512 else if (Subtarget->hasSVE2p1())
5513 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5514 AArch64::LD1W_2Z);
5515 else
5516 break;
5517 return;
5518 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5519 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5520 SelectContiguousMultiVectorLoad(
5521 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5522 else if (Subtarget->hasSVE2p1())
5523 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5524 AArch64::LD1D_2Z);
5525 else
5526 break;
5527 return;
5528 }
5529 break;
5530 }
5531 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5532 if (VT == MVT::nxv16i8) {
5533 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5534 SelectContiguousMultiVectorLoad(
5535 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5536 else if (Subtarget->hasSVE2p1())
5537 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5538 AArch64::LD1B_4Z);
5539 else
5540 break;
5541 return;
5542 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5543 VT == MVT::nxv8bf16) {
5544 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5545 SelectContiguousMultiVectorLoad(
5546 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5547 else if (Subtarget->hasSVE2p1())
5548 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5549 AArch64::LD1H_4Z);
5550 else
5551 break;
5552 return;
5553 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5554 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5555 SelectContiguousMultiVectorLoad(
5556 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5557 else if (Subtarget->hasSVE2p1())
5558 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5559 AArch64::LD1W_4Z);
5560 else
5561 break;
5562 return;
5563 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5564 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5565 SelectContiguousMultiVectorLoad(
5566 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5567 else if (Subtarget->hasSVE2p1())
5568 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5569 AArch64::LD1D_4Z);
5570 else
5571 break;
5572 return;
5573 }
5574 break;
5575 }
5576 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5577 if (VT == MVT::nxv16i8) {
5578 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5579 SelectContiguousMultiVectorLoad(Node, 2, 0,
5580 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5581 AArch64::LDNT1B_2Z_PSEUDO);
5582 else if (Subtarget->hasSVE2p1())
5583 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5584 AArch64::LDNT1B_2Z);
5585 else
5586 break;
5587 return;
5588 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5589 VT == MVT::nxv8bf16) {
5590 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5591 SelectContiguousMultiVectorLoad(Node, 2, 1,
5592 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5593 AArch64::LDNT1H_2Z_PSEUDO);
5594 else if (Subtarget->hasSVE2p1())
5595 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5596 AArch64::LDNT1H_2Z);
5597 else
5598 break;
5599 return;
5600 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5601 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5602 SelectContiguousMultiVectorLoad(Node, 2, 2,
5603 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5604 AArch64::LDNT1W_2Z_PSEUDO);
5605 else if (Subtarget->hasSVE2p1())
5606 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5607 AArch64::LDNT1W_2Z);
5608 else
5609 break;
5610 return;
5611 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5612 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5613 SelectContiguousMultiVectorLoad(Node, 2, 3,
5614 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5615 AArch64::LDNT1D_2Z_PSEUDO);
5616 else if (Subtarget->hasSVE2p1())
5617 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5618 AArch64::LDNT1D_2Z);
5619 else
5620 break;
5621 return;
5622 }
5623 break;
5624 }
5625 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5626 if (VT == MVT::nxv16i8) {
5627 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5628 SelectContiguousMultiVectorLoad(Node, 4, 0,
5629 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5630 AArch64::LDNT1B_4Z_PSEUDO);
5631 else if (Subtarget->hasSVE2p1())
5632 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5633 AArch64::LDNT1B_4Z);
5634 else
5635 break;
5636 return;
5637 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5638 VT == MVT::nxv8bf16) {
5639 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5640 SelectContiguousMultiVectorLoad(Node, 4, 1,
5641 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5642 AArch64::LDNT1H_4Z_PSEUDO);
5643 else if (Subtarget->hasSVE2p1())
5644 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5645 AArch64::LDNT1H_4Z);
5646 else
5647 break;
5648 return;
5649 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5650 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5651 SelectContiguousMultiVectorLoad(Node, 4, 2,
5652 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5653 AArch64::LDNT1W_4Z_PSEUDO);
5654 else if (Subtarget->hasSVE2p1())
5655 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5656 AArch64::LDNT1W_4Z);
5657 else
5658 break;
5659 return;
5660 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5661 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5662 SelectContiguousMultiVectorLoad(Node, 4, 3,
5663 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5664 AArch64::LDNT1D_4Z_PSEUDO);
5665 else if (Subtarget->hasSVE2p1())
5666 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5667 AArch64::LDNT1D_4Z);
5668 else
5669 break;
5670 return;
5671 }
5672 break;
5673 }
5674 case Intrinsic::aarch64_sve_ld3_sret: {
5675 if (VT == MVT::nxv16i8) {
5676 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5677 true);
5678 return;
5679 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5680 VT == MVT::nxv8bf16) {
5681 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5682 true);
5683 return;
5684 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5685 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5686 true);
5687 return;
5688 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5689 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5690 true);
5691 return;
5692 }
5693 break;
5694 }
5695 case Intrinsic::aarch64_sve_ld4_sret: {
5696 if (VT == MVT::nxv16i8) {
5697 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5698 true);
5699 return;
5700 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5701 VT == MVT::nxv8bf16) {
5702 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5703 true);
5704 return;
5705 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5706 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5707 true);
5708 return;
5709 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5710 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5711 true);
5712 return;
5713 }
5714 break;
5715 }
5716 case Intrinsic::aarch64_sme_read_hor_vg2: {
5717 if (VT == MVT::nxv16i8) {
5718 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5719 AArch64::MOVA_2ZMXI_H_B);
5720 return;
5721 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5722 VT == MVT::nxv8bf16) {
5723 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5724 AArch64::MOVA_2ZMXI_H_H);
5725 return;
5726 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5727 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5728 AArch64::MOVA_2ZMXI_H_S);
5729 return;
5730 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5731 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5732 AArch64::MOVA_2ZMXI_H_D);
5733 return;
5734 }
5735 break;
5736 }
5737 case Intrinsic::aarch64_sme_read_ver_vg2: {
5738 if (VT == MVT::nxv16i8) {
5739 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5740 AArch64::MOVA_2ZMXI_V_B);
5741 return;
5742 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5743 VT == MVT::nxv8bf16) {
5744 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5745 AArch64::MOVA_2ZMXI_V_H);
5746 return;
5747 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5748 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5749 AArch64::MOVA_2ZMXI_V_S);
5750 return;
5751 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5752 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5753 AArch64::MOVA_2ZMXI_V_D);
5754 return;
5755 }
5756 break;
5757 }
5758 case Intrinsic::aarch64_sme_read_hor_vg4: {
5759 if (VT == MVT::nxv16i8) {
5760 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5761 AArch64::MOVA_4ZMXI_H_B);
5762 return;
5763 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5764 VT == MVT::nxv8bf16) {
5765 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5766 AArch64::MOVA_4ZMXI_H_H);
5767 return;
5768 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5769 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5770 AArch64::MOVA_4ZMXI_H_S);
5771 return;
5772 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5773 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5774 AArch64::MOVA_4ZMXI_H_D);
5775 return;
5776 }
5777 break;
5778 }
5779 case Intrinsic::aarch64_sme_read_ver_vg4: {
5780 if (VT == MVT::nxv16i8) {
5781 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5782 AArch64::MOVA_4ZMXI_V_B);
5783 return;
5784 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5785 VT == MVT::nxv8bf16) {
5786 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5787 AArch64::MOVA_4ZMXI_V_H);
5788 return;
5789 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5790 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5791 AArch64::MOVA_4ZMXI_V_S);
5792 return;
5793 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5794 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5795 AArch64::MOVA_4ZMXI_V_D);
5796 return;
5797 }
5798 break;
5799 }
5800 case Intrinsic::aarch64_sme_read_vg1x2: {
5801 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5802 AArch64::MOVA_VG2_2ZMXI);
5803 return;
5804 }
5805 case Intrinsic::aarch64_sme_read_vg1x4: {
5806 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5807 AArch64::MOVA_VG4_4ZMXI);
5808 return;
5809 }
5810 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5811 if (VT == MVT::nxv16i8) {
5812 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5813 return;
5814 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5815 VT == MVT::nxv8bf16) {
5816 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5817 return;
5818 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5819 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5820 return;
5821 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5822 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5823 return;
5824 }
5825 break;
5826 }
5827 case Intrinsic::aarch64_sme_readz_vert_x2: {
5828 if (VT == MVT::nxv16i8) {
5829 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5830 return;
5831 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5832 VT == MVT::nxv8bf16) {
5833 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5834 return;
5835 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5836 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5837 return;
5838 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5839 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5840 return;
5841 }
5842 break;
5843 }
5844 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5845 if (VT == MVT::nxv16i8) {
5846 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5847 return;
5848 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5849 VT == MVT::nxv8bf16) {
5850 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5851 return;
5852 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5853 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5854 return;
5855 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5856 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5857 return;
5858 }
5859 break;
5860 }
5861 case Intrinsic::aarch64_sme_readz_vert_x4: {
5862 if (VT == MVT::nxv16i8) {
5863 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5864 return;
5865 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5866 VT == MVT::nxv8bf16) {
5867 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5868 return;
5869 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5870 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5871 return;
5872 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5873 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5874 return;
5875 }
5876 break;
5877 }
5878 case Intrinsic::aarch64_sme_readz_x2: {
5879 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5880 AArch64::ZA);
5881 return;
5882 }
5883 case Intrinsic::aarch64_sme_readz_x4: {
5884 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5885 AArch64::ZA);
5886 return;
5887 }
5888 case Intrinsic::swift_async_context_addr: {
5889 SDLoc DL(Node);
5890 SDValue Chain = Node->getOperand(0);
5891 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5892 SDValue Res = SDValue(
5893 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5894 CurDAG->getTargetConstant(8, DL, MVT::i32),
5895 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5896 0);
5897 ReplaceUses(SDValue(Node, 0), Res);
5898 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5899 CurDAG->RemoveDeadNode(Node);
5900
5901 auto &MF = CurDAG->getMachineFunction();
5902 MF.getFrameInfo().setFrameAddressIsTaken(true);
5903 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5904 return;
5905 }
5906 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5908 Node->getValueType(0),
5909 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5910 AArch64::LUTI2_4ZTZI_S}))
5911 // Second Immediate must be <= 3:
5912 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5913 return;
5914 }
5915 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5917 Node->getValueType(0),
5918 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5919 // Second Immediate must be <= 1:
5920 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5921 return;
5922 }
5923 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5925 Node->getValueType(0),
5926 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5927 AArch64::LUTI2_2ZTZI_S}))
5928 // Second Immediate must be <= 7:
5929 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5930 return;
5931 }
5932 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5934 Node->getValueType(0),
5935 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5936 AArch64::LUTI4_2ZTZI_S}))
5937 // Second Immediate must be <= 3:
5938 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5939 return;
5940 }
5941 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5942 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5943 return;
5944 }
5945 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5947 Node->getValueType(0),
5948 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5949 SelectCVTIntrinsicFP8(Node, 2, Opc);
5950 return;
5951 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5953 Node->getValueType(0),
5954 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5955 SelectCVTIntrinsicFP8(Node, 2, Opc);
5956 return;
5957 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5959 Node->getValueType(0),
5960 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5961 SelectCVTIntrinsicFP8(Node, 2, Opc);
5962 return;
5963 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5965 Node->getValueType(0),
5966 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5967 SelectCVTIntrinsicFP8(Node, 2, Opc);
5968 return;
5969 case Intrinsic::ptrauth_resign_load_relative:
5970 SelectPtrauthResign(Node);
5971 return;
5972 }
5973 } break;
5975 unsigned IntNo = Node->getConstantOperandVal(0);
5976 switch (IntNo) {
5977 default:
5978 break;
5979 case Intrinsic::aarch64_tagp:
5980 SelectTagP(Node);
5981 return;
5982
5983 case Intrinsic::ptrauth_auth:
5984 SelectPtrauthAuth(Node);
5985 return;
5986
5987 case Intrinsic::ptrauth_resign:
5988 SelectPtrauthResign(Node);
5989 return;
5990
5991 case Intrinsic::aarch64_neon_tbl2:
5992 SelectTable(Node, 2,
5993 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5994 false);
5995 return;
5996 case Intrinsic::aarch64_neon_tbl3:
5997 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5998 : AArch64::TBLv16i8Three,
5999 false);
6000 return;
6001 case Intrinsic::aarch64_neon_tbl4:
6002 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
6003 : AArch64::TBLv16i8Four,
6004 false);
6005 return;
6006 case Intrinsic::aarch64_neon_tbx2:
6007 SelectTable(Node, 2,
6008 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
6009 true);
6010 return;
6011 case Intrinsic::aarch64_neon_tbx3:
6012 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
6013 : AArch64::TBXv16i8Three,
6014 true);
6015 return;
6016 case Intrinsic::aarch64_neon_tbx4:
6017 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
6018 : AArch64::TBXv16i8Four,
6019 true);
6020 return;
6021 case Intrinsic::aarch64_sve_srshl_single_x2:
6023 Node->getValueType(0),
6024 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
6025 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6026 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6027 return;
6028 case Intrinsic::aarch64_sve_srshl_single_x4:
6030 Node->getValueType(0),
6031 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6032 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6033 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6034 return;
6035 case Intrinsic::aarch64_sve_urshl_single_x2:
6037 Node->getValueType(0),
6038 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6039 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6040 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6041 return;
6042 case Intrinsic::aarch64_sve_urshl_single_x4:
6044 Node->getValueType(0),
6045 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6046 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6047 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6048 return;
6049 case Intrinsic::aarch64_sve_srshl_x2:
6051 Node->getValueType(0),
6052 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6053 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6054 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6055 return;
6056 case Intrinsic::aarch64_sve_srshl_x4:
6058 Node->getValueType(0),
6059 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6060 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6061 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6062 return;
6063 case Intrinsic::aarch64_sve_urshl_x2:
6065 Node->getValueType(0),
6066 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6067 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6068 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6069 return;
6070 case Intrinsic::aarch64_sve_urshl_x4:
6072 Node->getValueType(0),
6073 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6074 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6075 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6076 return;
6077 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6079 Node->getValueType(0),
6080 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6081 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6082 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6083 return;
6084 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6086 Node->getValueType(0),
6087 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6088 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6089 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6090 return;
6091 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6093 Node->getValueType(0),
6094 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6095 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6096 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6097 return;
6098 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6100 Node->getValueType(0),
6101 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6102 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6103 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6104 return;
6105 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6107 Node->getValueType(0),
6108 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6109 AArch64::FSCALE_2ZZ_D}))
6110 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6111 return;
6112 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6114 Node->getValueType(0),
6115 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6116 AArch64::FSCALE_4ZZ_D}))
6117 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6118 return;
6119 case Intrinsic::aarch64_sme_fp8_scale_x2:
6121 Node->getValueType(0),
6122 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6123 AArch64::FSCALE_2Z2Z_D}))
6124 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6125 return;
6126 case Intrinsic::aarch64_sme_fp8_scale_x4:
6128 Node->getValueType(0),
6129 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6130 AArch64::FSCALE_4Z4Z_D}))
6131 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6132 return;
6133 case Intrinsic::aarch64_sve_whilege_x2:
6135 Node->getValueType(0),
6136 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6137 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6138 SelectWhilePair(Node, Op);
6139 return;
6140 case Intrinsic::aarch64_sve_whilegt_x2:
6142 Node->getValueType(0),
6143 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6144 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6145 SelectWhilePair(Node, Op);
6146 return;
6147 case Intrinsic::aarch64_sve_whilehi_x2:
6149 Node->getValueType(0),
6150 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6151 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6152 SelectWhilePair(Node, Op);
6153 return;
6154 case Intrinsic::aarch64_sve_whilehs_x2:
6156 Node->getValueType(0),
6157 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6158 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6159 SelectWhilePair(Node, Op);
6160 return;
6161 case Intrinsic::aarch64_sve_whilele_x2:
6163 Node->getValueType(0),
6164 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6165 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6166 SelectWhilePair(Node, Op);
6167 return;
6168 case Intrinsic::aarch64_sve_whilelo_x2:
6170 Node->getValueType(0),
6171 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6172 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6173 SelectWhilePair(Node, Op);
6174 return;
6175 case Intrinsic::aarch64_sve_whilels_x2:
6177 Node->getValueType(0),
6178 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6179 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6180 SelectWhilePair(Node, Op);
6181 return;
6182 case Intrinsic::aarch64_sve_whilelt_x2:
6184 Node->getValueType(0),
6185 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6186 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6187 SelectWhilePair(Node, Op);
6188 return;
6189 case Intrinsic::aarch64_sve_smax_single_x2:
6191 Node->getValueType(0),
6192 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6193 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6194 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6195 return;
6196 case Intrinsic::aarch64_sve_umax_single_x2:
6198 Node->getValueType(0),
6199 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6200 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6201 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6202 return;
6203 case Intrinsic::aarch64_sve_fmax_single_x2:
6205 Node->getValueType(0),
6206 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6207 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6208 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6209 return;
6210 case Intrinsic::aarch64_sve_smax_single_x4:
6212 Node->getValueType(0),
6213 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6214 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6215 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6216 return;
6217 case Intrinsic::aarch64_sve_umax_single_x4:
6219 Node->getValueType(0),
6220 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6221 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6222 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6223 return;
6224 case Intrinsic::aarch64_sve_fmax_single_x4:
6226 Node->getValueType(0),
6227 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6228 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6229 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6230 return;
6231 case Intrinsic::aarch64_sve_smin_single_x2:
6233 Node->getValueType(0),
6234 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6235 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6236 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6237 return;
6238 case Intrinsic::aarch64_sve_umin_single_x2:
6240 Node->getValueType(0),
6241 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6242 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6243 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6244 return;
6245 case Intrinsic::aarch64_sve_fmin_single_x2:
6247 Node->getValueType(0),
6248 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6249 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6250 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6251 return;
6252 case Intrinsic::aarch64_sve_smin_single_x4:
6254 Node->getValueType(0),
6255 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6256 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6257 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6258 return;
6259 case Intrinsic::aarch64_sve_umin_single_x4:
6261 Node->getValueType(0),
6262 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6263 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6264 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6265 return;
6266 case Intrinsic::aarch64_sve_fmin_single_x4:
6268 Node->getValueType(0),
6269 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6270 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6271 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6272 return;
6273 case Intrinsic::aarch64_sve_smax_x2:
6275 Node->getValueType(0),
6276 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6277 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6278 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6279 return;
6280 case Intrinsic::aarch64_sve_umax_x2:
6282 Node->getValueType(0),
6283 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6284 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6285 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6286 return;
6287 case Intrinsic::aarch64_sve_fmax_x2:
6289 Node->getValueType(0),
6290 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6291 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6292 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6293 return;
6294 case Intrinsic::aarch64_sve_smax_x4:
6296 Node->getValueType(0),
6297 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6298 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6299 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6300 return;
6301 case Intrinsic::aarch64_sve_umax_x4:
6303 Node->getValueType(0),
6304 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6305 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6306 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6307 return;
6308 case Intrinsic::aarch64_sve_fmax_x4:
6310 Node->getValueType(0),
6311 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6312 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6313 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6314 return;
6315 case Intrinsic::aarch64_sme_famax_x2:
6317 Node->getValueType(0),
6318 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6319 AArch64::FAMAX_2Z2Z_D}))
6320 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6321 return;
6322 case Intrinsic::aarch64_sme_famax_x4:
6324 Node->getValueType(0),
6325 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6326 AArch64::FAMAX_4Z4Z_D}))
6327 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6328 return;
6329 case Intrinsic::aarch64_sme_famin_x2:
6331 Node->getValueType(0),
6332 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6333 AArch64::FAMIN_2Z2Z_D}))
6334 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6335 return;
6336 case Intrinsic::aarch64_sme_famin_x4:
6338 Node->getValueType(0),
6339 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6340 AArch64::FAMIN_4Z4Z_D}))
6341 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6342 return;
6343 case Intrinsic::aarch64_sve_smin_x2:
6345 Node->getValueType(0),
6346 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6347 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6348 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6349 return;
6350 case Intrinsic::aarch64_sve_umin_x2:
6352 Node->getValueType(0),
6353 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6354 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6355 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6356 return;
6357 case Intrinsic::aarch64_sve_fmin_x2:
6359 Node->getValueType(0),
6360 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6361 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6362 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6363 return;
6364 case Intrinsic::aarch64_sve_smin_x4:
6366 Node->getValueType(0),
6367 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6368 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6369 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6370 return;
6371 case Intrinsic::aarch64_sve_umin_x4:
6373 Node->getValueType(0),
6374 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6375 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6376 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6377 return;
6378 case Intrinsic::aarch64_sve_fmin_x4:
6380 Node->getValueType(0),
6381 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6382 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6383 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6384 return;
6385 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6387 Node->getValueType(0),
6388 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6389 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6390 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6391 return;
6392 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6394 Node->getValueType(0),
6395 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6396 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6397 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6398 return;
6399 case Intrinsic::aarch64_sve_fminnm_single_x2:
6401 Node->getValueType(0),
6402 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6403 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6404 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6405 return;
6406 case Intrinsic::aarch64_sve_fminnm_single_x4:
6408 Node->getValueType(0),
6409 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6410 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6411 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6412 return;
6413 case Intrinsic::aarch64_sve_fscale_single_x4:
6414 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6415 return;
6416 case Intrinsic::aarch64_sve_fscale_single_x2:
6417 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6418 return;
6419 case Intrinsic::aarch64_sve_fmul_single_x4:
6421 Node->getValueType(0),
6422 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6423 AArch64::FMUL_4ZZ_D}))
6424 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6425 return;
6426 case Intrinsic::aarch64_sve_fmul_single_x2:
6428 Node->getValueType(0),
6429 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6430 AArch64::FMUL_2ZZ_D}))
6431 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6432 return;
6433 case Intrinsic::aarch64_sve_fmaxnm_x2:
6435 Node->getValueType(0),
6436 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6437 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6438 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6439 return;
6440 case Intrinsic::aarch64_sve_fmaxnm_x4:
6442 Node->getValueType(0),
6443 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6444 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6445 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6446 return;
6447 case Intrinsic::aarch64_sve_fminnm_x2:
6449 Node->getValueType(0),
6450 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6451 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6452 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6453 return;
6454 case Intrinsic::aarch64_sve_fminnm_x4:
6456 Node->getValueType(0),
6457 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6458 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6459 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6460 return;
6461 case Intrinsic::aarch64_sve_aese_lane_x2:
6462 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6463 return;
6464 case Intrinsic::aarch64_sve_aesd_lane_x2:
6465 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6466 return;
6467 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6468 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6469 return;
6470 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6471 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6472 return;
6473 case Intrinsic::aarch64_sve_aese_lane_x4:
6474 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6475 return;
6476 case Intrinsic::aarch64_sve_aesd_lane_x4:
6477 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6478 return;
6479 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6480 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6481 return;
6482 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6483 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6484 return;
6485 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6486 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6487 return;
6488 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6489 SDLoc DL(Node);
6490 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6491 SDNode *Res =
6492 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6493 SDValue SuperReg = SDValue(Res, 0);
6494 for (unsigned I = 0; I < 2; I++)
6495 ReplaceUses(SDValue(Node, I),
6496 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6497 SuperReg));
6498 CurDAG->RemoveDeadNode(Node);
6499 return;
6500 }
6501 case Intrinsic::aarch64_sve_fscale_x4:
6502 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6503 return;
6504 case Intrinsic::aarch64_sve_fscale_x2:
6505 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6506 return;
6507 case Intrinsic::aarch64_sve_fmul_x4:
6509 Node->getValueType(0),
6510 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6511 AArch64::FMUL_4Z4Z_D}))
6512 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6513 return;
6514 case Intrinsic::aarch64_sve_fmul_x2:
6516 Node->getValueType(0),
6517 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6518 AArch64::FMUL_2Z2Z_D}))
6519 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6520 return;
6521 case Intrinsic::aarch64_sve_fcvtzs_x2:
6522 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6523 return;
6524 case Intrinsic::aarch64_sve_scvtf_x2:
6525 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6526 return;
6527 case Intrinsic::aarch64_sve_fcvtzu_x2:
6528 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6529 return;
6530 case Intrinsic::aarch64_sve_ucvtf_x2:
6531 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6532 return;
6533 case Intrinsic::aarch64_sve_fcvtzs_x4:
6534 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6535 return;
6536 case Intrinsic::aarch64_sve_scvtf_x4:
6537 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6538 return;
6539 case Intrinsic::aarch64_sve_fcvtzu_x4:
6540 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6541 return;
6542 case Intrinsic::aarch64_sve_ucvtf_x4:
6543 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6544 return;
6545 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6546 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6547 return;
6548 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6549 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6550 return;
6551 case Intrinsic::aarch64_sve_sclamp_single_x2:
6553 Node->getValueType(0),
6554 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6555 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6556 SelectClamp(Node, 2, Op);
6557 return;
6558 case Intrinsic::aarch64_sve_uclamp_single_x2:
6560 Node->getValueType(0),
6561 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6562 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6563 SelectClamp(Node, 2, Op);
6564 return;
6565 case Intrinsic::aarch64_sve_fclamp_single_x2:
6567 Node->getValueType(0),
6568 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6569 AArch64::FCLAMP_VG2_2Z2Z_D}))
6570 SelectClamp(Node, 2, Op);
6571 return;
6572 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6573 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6574 return;
6575 case Intrinsic::aarch64_sve_sclamp_single_x4:
6577 Node->getValueType(0),
6578 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6579 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6580 SelectClamp(Node, 4, Op);
6581 return;
6582 case Intrinsic::aarch64_sve_uclamp_single_x4:
6584 Node->getValueType(0),
6585 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6586 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6587 SelectClamp(Node, 4, Op);
6588 return;
6589 case Intrinsic::aarch64_sve_fclamp_single_x4:
6591 Node->getValueType(0),
6592 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6593 AArch64::FCLAMP_VG4_4Z4Z_D}))
6594 SelectClamp(Node, 4, Op);
6595 return;
6596 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6597 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6598 return;
6599 case Intrinsic::aarch64_sve_add_single_x2:
6601 Node->getValueType(0),
6602 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6603 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6604 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6605 return;
6606 case Intrinsic::aarch64_sve_add_single_x4:
6608 Node->getValueType(0),
6609 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6610 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6611 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6612 return;
6613 case Intrinsic::aarch64_sve_zip_x2:
6615 Node->getValueType(0),
6616 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6617 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6618 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6619 return;
6620 case Intrinsic::aarch64_sve_zipq_x2:
6621 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6622 AArch64::ZIP_VG2_2ZZZ_Q);
6623 return;
6624 case Intrinsic::aarch64_sve_zip_x4:
6626 Node->getValueType(0),
6627 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6628 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6629 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6630 return;
6631 case Intrinsic::aarch64_sve_zipq_x4:
6632 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6633 AArch64::ZIP_VG4_4Z4Z_Q);
6634 return;
6635 case Intrinsic::aarch64_sve_uzp_x2:
6637 Node->getValueType(0),
6638 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6639 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6640 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6641 return;
6642 case Intrinsic::aarch64_sve_uzpq_x2:
6643 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6644 AArch64::UZP_VG2_2ZZZ_Q);
6645 return;
6646 case Intrinsic::aarch64_sve_uzp_x4:
6648 Node->getValueType(0),
6649 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6650 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6651 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6652 return;
6653 case Intrinsic::aarch64_sve_uzpq_x4:
6654 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6655 AArch64::UZP_VG4_4Z4Z_Q);
6656 return;
6657 case Intrinsic::aarch64_sve_sel_x2:
6659 Node->getValueType(0),
6660 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6661 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6662 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6663 return;
6664 case Intrinsic::aarch64_sve_sel_x4:
6666 Node->getValueType(0),
6667 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6668 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6669 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6670 return;
6671 case Intrinsic::aarch64_sve_frinta_x2:
6672 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6673 return;
6674 case Intrinsic::aarch64_sve_frinta_x4:
6675 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6676 return;
6677 case Intrinsic::aarch64_sve_frintm_x2:
6678 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6679 return;
6680 case Intrinsic::aarch64_sve_frintm_x4:
6681 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6682 return;
6683 case Intrinsic::aarch64_sve_frintn_x2:
6684 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6685 return;
6686 case Intrinsic::aarch64_sve_frintn_x4:
6687 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6688 return;
6689 case Intrinsic::aarch64_sve_frintp_x2:
6690 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6691 return;
6692 case Intrinsic::aarch64_sve_frintp_x4:
6693 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6694 return;
6695 case Intrinsic::aarch64_sve_sunpk_x2:
6697 Node->getValueType(0),
6698 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6699 AArch64::SUNPK_VG2_2ZZ_D}))
6700 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6701 return;
6702 case Intrinsic::aarch64_sve_uunpk_x2:
6704 Node->getValueType(0),
6705 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6706 AArch64::UUNPK_VG2_2ZZ_D}))
6707 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6708 return;
6709 case Intrinsic::aarch64_sve_sunpk_x4:
6711 Node->getValueType(0),
6712 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6713 AArch64::SUNPK_VG4_4Z2Z_D}))
6714 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6715 return;
6716 case Intrinsic::aarch64_sve_uunpk_x4:
6718 Node->getValueType(0),
6719 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6720 AArch64::UUNPK_VG4_4Z2Z_D}))
6721 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6722 return;
6723 case Intrinsic::aarch64_sve_pext_x2: {
6725 Node->getValueType(0),
6726 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6727 AArch64::PEXT_2PCI_D}))
6728 SelectPExtPair(Node, Op);
6729 return;
6730 }
6731 }
6732 break;
6733 }
6734 case ISD::INTRINSIC_VOID: {
6735 unsigned IntNo = Node->getConstantOperandVal(1);
6736 if (Node->getNumOperands() >= 3)
6737 VT = Node->getOperand(2)->getValueType(0);
6738 switch (IntNo) {
6739 default:
6740 break;
6741 case Intrinsic::aarch64_neon_st1x2: {
6742 if (VT == MVT::v8i8) {
6743 SelectStore(Node, 2, AArch64::ST1Twov8b);
6744 return;
6745 } else if (VT == MVT::v16i8) {
6746 SelectStore(Node, 2, AArch64::ST1Twov16b);
6747 return;
6748 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6749 VT == MVT::v4bf16) {
6750 SelectStore(Node, 2, AArch64::ST1Twov4h);
6751 return;
6752 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6753 VT == MVT::v8bf16) {
6754 SelectStore(Node, 2, AArch64::ST1Twov8h);
6755 return;
6756 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6757 SelectStore(Node, 2, AArch64::ST1Twov2s);
6758 return;
6759 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6760 SelectStore(Node, 2, AArch64::ST1Twov4s);
6761 return;
6762 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6763 SelectStore(Node, 2, AArch64::ST1Twov2d);
6764 return;
6765 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6766 SelectStore(Node, 2, AArch64::ST1Twov1d);
6767 return;
6768 }
6769 break;
6770 }
6771 case Intrinsic::aarch64_neon_st1x3: {
6772 if (VT == MVT::v8i8) {
6773 SelectStore(Node, 3, AArch64::ST1Threev8b);
6774 return;
6775 } else if (VT == MVT::v16i8) {
6776 SelectStore(Node, 3, AArch64::ST1Threev16b);
6777 return;
6778 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6779 VT == MVT::v4bf16) {
6780 SelectStore(Node, 3, AArch64::ST1Threev4h);
6781 return;
6782 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6783 VT == MVT::v8bf16) {
6784 SelectStore(Node, 3, AArch64::ST1Threev8h);
6785 return;
6786 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6787 SelectStore(Node, 3, AArch64::ST1Threev2s);
6788 return;
6789 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6790 SelectStore(Node, 3, AArch64::ST1Threev4s);
6791 return;
6792 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6793 SelectStore(Node, 3, AArch64::ST1Threev2d);
6794 return;
6795 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6796 SelectStore(Node, 3, AArch64::ST1Threev1d);
6797 return;
6798 }
6799 break;
6800 }
6801 case Intrinsic::aarch64_neon_st1x4: {
6802 if (VT == MVT::v8i8) {
6803 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6804 return;
6805 } else if (VT == MVT::v16i8) {
6806 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6807 return;
6808 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6809 VT == MVT::v4bf16) {
6810 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6811 return;
6812 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6813 VT == MVT::v8bf16) {
6814 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6815 return;
6816 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6817 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6818 return;
6819 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6820 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6821 return;
6822 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6823 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6824 return;
6825 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6826 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6827 return;
6828 }
6829 break;
6830 }
6831 case Intrinsic::aarch64_neon_st2: {
6832 if (VT == MVT::v8i8) {
6833 SelectStore(Node, 2, AArch64::ST2Twov8b);
6834 return;
6835 } else if (VT == MVT::v16i8) {
6836 SelectStore(Node, 2, AArch64::ST2Twov16b);
6837 return;
6838 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6839 VT == MVT::v4bf16) {
6840 SelectStore(Node, 2, AArch64::ST2Twov4h);
6841 return;
6842 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6843 VT == MVT::v8bf16) {
6844 SelectStore(Node, 2, AArch64::ST2Twov8h);
6845 return;
6846 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6847 SelectStore(Node, 2, AArch64::ST2Twov2s);
6848 return;
6849 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6850 SelectStore(Node, 2, AArch64::ST2Twov4s);
6851 return;
6852 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6853 SelectStore(Node, 2, AArch64::ST2Twov2d);
6854 return;
6855 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6856 SelectStore(Node, 2, AArch64::ST1Twov1d);
6857 return;
6858 }
6859 break;
6860 }
6861 case Intrinsic::aarch64_neon_st3: {
6862 if (VT == MVT::v8i8) {
6863 SelectStore(Node, 3, AArch64::ST3Threev8b);
6864 return;
6865 } else if (VT == MVT::v16i8) {
6866 SelectStore(Node, 3, AArch64::ST3Threev16b);
6867 return;
6868 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6869 VT == MVT::v4bf16) {
6870 SelectStore(Node, 3, AArch64::ST3Threev4h);
6871 return;
6872 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6873 VT == MVT::v8bf16) {
6874 SelectStore(Node, 3, AArch64::ST3Threev8h);
6875 return;
6876 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6877 SelectStore(Node, 3, AArch64::ST3Threev2s);
6878 return;
6879 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6880 SelectStore(Node, 3, AArch64::ST3Threev4s);
6881 return;
6882 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6883 SelectStore(Node, 3, AArch64::ST3Threev2d);
6884 return;
6885 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6886 SelectStore(Node, 3, AArch64::ST1Threev1d);
6887 return;
6888 }
6889 break;
6890 }
6891 case Intrinsic::aarch64_neon_st4: {
6892 if (VT == MVT::v8i8) {
6893 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6894 return;
6895 } else if (VT == MVT::v16i8) {
6896 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6897 return;
6898 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6899 VT == MVT::v4bf16) {
6900 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6901 return;
6902 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6903 VT == MVT::v8bf16) {
6904 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6905 return;
6906 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6907 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6908 return;
6909 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6910 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6911 return;
6912 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6913 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6914 return;
6915 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6916 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6917 return;
6918 }
6919 break;
6920 }
6921 case Intrinsic::aarch64_neon_st2lane: {
6922 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6923 SelectStoreLane(Node, 2, AArch64::ST2i8);
6924 return;
6925 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6926 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6927 SelectStoreLane(Node, 2, AArch64::ST2i16);
6928 return;
6929 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6930 VT == MVT::v2f32) {
6931 SelectStoreLane(Node, 2, AArch64::ST2i32);
6932 return;
6933 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6934 VT == MVT::v1f64) {
6935 SelectStoreLane(Node, 2, AArch64::ST2i64);
6936 return;
6937 }
6938 break;
6939 }
6940 case Intrinsic::aarch64_neon_st3lane: {
6941 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6942 SelectStoreLane(Node, 3, AArch64::ST3i8);
6943 return;
6944 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6945 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6946 SelectStoreLane(Node, 3, AArch64::ST3i16);
6947 return;
6948 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6949 VT == MVT::v2f32) {
6950 SelectStoreLane(Node, 3, AArch64::ST3i32);
6951 return;
6952 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6953 VT == MVT::v1f64) {
6954 SelectStoreLane(Node, 3, AArch64::ST3i64);
6955 return;
6956 }
6957 break;
6958 }
6959 case Intrinsic::aarch64_neon_st4lane: {
6960 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6961 SelectStoreLane(Node, 4, AArch64::ST4i8);
6962 return;
6963 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6964 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6965 SelectStoreLane(Node, 4, AArch64::ST4i16);
6966 return;
6967 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6968 VT == MVT::v2f32) {
6969 SelectStoreLane(Node, 4, AArch64::ST4i32);
6970 return;
6971 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6972 VT == MVT::v1f64) {
6973 SelectStoreLane(Node, 4, AArch64::ST4i64);
6974 return;
6975 }
6976 break;
6977 }
6978 case Intrinsic::aarch64_sve_st2q: {
6979 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6980 return;
6981 }
6982 case Intrinsic::aarch64_sve_st3q: {
6983 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6984 return;
6985 }
6986 case Intrinsic::aarch64_sve_st4q: {
6987 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6988 return;
6989 }
6990 case Intrinsic::aarch64_sve_st2: {
6991 if (VT == MVT::nxv16i8) {
6992 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6993 return;
6994 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6995 VT == MVT::nxv8bf16) {
6996 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6997 return;
6998 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6999 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
7000 return;
7001 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7002 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
7003 return;
7004 }
7005 break;
7006 }
7007 case Intrinsic::aarch64_sve_st3: {
7008 if (VT == MVT::nxv16i8) {
7009 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
7010 return;
7011 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7012 VT == MVT::nxv8bf16) {
7013 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
7014 return;
7015 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7016 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
7017 return;
7018 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7019 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
7020 return;
7021 }
7022 break;
7023 }
7024 case Intrinsic::aarch64_sve_st4: {
7025 if (VT == MVT::nxv16i8) {
7026 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
7027 return;
7028 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7029 VT == MVT::nxv8bf16) {
7030 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
7031 return;
7032 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7033 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
7034 return;
7035 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7036 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
7037 return;
7038 }
7039 break;
7040 }
7041 }
7042 break;
7043 }
7044 case AArch64ISD::LD2post: {
7045 if (VT == MVT::v8i8) {
7046 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7047 return;
7048 } else if (VT == MVT::v16i8) {
7049 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7050 return;
7051 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7052 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7053 return;
7054 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7055 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7056 return;
7057 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7058 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7059 return;
7060 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7061 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7062 return;
7063 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7064 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7065 return;
7066 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7067 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7068 return;
7069 }
7070 break;
7071 }
7072 case AArch64ISD::LD3post: {
7073 if (VT == MVT::v8i8) {
7074 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7075 return;
7076 } else if (VT == MVT::v16i8) {
7077 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7078 return;
7079 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7080 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7081 return;
7082 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7083 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7084 return;
7085 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7086 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7087 return;
7088 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7089 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7090 return;
7091 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7092 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7093 return;
7094 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7095 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7096 return;
7097 }
7098 break;
7099 }
7100 case AArch64ISD::LD4post: {
7101 if (VT == MVT::v8i8) {
7102 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7103 return;
7104 } else if (VT == MVT::v16i8) {
7105 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7106 return;
7107 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7108 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7109 return;
7110 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7111 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7112 return;
7113 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7114 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7115 return;
7116 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7117 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7118 return;
7119 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7120 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7121 return;
7122 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7123 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7124 return;
7125 }
7126 break;
7127 }
7128 case AArch64ISD::LD1x2post: {
7129 if (VT == MVT::v8i8) {
7130 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7131 return;
7132 } else if (VT == MVT::v16i8) {
7133 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7134 return;
7135 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7136 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7137 return;
7138 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7139 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7140 return;
7141 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7142 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7143 return;
7144 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7145 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7146 return;
7147 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7148 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7149 return;
7150 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7151 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7152 return;
7153 }
7154 break;
7155 }
7156 case AArch64ISD::LD1x3post: {
7157 if (VT == MVT::v8i8) {
7158 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7159 return;
7160 } else if (VT == MVT::v16i8) {
7161 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7162 return;
7163 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7164 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7165 return;
7166 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7167 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7168 return;
7169 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7170 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7171 return;
7172 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7173 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7174 return;
7175 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7176 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7177 return;
7178 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7179 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7180 return;
7181 }
7182 break;
7183 }
7184 case AArch64ISD::LD1x4post: {
7185 if (VT == MVT::v8i8) {
7186 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7187 return;
7188 } else if (VT == MVT::v16i8) {
7189 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7190 return;
7191 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7192 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7193 return;
7194 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7195 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7196 return;
7197 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7198 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7199 return;
7200 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7201 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7202 return;
7203 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7204 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7205 return;
7206 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7207 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7208 return;
7209 }
7210 break;
7211 }
7212 case AArch64ISD::LD1DUPpost: {
7213 if (VT == MVT::v8i8) {
7214 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7215 return;
7216 } else if (VT == MVT::v16i8) {
7217 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7218 return;
7219 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7220 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7221 return;
7222 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7223 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7224 return;
7225 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7226 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7227 return;
7228 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7229 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7230 return;
7231 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7232 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7233 return;
7234 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7235 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7236 return;
7237 }
7238 break;
7239 }
7240 case AArch64ISD::LD2DUPpost: {
7241 if (VT == MVT::v8i8) {
7242 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7243 return;
7244 } else if (VT == MVT::v16i8) {
7245 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7246 return;
7247 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7248 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7249 return;
7250 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7251 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7252 return;
7253 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7254 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7255 return;
7256 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7257 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7258 return;
7259 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7260 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7261 return;
7262 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7263 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7264 return;
7265 }
7266 break;
7267 }
7268 case AArch64ISD::LD3DUPpost: {
7269 if (VT == MVT::v8i8) {
7270 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7271 return;
7272 } else if (VT == MVT::v16i8) {
7273 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7274 return;
7275 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7276 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7277 return;
7278 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7279 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7280 return;
7281 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7282 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7283 return;
7284 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7285 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7286 return;
7287 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7288 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7289 return;
7290 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7291 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7292 return;
7293 }
7294 break;
7295 }
7296 case AArch64ISD::LD4DUPpost: {
7297 if (VT == MVT::v8i8) {
7298 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7299 return;
7300 } else if (VT == MVT::v16i8) {
7301 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7302 return;
7303 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7304 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7305 return;
7306 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7307 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7308 return;
7309 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7310 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7311 return;
7312 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7313 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7314 return;
7315 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7316 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7317 return;
7318 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7319 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7320 return;
7321 }
7322 break;
7323 }
7324 case AArch64ISD::LD1LANEpost: {
7325 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7326 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7327 return;
7328 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7329 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7330 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7331 return;
7332 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7333 VT == MVT::v2f32) {
7334 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7335 return;
7336 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7337 VT == MVT::v1f64) {
7338 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7339 return;
7340 }
7341 break;
7342 }
7343 case AArch64ISD::LD2LANEpost: {
7344 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7345 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7346 return;
7347 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7348 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7349 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7350 return;
7351 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7352 VT == MVT::v2f32) {
7353 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7354 return;
7355 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7356 VT == MVT::v1f64) {
7357 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7358 return;
7359 }
7360 break;
7361 }
7362 case AArch64ISD::LD3LANEpost: {
7363 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7364 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7365 return;
7366 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7367 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7368 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7369 return;
7370 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7371 VT == MVT::v2f32) {
7372 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7373 return;
7374 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7375 VT == MVT::v1f64) {
7376 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7377 return;
7378 }
7379 break;
7380 }
7381 case AArch64ISD::LD4LANEpost: {
7382 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7383 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7384 return;
7385 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7386 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7387 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7388 return;
7389 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7390 VT == MVT::v2f32) {
7391 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7392 return;
7393 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7394 VT == MVT::v1f64) {
7395 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7396 return;
7397 }
7398 break;
7399 }
7400 case AArch64ISD::ST2post: {
7401 VT = Node->getOperand(1).getValueType();
7402 if (VT == MVT::v8i8) {
7403 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7404 return;
7405 } else if (VT == MVT::v16i8) {
7406 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7407 return;
7408 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7409 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7410 return;
7411 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7412 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7413 return;
7414 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7415 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7416 return;
7417 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7418 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7419 return;
7420 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7421 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7422 return;
7423 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7424 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7425 return;
7426 }
7427 break;
7428 }
7429 case AArch64ISD::ST3post: {
7430 VT = Node->getOperand(1).getValueType();
7431 if (VT == MVT::v8i8) {
7432 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7433 return;
7434 } else if (VT == MVT::v16i8) {
7435 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7436 return;
7437 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7438 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7439 return;
7440 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7441 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7442 return;
7443 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7444 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7445 return;
7446 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7447 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7448 return;
7449 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7450 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7451 return;
7452 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7453 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7454 return;
7455 }
7456 break;
7457 }
7458 case AArch64ISD::ST4post: {
7459 VT = Node->getOperand(1).getValueType();
7460 if (VT == MVT::v8i8) {
7461 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7462 return;
7463 } else if (VT == MVT::v16i8) {
7464 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7465 return;
7466 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7467 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7468 return;
7469 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7470 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7471 return;
7472 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7473 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7474 return;
7475 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7476 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7477 return;
7478 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7479 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7480 return;
7481 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7482 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7483 return;
7484 }
7485 break;
7486 }
7487 case AArch64ISD::ST1x2post: {
7488 VT = Node->getOperand(1).getValueType();
7489 if (VT == MVT::v8i8) {
7490 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7491 return;
7492 } else if (VT == MVT::v16i8) {
7493 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7494 return;
7495 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7496 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7497 return;
7498 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7499 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7500 return;
7501 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7502 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7503 return;
7504 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7505 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7506 return;
7507 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7508 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7509 return;
7510 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7511 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7512 return;
7513 }
7514 break;
7515 }
7516 case AArch64ISD::ST1x3post: {
7517 VT = Node->getOperand(1).getValueType();
7518 if (VT == MVT::v8i8) {
7519 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7520 return;
7521 } else if (VT == MVT::v16i8) {
7522 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7523 return;
7524 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7525 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7526 return;
7527 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7528 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7529 return;
7530 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7531 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7532 return;
7533 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7534 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7535 return;
7536 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7537 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7538 return;
7539 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7540 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7541 return;
7542 }
7543 break;
7544 }
7545 case AArch64ISD::ST1x4post: {
7546 VT = Node->getOperand(1).getValueType();
7547 if (VT == MVT::v8i8) {
7548 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7549 return;
7550 } else if (VT == MVT::v16i8) {
7551 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7552 return;
7553 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7554 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7555 return;
7556 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7557 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7558 return;
7559 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7560 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7561 return;
7562 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7563 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7564 return;
7565 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7566 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7567 return;
7568 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7569 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7570 return;
7571 }
7572 break;
7573 }
7574 case AArch64ISD::ST2LANEpost: {
7575 VT = Node->getOperand(1).getValueType();
7576 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7577 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7578 return;
7579 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7580 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7581 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7582 return;
7583 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7584 VT == MVT::v2f32) {
7585 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7586 return;
7587 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7588 VT == MVT::v1f64) {
7589 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7590 return;
7591 }
7592 break;
7593 }
7594 case AArch64ISD::ST3LANEpost: {
7595 VT = Node->getOperand(1).getValueType();
7596 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7597 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7598 return;
7599 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7600 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7601 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7602 return;
7603 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7604 VT == MVT::v2f32) {
7605 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7606 return;
7607 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7608 VT == MVT::v1f64) {
7609 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7610 return;
7611 }
7612 break;
7613 }
7614 case AArch64ISD::ST4LANEpost: {
7615 VT = Node->getOperand(1).getValueType();
7616 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7617 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7618 return;
7619 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7620 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7621 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7622 return;
7623 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7624 VT == MVT::v2f32) {
7625 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7626 return;
7627 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7628 VT == MVT::v1f64) {
7629 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7630 return;
7631 }
7632 break;
7633 }
7634 }
7635
7636 // Select the default instruction
7637 SelectCode(Node);
7638}
7639
7640/// createAArch64ISelDag - This pass converts a legalized DAG into a
7641/// AArch64-specific DAG, ready for instruction scheduling.
7643 CodeGenOptLevel OptLevel) {
7644 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7645}
7646
7647/// When \p PredVT is a scalable vector predicate in the form
7648/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7649/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7650/// structured vectors (NumVec >1), the output data type is
7651/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7652/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7653/// EVT.
7655 unsigned NumVec) {
7656 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7657 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7658 return EVT();
7659
7660 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7661 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7662 return EVT();
7663
7664 ElementCount EC = PredVT.getVectorElementCount();
7665 EVT ScalarVT =
7666 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7667 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7668
7669 return MemVT;
7670}
7671
7672/// Return the EVT of the data associated to a memory operation in \p
7673/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7675 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7676 return MemIntr->getMemoryVT();
7677
7678 if (isa<MemSDNode>(Root)) {
7679 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7680
7681 EVT DataVT;
7682 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7683 DataVT = Load->getValueType(0);
7684 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7685 DataVT = Load->getValueType(0);
7686 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7687 DataVT = Store->getValue().getValueType();
7688 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7689 DataVT = Store->getValue().getValueType();
7690 else
7691 llvm_unreachable("Unexpected MemSDNode!");
7692
7693 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7694 }
7695
7696 const unsigned Opcode = Root->getOpcode();
7697 // For custom ISD nodes, we have to look at them individually to extract the
7698 // type of the data moved to/from memory.
7699 switch (Opcode) {
7700 case AArch64ISD::LD1_MERGE_ZERO:
7701 case AArch64ISD::LD1S_MERGE_ZERO:
7702 case AArch64ISD::LDNF1_MERGE_ZERO:
7703 case AArch64ISD::LDNF1S_MERGE_ZERO:
7704 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7705 case AArch64ISD::ST1_PRED:
7706 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7707 default:
7708 break;
7709 }
7710
7711 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7712 return EVT();
7713
7714 switch (Root->getConstantOperandVal(1)) {
7715 default:
7716 return EVT();
7717 case Intrinsic::aarch64_sme_ldr:
7718 case Intrinsic::aarch64_sme_str:
7719 return MVT::nxv16i8;
7720 case Intrinsic::aarch64_sve_prf:
7721 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7722 // width of the predicate.
7724 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7725 case Intrinsic::aarch64_sve_ld2_sret:
7726 case Intrinsic::aarch64_sve_ld2q_sret:
7728 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7729 case Intrinsic::aarch64_sve_st2q:
7731 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7732 case Intrinsic::aarch64_sve_ld3_sret:
7733 case Intrinsic::aarch64_sve_ld3q_sret:
7735 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7736 case Intrinsic::aarch64_sve_st3q:
7738 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7739 case Intrinsic::aarch64_sve_ld4_sret:
7740 case Intrinsic::aarch64_sve_ld4q_sret:
7742 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7743 case Intrinsic::aarch64_sve_st4q:
7745 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7746 case Intrinsic::aarch64_sve_ld1udq:
7747 case Intrinsic::aarch64_sve_st1dq:
7748 return EVT(MVT::nxv1i64);
7749 case Intrinsic::aarch64_sve_ld1uwq:
7750 case Intrinsic::aarch64_sve_st1wq:
7751 return EVT(MVT::nxv1i32);
7752 }
7753}
7754
7755/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7756/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
7757/// where Root is the memory access using N for its address.
7758template <int64_t Min, int64_t Max>
7759bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7760 SDValue &Base,
7761 SDValue &OffImm) {
7762 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7763 const DataLayout &DL = CurDAG->getDataLayout();
7764 const MachineFrameInfo &MFI = MF->getFrameInfo();
7765
7766 if (N.getOpcode() == ISD::FrameIndex) {
7767 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7768 // We can only encode VL scaled offsets, so only fold in frame indexes
7769 // referencing SVE objects.
7770 if (MFI.hasScalableStackID(FI)) {
7771 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7772 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7773 return true;
7774 }
7775
7776 return false;
7777 }
7778
7779 if (MemVT == EVT())
7780 return false;
7781
7782 if (N.getOpcode() != ISD::ADD)
7783 return false;
7784
7785 SDValue VScale = N.getOperand(1);
7786 int64_t MulImm = std::numeric_limits<int64_t>::max();
7787 if (VScale.getOpcode() == ISD::VSCALE) {
7788 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7789 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7790 int64_t ByteOffset = C->getSExtValue();
7791 const auto KnownVScale =
7793
7794 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7795 return false;
7796
7797 MulImm = ByteOffset / KnownVScale;
7798 } else
7799 return false;
7800
7801 TypeSize TS = MemVT.getSizeInBits();
7802 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7803
7804 if ((MulImm % MemWidthBytes) != 0)
7805 return false;
7806
7807 int64_t Offset = MulImm / MemWidthBytes;
7809 return false;
7810
7811 Base = N.getOperand(0);
7812 if (Base.getOpcode() == ISD::FrameIndex) {
7813 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7814 // We can only encode VL scaled offsets, so only fold in frame indexes
7815 // referencing SVE objects.
7816 if (MFI.hasScalableStackID(FI))
7817 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7818 }
7819
7820 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7821 return true;
7822}
7823
7824/// Select register plus register addressing mode for SVE, with scaled
7825/// offset.
7826bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7827 SDValue &Base,
7828 SDValue &Offset) {
7829 if (N.getOpcode() != ISD::ADD)
7830 return false;
7831
7832 // Process an ADD node.
7833 const SDValue LHS = N.getOperand(0);
7834 const SDValue RHS = N.getOperand(1);
7835
7836 // 8 bit data does not come with the SHL node, so it is treated
7837 // separately.
7838 if (Scale == 0) {
7839 Base = LHS;
7840 Offset = RHS;
7841 return true;
7842 }
7843
7844 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7845 int64_t ImmOff = C->getSExtValue();
7846 unsigned Size = 1 << Scale;
7847
7848 // To use the reg+reg addressing mode, the immediate must be a multiple of
7849 // the vector element's byte size.
7850 if (ImmOff % Size)
7851 return false;
7852
7853 SDLoc DL(N);
7854 Base = LHS;
7855 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7856 SDValue Ops[] = {Offset};
7857 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7858 Offset = SDValue(MI, 0);
7859 return true;
7860 }
7861
7862 // Check if the RHS is a shift node with a constant.
7863 if (RHS.getOpcode() != ISD::SHL)
7864 return false;
7865
7866 const SDValue ShiftRHS = RHS.getOperand(1);
7867 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7868 if (C->getZExtValue() == Scale) {
7869 Base = LHS;
7870 Offset = RHS.getOperand(0);
7871 return true;
7872 }
7873
7874 return false;
7875}
7876
7877bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7878 const AArch64TargetLowering *TLI =
7879 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7880
7881 return TLI->isAllActivePredicate(*CurDAG, N);
7882}
7883
7884bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7885 EVT VT = N.getValueType();
7886 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7887}
7888
7889bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7891 unsigned Scale) {
7892 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7893 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7894 int64_t ImmOff = C->getSExtValue();
7895 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7896 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7897 }
7898 return SDValue();
7899 };
7900
7901 if (SDValue C = MatchConstantOffset(N)) {
7902 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7903 Offset = C;
7904 return true;
7905 }
7906
7907 // Try to untangle an ADD node into a 'reg + offset'
7908 if (CurDAG->isBaseWithConstantOffset(N)) {
7909 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7910 Base = N.getOperand(0);
7911 Offset = C;
7912 return true;
7913 }
7914 }
7915
7916 // By default, just match reg + 0.
7917 Base = N;
7918 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7919 return true;
7920}
7921
7922bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7923 SDValue &Imm) {
7925 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7926 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7927 // Check conservatively if the immediate fits the valid range [0, 64).
7928 // Immediate variants for GE and HS definitely need to be decremented
7929 // when lowering the pseudos later, so an immediate of 1 would become 0.
7930 // For the inverse conditions LT and LO we don't know for sure if they
7931 // will need a decrement but should the decision be made to reverse the
7932 // branch condition, we again end up with the need to decrement.
7933 // The same argument holds for LE, LS, GT and HI and possibly
7934 // incremented immediates. This can lead to slightly less optimal
7935 // codegen, e.g. we never codegen the legal case
7936 // cblt w0, #63, A
7937 // because we could end up with the illegal case
7938 // cbge w0, #64, B
7939 // should the decision to reverse the branch direction be made. For the
7940 // lower bound cases this is no problem since we can express comparisons
7941 // against 0 with either tbz/tnbz or using wzr/xzr.
7942 uint64_t LowerBound = 0, UpperBound = 64;
7943 switch (CC) {
7944 case AArch64CC::GE:
7945 case AArch64CC::HS:
7946 case AArch64CC::LT:
7947 case AArch64CC::LO:
7948 LowerBound = 1;
7949 break;
7950 case AArch64CC::LE:
7951 case AArch64CC::LS:
7952 case AArch64CC::GT:
7953 case AArch64CC::HI:
7954 UpperBound = 63;
7955 break;
7956 default:
7957 break;
7958 }
7959
7960 if (CN->getAPIntValue().uge(LowerBound) &&
7961 CN->getAPIntValue().ult(UpperBound)) {
7962 SDLoc DL(N);
7963 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7964 return true;
7965 }
7966 }
7967
7968 return false;
7969}
7970
7971template <bool MatchCBB>
7972bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7973 SDValue &ExtType) {
7974
7975 // Use an invalid shift-extend value to indicate we don't need to extend later
7976 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7977 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7978 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7979 return false;
7980 Reg = N.getOperand(0);
7981 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7982 SDLoc(N), MVT::i32);
7983 return true;
7984 }
7985
7987
7988 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7989 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7990 Reg = N.getOperand(0);
7991 ExtType =
7992 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7993 return true;
7994 }
7995
7996 return false;
7997}
7998
7999/// Try to fold AArch64 CSEL/FCMP patterns to FMAXNM/FMINNM.
8000///
8001/// This is intentionally done in PreprocessISelDAG rather than DAGCombine:
8002/// doing this earlier based on the defining operation of X can be invalidated
8003/// by later DAG combines. At this point the DAG is being prepared for
8004/// instruction selection, so the use of isKnownNeverSNaN(X) applies to the
8005/// final SDValue being selected.
8006/// Only handles FCMP(X, C) with scalar FP types, where C is a non-NaN constant.
8007/// The nsz requirement is needed only when C is zero, to avoid signed-zero
8008/// mismatches. The never-sNaN check is required because AArch64 FMAXNM/FMINNM
8009/// differ from fcmp+fcsel for signaling NaN inputs.
8010SDValue AArch64DAGToDAGISel::tryFoldCselToFMaxMin(SDNode &N) {
8011 EVT VT = N.getValueType(0);
8012
8013 // Scalar FP only.
8014 if (!VT.isFloatingPoint() || VT.isVector())
8015 return SDValue();
8016
8017 SDValue TVal = N.getOperand(0);
8018 SDValue FVal = N.getOperand(1);
8019 SDValue CCVal = N.getOperand(2);
8020 SDValue Cmp = N.getOperand(3);
8021
8022 if (Cmp.getOpcode() != AArch64ISD::FCMP)
8023 return SDValue();
8024
8025 auto *CC = dyn_cast<ConstantSDNode>(CCVal);
8026 if (!CC)
8027 return SDValue();
8028
8029 SDValue CmpLHS = Cmp.getOperand(0);
8030 SDValue CmpRHS = Cmp.getOperand(1);
8031 unsigned CondCode = CC->getZExtValue();
8032
8033 // Map VT and operation (max/min) to machine opcode.
8034 auto getOpc = [](EVT VT, bool isMax) -> unsigned {
8035 if (VT == MVT::f16)
8036 return isMax ? AArch64::FMAXNMHrr : AArch64::FMINNMHrr;
8037 else if (VT == MVT::f32)
8038 return isMax ? AArch64::FMAXNMSrr : AArch64::FMINNMSrr;
8039 else if (VT == MVT::f64)
8040 return isMax ? AArch64::FMAXNMDrr : AArch64::FMINNMDrr;
8041 else
8042 return 0; // unsupported
8043 };
8044
8045 // Determine whether to use max or min based on condition code and operands.
8046 bool isMax;
8047 if (CondCode == AArch64CC::GT || CondCode == AArch64CC::GE) {
8048 if (TVal == CmpLHS && FVal == CmpRHS)
8049 isMax = true;
8050 else if (TVal == CmpRHS && FVal == CmpLHS)
8051 isMax = false;
8052 else
8053 return SDValue();
8054 } else if (CondCode == AArch64CC::MI || CondCode == AArch64CC::LS) {
8055 if (TVal == CmpLHS && FVal == CmpRHS)
8056 isMax = false;
8057 else if (TVal == CmpRHS && FVal == CmpLHS)
8058 isMax = true;
8059 else
8060 return SDValue();
8061 } else {
8062 return SDValue();
8063 }
8064
8065 // Get the machine opcode for this VT and operation.
8066 unsigned Opc = getOpc(VT, isMax);
8067 if (!Opc)
8068 return SDValue();
8069
8070 // Constant must be non-NaN.
8071 auto *CFP = dyn_cast<ConstantFPSDNode>(CmpRHS);
8072 if (!CFP || CFP->getValueAPF().isNaN())
8073 return SDValue();
8074
8075 // nsz flag required only when constant is zero: fmaxnm(+0,-0)=+0 differs from
8076 // fcmp+select's -0. For non-zero constants, semantics are identical.
8077 if (CFP->isZero() && !N.getFlags().hasNoSignedZeros())
8078 return SDValue();
8079
8080 // Only fold if variable operand is never sNaN.
8081 // This runs after DAG combines, so later combines cannot remove a defining
8082 // operation used by isKnownNeverSNaN().
8083 if (!CurDAG->isKnownNeverSNaN(CmpLHS))
8084 return SDValue();
8085
8086 SDLoc DL(&N);
8087
8088 // Directly emit the machine node
8089 return SDValue(CurDAG->getMachineNode(Opc, DL, VT, CmpLHS, CmpRHS), 0);
8090}
8091
8092void AArch64DAGToDAGISel::PreprocessISelDAG() {
8093 bool MadeChange = false;
8094 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
8095 if (N.use_empty())
8096 continue;
8097
8099 switch (N.getOpcode()) {
8100 case ISD::SCALAR_TO_VECTOR: {
8101 EVT ScalarTy = N.getValueType(0).getVectorElementType();
8102 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
8103 ScalarTy == N.getOperand(0).getValueType())
8104 Result = addBitcastHints(*CurDAG, N);
8105
8106 break;
8107 }
8108 case AArch64ISD::CSEL:
8109 Result = tryFoldCselToFMaxMin(N);
8110 break;
8111 default:
8112 break;
8113 }
8114
8115 if (Result) {
8116 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
8117 LLVM_DEBUG(N.dump(CurDAG));
8118 LLVM_DEBUG(dbgs() << "\nNew: ");
8119 LLVM_DEBUG(Result.dump(CurDAG));
8120 LLVM_DEBUG(dbgs() << "\n");
8121
8122 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
8123 MadeChange = true;
8124 }
8125 }
8126
8127 if (MadeChange)
8128 CurDAG->RemoveDeadNodes();
8129
8131}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 register class.
static std::optional< APInt > GetNEONSplatValue(SDValue N)
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static std::optional< APInt > DecodeNEONSplat(SDValue N)
static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 register class.
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1475
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:964
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2025
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:210
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.