LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
61 }
62
63 void Select(SDNode *Node) override;
64 void PreprocessISelDAG() override;
65
66 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
67 /// inline asm expressions.
68 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
69 InlineAsm::ConstraintCode ConstraintID,
70 std::vector<SDValue> &OutOps) override;
71
72 template <signed Low, signed High, signed Scale>
73 bool SelectRDVLImm(SDValue N, SDValue &Imm);
74
75 template <signed Low, signed High>
76 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
77
78 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
83 return SelectShiftedRegister(N, false, Reg, Shift);
84 }
85 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
86 return SelectShiftedRegister(N, true, Reg, Shift);
87 }
88 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
93 }
94 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
96 }
97 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
102 }
103 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
105 }
106 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 1, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 2, Base, OffImm);
114 }
115 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeIndexed(N, 4, Base, OffImm);
117 }
118 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeIndexed(N, 8, Base, OffImm);
120 }
121 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeIndexed(N, 16, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
129 }
130 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
131 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
132 }
133 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
134 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
135 }
136 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
137 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
138 }
139 template <unsigned Size, unsigned Max>
140 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
141 // Test if there is an appropriate addressing mode and check if the
142 // immediate fits.
143 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
144 if (Found) {
145 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
146 int64_t C = CI->getSExtValue();
147 if (C <= Max)
148 return true;
149 }
150 }
151
152 // Otherwise, base only, materialize address in register.
153 Base = N;
154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
155 return true;
156 }
157
158 template<int Width>
159 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
160 SDValue &SignExtend, SDValue &DoShift) {
161 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
162 }
163
164 template<int Width>
165 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
166 SDValue &SignExtend, SDValue &DoShift) {
167 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
168 }
169
170 bool SelectExtractHigh(SDValue N, SDValue &Res) {
171 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
172 N = N->getOperand(0);
173 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
174 !isa<ConstantSDNode>(N->getOperand(1)))
175 return false;
176 EVT VT = N->getValueType(0);
177 EVT LVT = N->getOperand(0).getValueType();
178 unsigned Index = N->getConstantOperandVal(1);
179 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
180 Index != VT.getVectorNumElements())
181 return false;
182 Res = N->getOperand(0);
183 return true;
184 }
185
186 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
187 if (N.getOpcode() != AArch64ISD::VLSHR)
188 return false;
189 SDValue Op = N->getOperand(0);
190 EVT VT = Op.getValueType();
191 unsigned ShtAmt = N->getConstantOperandVal(1);
192 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
193 return false;
194
195 APInt Imm;
196 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0)
199 << Op.getOperand(1).getConstantOperandVal(1));
200 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
201 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
202 Imm = APInt(VT.getScalarSizeInBits(),
203 Op.getOperand(1).getConstantOperandVal(0));
204 else
205 return false;
206
207 if (Imm != 1ULL << (ShtAmt - 1))
208 return false;
209
210 Res1 = Op.getOperand(0);
211 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
212 return true;
213 }
214
215 bool SelectDupZeroOrUndef(SDValue N) {
216 switch(N->getOpcode()) {
217 case ISD::UNDEF:
218 return true;
219 case AArch64ISD::DUP:
220 case ISD::SPLAT_VECTOR: {
221 auto Opnd0 = N->getOperand(0);
222 if (isNullConstant(Opnd0))
223 return true;
224 if (isNullFPConstant(Opnd0))
225 return true;
226 break;
227 }
228 default:
229 break;
230 }
231
232 return false;
233 }
234
235 bool SelectAny(SDValue) { return true; }
236
237 bool SelectDupZero(SDValue N) {
238 switch(N->getOpcode()) {
239 case AArch64ISD::DUP:
240 case ISD::SPLAT_VECTOR: {
241 auto Opnd0 = N->getOperand(0);
242 if (isNullConstant(Opnd0))
243 return true;
244 if (isNullFPConstant(Opnd0))
245 return true;
246 break;
247 }
248 }
249
250 return false;
251 }
252
253 template <MVT::SimpleValueType VT, bool Negate>
254 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
255 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
256 }
257
258 template <MVT::SimpleValueType VT, bool Negate>
259 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
260 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
261 }
262
263 template <MVT::SimpleValueType VT>
264 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
265 return SelectSVECpyDupImm(N, VT, Imm, Shift);
266 }
267
268 template <MVT::SimpleValueType VT, bool Invert = false>
269 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
270 return SelectSVELogicalImm(N, VT, Imm, Invert);
271 }
272
273 template <MVT::SimpleValueType VT>
274 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
275 return SelectSVEArithImm(N, VT, Imm);
276 }
277
278 template <unsigned Low, unsigned High, bool AllowSaturation = false>
279 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
280 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
281 }
282
283 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
284 if (N->getOpcode() != ISD::SPLAT_VECTOR)
285 return false;
286
287 EVT EltVT = N->getValueType(0).getVectorElementType();
288 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
289 /* High */ EltVT.getFixedSizeInBits(),
290 /* AllowSaturation */ true, Imm);
291 }
292
293 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
294 template<signed Min, signed Max, signed Scale, bool Shift>
295 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 return false;
298
299 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
300 if (Shift)
301 MulImm = 1LL << MulImm;
302
303 if ((MulImm % std::abs(Scale)) != 0)
304 return false;
305
306 MulImm /= Scale;
307 if ((MulImm >= Min) && (MulImm <= Max)) {
308 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
309 return true;
310 }
311
312 return false;
313 }
314
315 template <signed Max, signed Scale>
316 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 return false;
319
320 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
321
322 if (MulImm >= 0 && MulImm <= Max) {
323 MulImm *= Scale;
324 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
325 return true;
326 }
327
328 return false;
329 }
330
331 template <unsigned BaseReg, unsigned Max>
332 bool ImmToReg(SDValue N, SDValue &Imm) {
333 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
334 uint64_t C = CI->getZExtValue();
335
336 if (C > Max)
337 return false;
338
339 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
340 return true;
341 }
342 return false;
343 }
344
345 /// Form sequences of consecutive 64/128-bit registers for use in NEON
346 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
347 /// between 1 and 4 elements. If it contains a single element that is returned
348 /// unchanged; otherwise a REG_SEQUENCE value is returned.
351 // Form a sequence of SVE registers for instructions using list of vectors,
352 // e.g. structured loads and stores (ldN, stN).
353 SDValue createZTuple(ArrayRef<SDValue> Vecs);
354
355 // Similar to above, except the register must start at a multiple of the
356 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
357 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
358
359 /// Generic helper for the createDTuple/createQTuple
360 /// functions. Those should almost always be called instead.
361 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
362 const unsigned SubRegs[]);
363
364 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
365
366 bool tryIndexedLoad(SDNode *N);
367
368 void SelectPtrauthAuth(SDNode *N);
369 void SelectPtrauthResign(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395 bool IsTupleInput, unsigned Opc);
396 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397
398 template <unsigned MaxIdx, unsigned Scale>
399 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400 unsigned Op);
401 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402 unsigned Op, unsigned MaxIdx, unsigned Scale,
403 unsigned BaseReg = 0);
404 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405 /// SVE Reg+Imm addressing mode.
406 template <int64_t Min, int64_t Max>
407 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408 SDValue &OffImm);
409 /// SVE Reg+Reg address mode.
410 template <unsigned Scale>
411 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413 }
414
415 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
416 unsigned Opc, uint32_t MaxImm);
417
418 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
419
420 template <unsigned MaxIdx, unsigned Scale>
421 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
422 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
423 }
424
425 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
430 unsigned Opc_rr, unsigned Opc_ri);
431 std::tuple<unsigned, SDValue, SDValue>
432 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
433 const SDValue &OldBase, const SDValue &OldOffset,
434 unsigned Scale);
435
436 bool tryBitfieldExtractOp(SDNode *N);
437 bool tryBitfieldExtractOpFromSExt(SDNode *N);
438 bool tryBitfieldInsertOp(SDNode *N);
439 bool tryBitfieldInsertInZeroOp(SDNode *N);
440 bool tryShiftAmountMod(SDNode *N);
441
442 bool tryReadRegister(SDNode *N);
443 bool tryWriteRegister(SDNode *N);
444
445 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
446 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
447
448 bool trySelectXAR(SDNode *N);
449
450// Include the pieces autogenerated from the target description.
451#include "AArch64GenDAGISel.inc"
452
453private:
454 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
455 SDValue &Shift);
456 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
457 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
458 SDValue &OffImm) {
459 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
460 }
461 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
462 unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &OffImm);
466 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &OffImm);
468 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
469 SDValue &Offset, SDValue &SignExtend,
470 SDValue &DoShift);
471 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
472 SDValue &Offset, SDValue &SignExtend,
473 SDValue &DoShift);
474 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
475 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
476 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
477 SDValue &Offset, SDValue &SignExtend);
478
479 template<unsigned RegWidth>
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
481 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
482 }
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template <unsigned RegWidth>
486 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
488 }
489 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
490
491 template<unsigned RegWidth>
492 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
493 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
494 }
495
496 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
497 unsigned Width);
498
499 bool SelectCMP_SWAP(SDNode *N);
500
501 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
502 bool Negate);
503 bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
504 SDValue &Shift, bool Negate);
505 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
506 bool Negate);
507 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
508 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
509
510 // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
511 bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
512 bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
513 bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);
514
515 bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
516 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
517 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
518 bool AllowSaturation, SDValue &Imm);
519
520 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
521 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
522 SDValue &Offset);
523 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
524 SDValue &Offset, unsigned Scale = 1);
525
526 bool SelectAllActivePredicate(SDValue N);
527 bool SelectAnyPredicate(SDValue N);
528
529 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
530
531 template <bool MatchCBB>
532 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
533};
534
535class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
536public:
537 static char ID;
538 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
539 CodeGenOptLevel OptLevel)
541 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
542};
543} // end anonymous namespace
544
545char AArch64DAGToDAGISelLegacy::ID = 0;
546
547INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
548
549/// addBitcastHints - This method adds bitcast hints to the operands of a node
550/// to help instruction selector determine which operands are in Neon registers.
552 SDLoc DL(&N);
553 auto getFloatVT = [&](EVT VT) {
554 EVT ScalarVT = VT.getScalarType();
555 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
556 return VT.changeElementType(*(DAG.getContext()),
557 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
558 };
560 NewOps.reserve(N.getNumOperands());
561
562 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
563 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
564 N.getOperand(I));
565 NewOps.push_back(bitcasted);
566 }
567 EVT OrigVT = N.getValueType(0);
568 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
569 return DAG.getBitcast(OrigVT, OpNode);
570}
571
572/// isIntImmediate - This method tests to see if the node is a constant
573/// operand. If so Imm will receive the 32-bit value.
574static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
576 Imm = C->getZExtValue();
577 return true;
578 }
579 return false;
580}
581
582// isIntImmediate - This method tests to see if a constant operand.
583// If so Imm will receive the value.
584static bool isIntImmediate(SDValue N, uint64_t &Imm) {
585 return isIntImmediate(N.getNode(), Imm);
586}
587
588// isOpcWithIntImmediate - This method tests to see if the node is a specific
589// opcode and that it has a immediate integer right operand.
590// If so Imm will receive the 32 bit value.
591static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
592 uint64_t &Imm) {
593 return N->getOpcode() == Opc &&
594 isIntImmediate(N->getOperand(1).getNode(), Imm);
595}
596
597// isIntImmediateEq - This method tests to see if N is a constant operand that
598// is equivalent to 'ImmExpected'.
599#ifndef NDEBUG
600static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
601 uint64_t Imm;
602 if (!isIntImmediate(N.getNode(), Imm))
603 return false;
604 return Imm == ImmExpected;
605}
606#endif
607
608static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
609 assert(RegWidth == 32 || RegWidth == 64);
610 if (RegWidth == 32)
611 return APInt(RegWidth,
613 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
614}
615
616// Decodes the integer splat value from a NEON splat operation.
617static std::optional<APInt> DecodeNEONSplat(SDValue N) {
618 assert(N.getValueType().isInteger() && "Only integers are supported");
619 unsigned SplatWidth = N.getScalarValueSizeInBits();
620 if (N->getOpcode() == AArch64ISD::NVCAST) {
621 SDValue Op = N->getOperand(0);
622 if (Op.getOpcode() != AArch64ISD::FMOV ||
623 Op.getScalarValueSizeInBits() != N.getScalarValueSizeInBits())
624 return std::nullopt;
625 return DecodeFMOVImm(Op.getConstantOperandVal(0), SplatWidth);
626 }
627 if (N->getOpcode() == AArch64ISD::MOVI)
628 return APInt(SplatWidth, N.getConstantOperandVal(0));
629 if (N->getOpcode() == AArch64ISD::MOVIshift)
630 return APInt(SplatWidth, N.getConstantOperandVal(0)
631 << N.getConstantOperandVal(1));
632 if (N->getOpcode() == AArch64ISD::MVNIshift)
633 return ~APInt(SplatWidth, N.getConstantOperandVal(0)
634 << N.getConstantOperandVal(1));
635 if (N->getOpcode() == AArch64ISD::DUP)
636 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
637 return Const->getAPIntValue().trunc(SplatWidth);
638 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
639 // in AArch64ISelLowering. AArch64ISD::MOVIedit support will allow more folds.
640 return std::nullopt;
641}
642
643bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
644 SDValue &Imm) {
645 std::optional<APInt> ImmVal = DecodeNEONSplat(N);
646 if (!ImmVal)
647 return false;
648 uint64_t Encoding;
649 if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
650 ImmVal->getZExtValue(), Encoding))
651 return false;
652
653 Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
654 return true;
655}
656
657bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
658 SDValue &Shift) {
659 if (std::optional<APInt> ImmVal = DecodeNEONSplat(N))
660 return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
661 N.getValueType().getScalarType().getSimpleVT(),
662 Imm, Shift,
663 /*Negate=*/false);
664 return false;
665}
666
667bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
668 SDValue &Imm) {
669 if (std::optional<APInt> ImmVal = DecodeNEONSplat(N))
670 return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
671 return false;
672}
673
674bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
675 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
676 std::vector<SDValue> &OutOps) {
677 switch(ConstraintID) {
678 default:
679 llvm_unreachable("Unexpected asm memory constraint");
680 case InlineAsm::ConstraintCode::m:
681 case InlineAsm::ConstraintCode::o:
682 case InlineAsm::ConstraintCode::Q:
683 // We need to make sure that this one operand does not end up in XZR, thus
684 // require the address to be in a PointerRegClass register.
685 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
686 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
687 SDLoc dl(Op);
688 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
689 SDValue NewOp =
690 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
691 dl, Op.getValueType(),
692 Op, RC), 0);
693 OutOps.push_back(NewOp);
694 return false;
695 }
696 return true;
697}
698
699/// SelectArithImmed - Select an immediate value that can be represented as
700/// a 12-bit value shifted left by either 0 or 12. If so, return true with
701/// Val set to the 12-bit value and Shift set to the shifter operand.
702bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
703 SDValue &Shift) {
704 // This function is called from the addsub_shifted_imm ComplexPattern,
705 // which lists [imm] as the list of opcode it's interested in, however
706 // we still need to check whether the operand is actually an immediate
707 // here because the ComplexPattern opcode list is only used in
708 // root-level opcode matching.
709 if (!isa<ConstantSDNode>(N.getNode()))
710 return false;
711
712 uint64_t Immed = N.getNode()->getAsZExtVal();
713 unsigned ShiftAmt;
714
715 if (Immed >> 12 == 0) {
716 ShiftAmt = 0;
717 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
718 ShiftAmt = 12;
719 Immed = Immed >> 12;
720 } else
721 return false;
722
723 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
724 SDLoc dl(N);
725 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
726 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
727 return true;
728}
729
730/// SelectNegArithImmed - As above, but negates the value before trying to
731/// select it.
732bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
733 SDValue &Shift) {
734 // This function is called from the addsub_shifted_imm ComplexPattern,
735 // which lists [imm] as the list of opcode it's interested in, however
736 // we still need to check whether the operand is actually an immediate
737 // here because the ComplexPattern opcode list is only used in
738 // root-level opcode matching.
739 if (!isa<ConstantSDNode>(N.getNode()))
740 return false;
741
742 // The immediate operand must be a 24-bit zero-extended immediate.
743 uint64_t Immed = N.getNode()->getAsZExtVal();
744
745 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
746 // have the opposite effect on the C flag, so this pattern mustn't match under
747 // those circumstances.
748 if (Immed == 0)
749 return false;
750
751 if (N.getValueType() == MVT::i32)
752 Immed = ~((uint32_t)Immed) + 1;
753 else
754 Immed = ~Immed + 1ULL;
755 if (Immed & 0xFFFFFFFFFF000000ULL)
756 return false;
757
758 Immed &= 0xFFFFFFULL;
759 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
760 Shift);
761}
762
763/// getShiftTypeForNode - Translate a shift node to the corresponding
764/// ShiftType value.
766 switch (N.getOpcode()) {
767 default:
769 case ISD::SHL:
770 return AArch64_AM::LSL;
771 case ISD::SRL:
772 return AArch64_AM::LSR;
773 case ISD::SRA:
774 return AArch64_AM::ASR;
775 case ISD::ROTR:
776 return AArch64_AM::ROR;
777 }
778}
779
781 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
782}
783
784/// Determine whether it is worth it to fold SHL into the addressing
785/// mode.
787 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
788 // It is worth folding logical shift of up to three places.
789 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
790 if (!CSD)
791 return false;
792 unsigned ShiftVal = CSD->getZExtValue();
793 if (ShiftVal > 3)
794 return false;
795
796 // Check if this particular node is reused in any non-memory related
797 // operation. If yes, do not try to fold this node into the address
798 // computation, since the computation will be kept.
799 const SDNode *Node = V.getNode();
800 for (SDNode *UI : Node->users())
801 if (!isMemOpOrPrefetch(UI))
802 for (SDNode *UII : UI->users())
803 if (!isMemOpOrPrefetch(UII))
804 return false;
805 return true;
806}
807
808/// Determine whether it is worth to fold V into an extended register addressing
809/// mode.
810bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
811 // Trivial if we are optimizing for code size or if there is only
812 // one use of the value.
813 if (CurDAG->shouldOptForSize() || V.hasOneUse())
814 return true;
815
816 // If a subtarget has a slow shift, folding a shift into multiple loads
817 // costs additional micro-ops.
818 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
819 return false;
820
821 // Check whether we're going to emit the address arithmetic anyway because
822 // it's used by a non-address operation.
823 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
824 return true;
825 if (V.getOpcode() == ISD::ADD) {
826 const SDValue LHS = V.getOperand(0);
827 const SDValue RHS = V.getOperand(1);
828 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
829 return true;
830 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
831 return true;
832 }
833
834 // It hurts otherwise, since the value will be reused.
835 return false;
836}
837
838/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
839/// to select more shifted register
840bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
841 SDValue &Shift) {
842 EVT VT = N.getValueType();
843 if (VT != MVT::i32 && VT != MVT::i64)
844 return false;
845
846 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
847 return false;
848 SDValue LHS = N.getOperand(0);
849 if (!LHS->hasOneUse())
850 return false;
851
852 unsigned LHSOpcode = LHS->getOpcode();
853 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
854 return false;
855
856 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
857 if (!ShiftAmtNode)
858 return false;
859
860 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
861 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
862 if (!RHSC)
863 return false;
864
865 APInt AndMask = RHSC->getAPIntValue();
866 unsigned LowZBits, MaskLen;
867 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
868 return false;
869
870 unsigned BitWidth = N.getValueSizeInBits();
871 SDLoc DL(LHS);
872 uint64_t NewShiftC;
873 unsigned NewShiftOp;
874 if (LHSOpcode == ISD::SHL) {
875 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
876 // BitWidth != LowZBits + MaskLen doesn't match the pattern
877 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
878 return false;
879
880 NewShiftC = LowZBits - ShiftAmtC;
881 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
882 } else {
883 if (LowZBits == 0)
884 return false;
885
886 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
887 NewShiftC = LowZBits + ShiftAmtC;
888 if (NewShiftC >= BitWidth)
889 return false;
890
891 // SRA need all high bits
892 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
893 return false;
894
895 // SRL high bits can be 0 or 1
896 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
897 return false;
898
899 if (LHSOpcode == ISD::SRL)
900 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
901 else
902 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
903 }
904
905 assert(NewShiftC < BitWidth && "Invalid shift amount");
906 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
907 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
908 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
909 NewShiftAmt, BitWidthMinus1),
910 0);
911 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
912 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
913 return true;
914}
915
916/// getExtendTypeForNode - Translate an extend node to the corresponding
917/// ExtendType value.
919getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
920 if (N.getOpcode() == ISD::SIGN_EXTEND ||
921 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
922 EVT SrcVT;
923 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
924 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
925 else
926 SrcVT = N.getOperand(0).getValueType();
927
928 if (!IsLoadStore && SrcVT == MVT::i8)
929 return AArch64_AM::SXTB;
930 else if (!IsLoadStore && SrcVT == MVT::i16)
931 return AArch64_AM::SXTH;
932 else if (SrcVT == MVT::i32)
933 return AArch64_AM::SXTW;
934 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
935
937 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
938 N.getOpcode() == ISD::ANY_EXTEND) {
939 EVT SrcVT = N.getOperand(0).getValueType();
940 if (!IsLoadStore && SrcVT == MVT::i8)
941 return AArch64_AM::UXTB;
942 else if (!IsLoadStore && SrcVT == MVT::i16)
943 return AArch64_AM::UXTH;
944 else if (SrcVT == MVT::i32)
945 return AArch64_AM::UXTW;
946 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
947
949 } else if (N.getOpcode() == ISD::AND) {
950 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
951 if (!CSD)
953 uint64_t AndMask = CSD->getZExtValue();
954
955 switch (AndMask) {
956 default:
958 case 0xFF:
959 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
960 case 0xFFFF:
961 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
962 case 0xFFFFFFFF:
963 return AArch64_AM::UXTW;
964 }
965 }
966
968}
969
970/// Determine whether it is worth to fold V into an extended register of an
971/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
972/// instruction, and the shift should be treated as worth folding even if has
973/// multiple uses.
974bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
975 // Trivial if we are optimizing for code size or if there is only
976 // one use of the value.
977 if (CurDAG->shouldOptForSize() || V.hasOneUse())
978 return true;
979
980 // If a subtarget has a fastpath LSL we can fold a logical shift into
981 // the add/sub and save a cycle.
982 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
983 V.getConstantOperandVal(1) <= 4 &&
985 return true;
986
987 // It hurts otherwise, since the value will be reused.
988 return false;
989}
990
991/// SelectShiftedRegister - Select a "shifted register" operand. If the value
992/// is not shifted, set the Shift operand to default of "LSL 0". The logical
993/// instructions allow the shifted register to be rotated, but the arithmetic
994/// instructions do not. The AllowROR parameter specifies whether ROR is
995/// supported.
996bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
997 SDValue &Reg, SDValue &Shift) {
998 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
999 return true;
1000
1002 if (ShType == AArch64_AM::InvalidShiftExtend)
1003 return false;
1004 if (!AllowROR && ShType == AArch64_AM::ROR)
1005 return false;
1006
1007 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1008 unsigned BitSize = N.getValueSizeInBits();
1009 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1010 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
1011
1012 Reg = N.getOperand(0);
1013 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
1014 return isWorthFoldingALU(N, true);
1015 }
1016
1017 return false;
1018}
1019
1020/// Instructions that accept extend modifiers like UXTW expect the register
1021/// being extended to be a GPR32, but the incoming DAG might be acting on a
1022/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1023/// this is the case.
1025 if (N.getValueType() == MVT::i32)
1026 return N;
1027
1028 SDLoc dl(N);
1029 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
1030}
1031
1032// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1033template<signed Low, signed High, signed Scale>
1034bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1035 if (!isa<ConstantSDNode>(N))
1036 return false;
1037
1038 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
1039 if ((MulImm % std::abs(Scale)) == 0) {
1040 int64_t RDVLImm = MulImm / Scale;
1041 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1042 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
1043 return true;
1044 }
1045 }
1046
1047 return false;
1048}
1049
1050// Returns a suitable RDSVL multiplier from a left shift.
1051template <signed Low, signed High>
1052bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1053 if (!isa<ConstantSDNode>(N))
1054 return false;
1055
1056 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
1057 if (MulImm >= Low && MulImm <= High) {
1058 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
1059 return true;
1060 }
1061
1062 return false;
1063}
1064
1065/// SelectArithExtendedRegister - Select a "extended register" operand. This
1066/// operand folds in an extend followed by an optional left shift.
1067bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
1068 SDValue &Shift) {
1069 unsigned ShiftVal = 0;
1071
1072 if (N.getOpcode() == ISD::SHL) {
1073 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1074 if (!CSD)
1075 return false;
1076 ShiftVal = CSD->getZExtValue();
1077 if (ShiftVal > 4)
1078 return false;
1079
1080 Ext = getExtendTypeForNode(N.getOperand(0));
1082 return false;
1083
1084 Reg = N.getOperand(0).getOperand(0);
1085 } else {
1086 Ext = getExtendTypeForNode(N);
1088 return false;
1089
1090 // Don't match sext of vector extracts. These can use SMOV, but if we match
1091 // this as an extended register, we'll always fold the extend into an ALU op
1092 // user of the extend (which results in a UMOV).
1094 SDValue Op = N.getOperand(0);
1095 if (Op->getOpcode() == ISD::ANY_EXTEND)
1096 Op = Op->getOperand(0);
1097 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1098 Op.getOperand(0).getValueType().isFixedLengthVector())
1099 return false;
1100 }
1101
1102 Reg = N.getOperand(0);
1103
1104 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1105 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1106 auto isDef32 = [](SDValue N) {
1107 unsigned Opc = N.getOpcode();
1108 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1111 Opc != ISD::FREEZE;
1112 };
1113 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1114 isDef32(Reg))
1115 return false;
1116 }
1117
1118 // AArch64 mandates that the RHS of the operation must use the smallest
1119 // register class that could contain the size being extended from. Thus,
1120 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1121 // there might not be an actual 32-bit value in the program. We can
1122 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
1123 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1124 Reg = narrowIfNeeded(CurDAG, Reg);
1125 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1126 MVT::i32);
1127 return isWorthFoldingALU(N);
1128}
1129
1130/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1131/// operand is referred by the instructions have SP operand
1132bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1133 SDValue &Shift) {
1134 unsigned ShiftVal = 0;
1136
1137 if (N.getOpcode() != ISD::SHL)
1138 return false;
1139
1140 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1141 if (!CSD)
1142 return false;
1143 ShiftVal = CSD->getZExtValue();
1144 if (ShiftVal > 4)
1145 return false;
1146
1147 Ext = AArch64_AM::UXTX;
1148 Reg = N.getOperand(0);
1149 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1150 MVT::i32);
1151 return isWorthFoldingALU(N);
1152}
1153
1154/// If there's a use of this ADDlow that's not itself a load/store then we'll
1155/// need to create a real ADD instruction from it anyway and there's no point in
1156/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1157/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1158/// leads to duplicated ADRP instructions.
1160 for (auto *User : N->users()) {
1161 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1162 User->getOpcode() != ISD::ATOMIC_LOAD &&
1163 User->getOpcode() != ISD::ATOMIC_STORE)
1164 return false;
1165
1166 // ldar and stlr have much more restrictive addressing modes (just a
1167 // register).
1168 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1169 return false;
1170 }
1171
1172 return true;
1173}
1174
1175/// Check if the immediate offset is valid as a scaled immediate.
1176static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1177 unsigned Size) {
1178 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1179 Offset < (Range << Log2_32(Size)))
1180 return true;
1181 return false;
1182}
1183
1184/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1185/// immediate" address. The "Size" argument is the size in bytes of the memory
1186/// reference, which determines the scale.
1187bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1188 unsigned BW, unsigned Size,
1189 SDValue &Base,
1190 SDValue &OffImm) {
1191 SDLoc dl(N);
1192 const DataLayout &DL = CurDAG->getDataLayout();
1193 const TargetLowering *TLI = getTargetLowering();
1194 if (N.getOpcode() == ISD::FrameIndex) {
1195 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1196 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1197 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1198 return true;
1199 }
1200
1201 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1202 // selected here doesn't support labels/immediates, only base+offset.
1203 if (CurDAG->isBaseWithConstantOffset(N)) {
1204 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1205 if (IsSignedImm) {
1206 int64_t RHSC = RHS->getSExtValue();
1207 unsigned Scale = Log2_32(Size);
1208 int64_t Range = 0x1LL << (BW - 1);
1209
1210 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1211 RHSC < (Range << Scale)) {
1212 Base = N.getOperand(0);
1213 if (Base.getOpcode() == ISD::FrameIndex) {
1214 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1215 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1216 }
1217 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1218 return true;
1219 }
1220 } else {
1221 // unsigned Immediate
1222 uint64_t RHSC = RHS->getZExtValue();
1223 unsigned Scale = Log2_32(Size);
1224 uint64_t Range = 0x1ULL << BW;
1225
1226 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1227 Base = N.getOperand(0);
1228 if (Base.getOpcode() == ISD::FrameIndex) {
1229 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1230 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1231 }
1232 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1233 return true;
1234 }
1235 }
1236 }
1237 }
1238 // Base only. The address will be materialized into a register before
1239 // the memory is accessed.
1240 // add x0, Xbase, #offset
1241 // stp x1, x2, [x0]
1242 Base = N;
1243 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1244 return true;
1245}
1246
1247/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1248/// immediate" address. The "Size" argument is the size in bytes of the memory
1249/// reference, which determines the scale.
1250bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1251 SDValue &Base, SDValue &OffImm) {
1252 SDLoc dl(N);
1253 const DataLayout &DL = CurDAG->getDataLayout();
1254 const TargetLowering *TLI = getTargetLowering();
1255 if (N.getOpcode() == ISD::FrameIndex) {
1256 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1257 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1258 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1259 return true;
1260 }
1261
1262 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1263 GlobalAddressSDNode *GAN =
1264 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1265 Base = N.getOperand(0);
1266 OffImm = N.getOperand(1);
1267 if (!GAN)
1268 return true;
1269
1270 if (GAN->getOffset() % Size == 0 &&
1272 return true;
1273 }
1274
1275 if (CurDAG->isBaseWithConstantOffset(N)) {
1276 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1277 int64_t RHSC = (int64_t)RHS->getZExtValue();
1278 unsigned Scale = Log2_32(Size);
1279 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1280 Base = N.getOperand(0);
1281 if (Base.getOpcode() == ISD::FrameIndex) {
1282 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1283 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1284 }
1285 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1286 return true;
1287 }
1288 }
1289 }
1290
1291 // Before falling back to our general case, check if the unscaled
1292 // instructions can handle this. If so, that's preferable.
1293 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1294 return false;
1295
1296 // Base only. The address will be materialized into a register before
1297 // the memory is accessed.
1298 // add x0, Xbase, #offset
1299 // ldr x0, [x0]
1300 Base = N;
1301 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1302 return true;
1303}
1304
1305/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1306/// immediate" address. This should only match when there is an offset that
1307/// is not valid for a scaled immediate addressing mode. The "Size" argument
1308/// is the size in bytes of the memory reference, which is needed here to know
1309/// what is valid for a scaled immediate.
1310bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1311 SDValue &Base,
1312 SDValue &OffImm) {
1313 if (!CurDAG->isBaseWithConstantOffset(N))
1314 return false;
1315 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1316 int64_t RHSC = RHS->getSExtValue();
1317 if (RHSC >= -256 && RHSC < 256) {
1318 Base = N.getOperand(0);
1319 if (Base.getOpcode() == ISD::FrameIndex) {
1320 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1321 const TargetLowering *TLI = getTargetLowering();
1322 Base = CurDAG->getTargetFrameIndex(
1323 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1324 }
1325 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1326 return true;
1327 }
1328 }
1329 return false;
1330}
1331
1333 SDLoc dl(N);
1334 SDValue ImpDef = SDValue(
1335 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1336 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1337 N);
1338}
1339
1340/// Check if the given SHL node (\p N), can be used to form an
1341/// extended register for an addressing mode.
1342bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1343 bool WantExtend, SDValue &Offset,
1344 SDValue &SignExtend) {
1345 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1346 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1347 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1348 return false;
1349
1350 SDLoc dl(N);
1351 if (WantExtend) {
1353 getExtendTypeForNode(N.getOperand(0), true);
1355 return false;
1356
1357 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1358 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1359 MVT::i32);
1360 } else {
1361 Offset = N.getOperand(0);
1362 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1363 }
1364
1365 unsigned LegalShiftVal = Log2_32(Size);
1366 unsigned ShiftVal = CSD->getZExtValue();
1367
1368 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1369 return false;
1370
1371 return isWorthFoldingAddr(N, Size);
1372}
1373
1374bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1376 SDValue &SignExtend,
1377 SDValue &DoShift) {
1378 if (N.getOpcode() != ISD::ADD)
1379 return false;
1380 SDValue LHS = N.getOperand(0);
1381 SDValue RHS = N.getOperand(1);
1382 SDLoc dl(N);
1383
1384 // We don't want to match immediate adds here, because they are better lowered
1385 // to the register-immediate addressing modes.
1387 return false;
1388
1389 // Check if this particular node is reused in any non-memory related
1390 // operation. If yes, do not try to fold this node into the address
1391 // computation, since the computation will be kept.
1392 const SDNode *Node = N.getNode();
1393 for (SDNode *UI : Node->users()) {
1394 if (!isMemOpOrPrefetch(UI))
1395 return false;
1396 }
1397
1398 // Remember if it is worth folding N when it produces extended register.
1399 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1400
1401 // Try to match a shifted extend on the RHS.
1402 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1403 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1404 Base = LHS;
1405 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1406 return true;
1407 }
1408
1409 // Try to match a shifted extend on the LHS.
1410 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1411 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1412 Base = RHS;
1413 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1414 return true;
1415 }
1416
1417 // There was no shift, whatever else we find.
1418 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1419
1421 // Try to match an unshifted extend on the LHS.
1422 if (IsExtendedRegisterWorthFolding &&
1423 (Ext = getExtendTypeForNode(LHS, true)) !=
1425 Base = RHS;
1426 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1427 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1428 MVT::i32);
1429 if (isWorthFoldingAddr(LHS, Size))
1430 return true;
1431 }
1432
1433 // Try to match an unshifted extend on the RHS.
1434 if (IsExtendedRegisterWorthFolding &&
1435 (Ext = getExtendTypeForNode(RHS, true)) !=
1437 Base = LHS;
1438 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1439 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1440 MVT::i32);
1441 if (isWorthFoldingAddr(RHS, Size))
1442 return true;
1443 }
1444
1445 return false;
1446}
1447
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12": only bits 12-23 set.
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
    // A 16-bit chunk at bit 16 or at bit 0 can be materialized by one MOVZ.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
1462
1463bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1465 SDValue &SignExtend,
1466 SDValue &DoShift) {
1467 if (N.getOpcode() != ISD::ADD)
1468 return false;
1469 SDValue LHS = N.getOperand(0);
1470 SDValue RHS = N.getOperand(1);
1471 SDLoc DL(N);
1472
1473 // Check if this particular node is reused in any non-memory related
1474 // operation. If yes, do not try to fold this node into the address
1475 // computation, since the computation will be kept.
1476 const SDNode *Node = N.getNode();
1477 for (SDNode *UI : Node->users()) {
1478 if (!isMemOpOrPrefetch(UI))
1479 return false;
1480 }
1481
1482 // Watch out if RHS is a wide immediate, it can not be selected into
1483 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1484 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1485 // instructions like:
1486 // MOV X0, WideImmediate
1487 // ADD X1, BaseReg, X0
1488 // LDR X2, [X1, 0]
1489 // For such situation, using [BaseReg, XReg] addressing mode can save one
1490 // ADD/SUB:
1491 // MOV X0, WideImmediate
1492 // LDR X2, [BaseReg, X0]
1493 if (isa<ConstantSDNode>(RHS)) {
1494 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1495 // Skip the immediate can be selected by load/store addressing mode.
1496 // Also skip the immediate can be encoded by a single ADD (SUB is also
1497 // checked by using -ImmOff).
1498 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1499 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1500 return false;
1501
1502 SDValue Ops[] = { RHS };
1503 SDNode *MOVI =
1504 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1505 SDValue MOVIV = SDValue(MOVI, 0);
1506 // This ADD of two X register will be selected into [Reg+Reg] mode.
1507 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1508 }
1509
1510 // Remember if it is worth folding N when it produces extended register.
1511 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1512
1513 // Try to match a shifted extend on the RHS.
1514 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1515 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1516 Base = LHS;
1517 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1518 return true;
1519 }
1520
1521 // Try to match a shifted extend on the LHS.
1522 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1523 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1524 Base = RHS;
1525 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1526 return true;
1527 }
1528
1529 // Match any non-shifted, non-extend, non-immediate add expression.
1530 Base = LHS;
1531 Offset = RHS;
1532 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1533 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1534 // Reg1 + Reg2 is free: no check needed.
1535 return true;
1536}
1537
1538SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1539 static const unsigned RegClassIDs[] = {
1540 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1541 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1542 AArch64::dsub2, AArch64::dsub3};
1543
1544 return createTuple(Regs, RegClassIDs, SubRegs);
1545}
1546
1547SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1548 static const unsigned RegClassIDs[] = {
1549 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1550 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1551 AArch64::qsub2, AArch64::qsub3};
1552
1553 return createTuple(Regs, RegClassIDs, SubRegs);
1554}
1555
1556SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1557 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1558 AArch64::ZPR3RegClassID,
1559 AArch64::ZPR4RegClassID};
1560 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1561 AArch64::zsub2, AArch64::zsub3};
1562
1563 return createTuple(Regs, RegClassIDs, SubRegs);
1564}
1565
1566SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1567 assert(Regs.size() == 2 || Regs.size() == 4);
1568
1569 // The createTuple interface requires 3 RegClassIDs for each possible
1570 // tuple type even though we only have them for ZPR2 and ZPR4.
1571 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1572 AArch64::ZPR4Mul4RegClassID};
1573 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1574 AArch64::zsub2, AArch64::zsub3};
1575 return createTuple(Regs, RegClassIDs, SubRegs);
1576}
1577
1578SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1579 const unsigned RegClassIDs[],
1580 const unsigned SubRegs[]) {
1581 // There's no special register-class for a vector-list of 1 element: it's just
1582 // a vector.
1583 if (Regs.size() == 1)
1584 return Regs[0];
1585
1586 assert(Regs.size() >= 2 && Regs.size() <= 4);
1587
1588 SDLoc DL(Regs[0]);
1589
1591
1592 // First operand of REG_SEQUENCE is the desired RegClass.
1593 Ops.push_back(
1594 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1595
1596 // Then we get pairs of source & subregister-position for the components.
1597 for (unsigned i = 0; i < Regs.size(); ++i) {
1598 Ops.push_back(Regs[i]);
1599 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1600 }
1601
1602 SDNode *N =
1603 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1604 return SDValue(N, 0);
1605}
1606
1607void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1608 bool isExt) {
1609 SDLoc dl(N);
1610 EVT VT = N->getValueType(0);
1611
1612 unsigned ExtOff = isExt;
1613
1614 // Form a REG_SEQUENCE to force register allocation.
1615 unsigned Vec0Off = ExtOff + 1;
1616 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1617 SDValue RegSeq = createQTuple(Regs);
1618
1620 if (isExt)
1621 Ops.push_back(N->getOperand(1));
1622 Ops.push_back(RegSeq);
1623 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1624 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1625}
1626
1627static std::tuple<SDValue, SDValue>
1629 SDLoc DL(Disc);
1630 SDValue AddrDisc;
1631 SDValue ConstDisc;
1632
1633 // If this is a blend, remember the constant and address discriminators.
1634 // Otherwise, it's either a constant discriminator, or a non-blended
1635 // address discriminator.
1636 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1637 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1638 AddrDisc = Disc->getOperand(1);
1639 ConstDisc = Disc->getOperand(2);
1640 } else {
1641 ConstDisc = Disc;
1642 }
1643
1644 // If the constant discriminator (either the blend RHS, or the entire
1645 // discriminator value) isn't a 16-bit constant, bail out, and let the
1646 // discriminator be computed separately.
1647 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1648 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1649 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1650
1651 // If there's no address discriminator, use XZR directly.
1652 if (!AddrDisc)
1653 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1654
1655 return std::make_tuple(
1656 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1657 AddrDisc);
1658}
1659
1660void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1661 SDLoc DL(N);
1662 // IntrinsicID is operand #0
1663 SDValue Val = N->getOperand(1);
1664 SDValue AUTKey = N->getOperand(2);
1665 SDValue AUTDisc = N->getOperand(3);
1666
1667 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1668 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1669
1670 SDValue AUTAddrDisc, AUTConstDisc;
1671 std::tie(AUTConstDisc, AUTAddrDisc) =
1672 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1673
1674 if (!Subtarget->isX16X17Safer()) {
1675 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1676 // Copy deactivation symbol if present.
1677 if (N->getNumOperands() > 4)
1678 Ops.push_back(N->getOperand(4));
1679
1680 SDNode *AUT =
1681 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1682 ReplaceNode(N, AUT);
1683 } else {
1684 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1685 AArch64::X16, Val, SDValue());
1686 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1687
1688 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1689 ReplaceNode(N, AUT);
1690 }
1691}
1692
1693void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1694 SDLoc DL(N);
1695 // IntrinsicID is operand #0, if W_CHAIN it is #1
1696 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1697 SDValue Val = N->getOperand(OffsetBase + 1);
1698 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1699 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1700 SDValue PACKey = N->getOperand(OffsetBase + 4);
1701 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1702 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1703 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1704
1705 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1706 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1707
1708 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1709 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1710
1711 SDValue AUTAddrDisc, AUTConstDisc;
1712 std::tie(AUTConstDisc, AUTAddrDisc) =
1713 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1714
1715 SDValue PACAddrDisc, PACConstDisc;
1716 std::tie(PACConstDisc, PACAddrDisc) =
1717 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1718
1719 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1720 AArch64::X16, Val, SDValue());
1721
1722 if (HasLoad) {
1723 SDValue Addend = N->getOperand(OffsetBase + 6);
1724 SDValue IncomingChain = N->getOperand(0);
1725 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1726 PACKey, PACConstDisc, PACAddrDisc,
1727 Addend, IncomingChain, X16Copy.getValue(1)};
1728
1729 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1730 MVT::i64, MVT::Other, Ops);
1731 ReplaceNode(N, AUTRELLOADPAC);
1732 } else {
1733 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1734 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1735
1736 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1737 ReplaceNode(N, AUTPAC);
1738 }
1739}
1740
/// Try to select a pre/post-indexed load into the matching AArch64
/// pre/post-indexed load instruction (LDRXpre/post etc., or LD1Onev*_POST for
/// vector post-increment loads). Returns true and replaces \p N on success,
/// false if no suitable instruction form exists for this VT/offset, in which
/// case selection falls through to other patterns.
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  // InsertTo64: the selected instruction only defines a 32-bit result, and a
  // SUBREG_TO_REG is emitted afterwards to produce the demanded i64.
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      // Zero/any-extending i32 load: the W-form zeroes the upper bits.
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 ||
             (VT.is64BitVector() && Subtarget->isLittleEndian())) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else if (VT.is64BitVector()) {
    // Big-endian 64-bit vector: only LD1 keeps the element order correct,
    // and LD1's post-increment form only supports incrementing by the
    // transfer size (8 bytes).
    if (IsPre || OffsetVal != 8)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev8b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev4h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev2s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev1d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else if (VT.is128BitVector()) {
    // Same as above for 128-bit vectors; the increment must be 16 bytes.
    if (IsPre || OffsetVal != 16)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev16b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev8h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev4s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev2d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDLoc dl(N);
  // LD1 encodes an immediate offset by using XZR as the offset register.
  SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
                       ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
                       : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  // Results: (write-back base : i64, loaded value : DstVT, chain).
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
                                               MVT::i64, LoadedVal, SubReg),
                        0);
  }

  // N's results are (value, write-back base, chain).
  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1880
1881void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1882 unsigned SubRegIdx) {
1883 SDLoc dl(N);
1884 EVT VT = N->getValueType(0);
1885 SDValue Chain = N->getOperand(0);
1886
1887 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1888 Chain};
1889
1890 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1891
1892 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1893 SDValue SuperReg = SDValue(Ld, 0);
1894 for (unsigned i = 0; i < NumVecs; ++i)
1895 ReplaceUses(SDValue(N, i),
1896 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1897
1898 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1899
1900 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1901 // because it's too simple to have needed special treatment during lowering.
1902 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1903 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1904 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1905 }
1906
1907 CurDAG->RemoveDeadNode(N);
1908}
1909
1910void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1911 unsigned Opc, unsigned SubRegIdx) {
1912 SDLoc dl(N);
1913 EVT VT = N->getValueType(0);
1914 SDValue Chain = N->getOperand(0);
1915
1916 SDValue Ops[] = {N->getOperand(1), // Mem operand
1917 N->getOperand(2), // Incremental
1918 Chain};
1919
1920 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1921 MVT::Untyped, MVT::Other};
1922
1923 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1924
1925 // Update uses of write back register
1926 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1927
1928 // Update uses of vector list
1929 SDValue SuperReg = SDValue(Ld, 1);
1930 if (NumVecs == 1)
1931 ReplaceUses(SDValue(N, 0), SuperReg);
1932 else
1933 for (unsigned i = 0; i < NumVecs; ++i)
1934 ReplaceUses(SDValue(N, i),
1935 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1936
1937 // Update the chain
1938 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1939 CurDAG->RemoveDeadNode(N);
1940}
1941
1942/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1943/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1944/// new Base and an SDValue representing the new offset.
1945std::tuple<unsigned, SDValue, SDValue>
1946AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1947 unsigned Opc_ri,
1948 const SDValue &OldBase,
1949 const SDValue &OldOffset,
1950 unsigned Scale) {
1951 SDValue NewBase = OldBase;
1952 SDValue NewOffset = OldOffset;
1953 // Detect a possible Reg+Imm addressing mode.
1954 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1955 N, OldBase, NewBase, NewOffset);
1956
1957 // Detect a possible reg+reg addressing mode, but only if we haven't already
1958 // detected a Reg+Imm one.
1959 const bool IsRegReg =
1960 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1961
1962 // Select the instruction.
1963 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1964}
1965
/// Element-type categories used by SelectOpcodeFromVT to validate a scalable
/// vector's element type before mapping it to an opcode.
enum class SelectTypeKind {
  Int1 = 0,    // i1 predicate elements only
  Int = 1,     // i8/i16/i32/i64 elements
  FP = 2,      // f16/bf16/f32/f64 elements
  AnyType = 3, // no element-type restriction
};
1972
1973/// This function selects an opcode from a list of opcodes, which is
1974/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1975/// element types, in this order.
1976template <SelectTypeKind Kind>
1977static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1978 // Only match scalable vector VTs
1979 if (!VT.isScalableVector())
1980 return 0;
1981
1982 EVT EltVT = VT.getVectorElementType();
1983 unsigned Key = VT.getVectorMinNumElements();
1984 switch (Kind) {
1986 break;
1988 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1989 EltVT != MVT::i64)
1990 return 0;
1991 break;
1993 if (EltVT != MVT::i1)
1994 return 0;
1995 break;
1996 case SelectTypeKind::FP:
1997 if (EltVT == MVT::bf16)
1998 Key = 16;
1999 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2000 EltVT != MVT::f64)
2001 return 0;
2002 break;
2003 }
2004
2005 unsigned Offset;
2006 switch (Key) {
2007 case 16: // 8-bit or bf16
2008 Offset = 0;
2009 break;
2010 case 8: // 16-bit
2011 Offset = 1;
2012 break;
2013 case 4: // 32-bit
2014 Offset = 2;
2015 break;
2016 case 2: // 64-bit
2017 Offset = 3;
2018 break;
2019 default:
2020 return 0;
2021 }
2022
2023 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2024}
2025
2026// This function is almost identical to SelectWhilePair, but has an
2027// extra check on the range of the immediate operand.
2028// TODO: Merge these two functions together at some point?
2029void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2030 // Immediate can be either 0 or 1.
2031 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2032 if (Imm->getZExtValue() > 1)
2033 return;
2034
2035 SDLoc DL(N);
2036 EVT VT = N->getValueType(0);
2037 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2038 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2039 SDValue SuperReg = SDValue(WhilePair, 0);
2040
2041 for (unsigned I = 0; I < 2; ++I)
2042 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2043 AArch64::psub0 + I, DL, VT, SuperReg));
2044
2045 CurDAG->RemoveDeadNode(N);
2046}
2047
2048void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2049 SDLoc DL(N);
2050 EVT VT = N->getValueType(0);
2051
2052 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2053
2054 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2055 SDValue SuperReg = SDValue(WhilePair, 0);
2056
2057 for (unsigned I = 0; I < 2; ++I)
2058 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2059 AArch64::psub0 + I, DL, VT, SuperReg));
2060
2061 CurDAG->RemoveDeadNode(N);
2062}
2063
2064void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2065 unsigned Opcode) {
2066 EVT VT = N->getValueType(0);
2067 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2068 SDValue Ops = createZTuple(Regs);
2069 SDLoc DL(N);
2070 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2071 SDValue SuperReg = SDValue(Intrinsic, 0);
2072 for (unsigned i = 0; i < NumVecs; ++i)
2073 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2074 AArch64::zsub0 + i, DL, VT, SuperReg));
2075
2076 CurDAG->RemoveDeadNode(N);
2077}
2078
2079void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2080 unsigned Opcode) {
2081 SDLoc DL(N);
2082 EVT VT = N->getValueType(0);
2083 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2084 Ops.push_back(/*Chain*/ N->getOperand(0));
2085
2086 SDNode *Instruction =
2087 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2088 SDValue SuperReg = SDValue(Instruction, 0);
2089
2090 for (unsigned i = 0; i < NumVecs; ++i)
2091 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2092 AArch64::zsub0 + i, DL, VT, SuperReg));
2093
2094 // Copy chain
2095 unsigned ChainIdx = NumVecs;
2096 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2097 CurDAG->RemoveDeadNode(N);
2098}
2099
2100void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2101 unsigned NumVecs,
2102 bool IsZmMulti,
2103 unsigned Opcode,
2104 bool HasPred) {
2105 assert(Opcode != 0 && "Unexpected opcode");
2106
2107 SDLoc DL(N);
2108 EVT VT = N->getValueType(0);
2109 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2111
2112 auto GetMultiVecOperand = [&]() {
2113 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2114 OpsIter += NumVecs;
2115 return createZMulTuple(Regs);
2116 };
2117
2118 if (HasPred)
2119 Ops.push_back(*OpsIter++);
2120
2121 Ops.push_back(GetMultiVecOperand());
2122 if (IsZmMulti)
2123 Ops.push_back(GetMultiVecOperand());
2124 else
2125 Ops.push_back(*OpsIter++);
2126
2127 // Append any remaining operands.
2128 Ops.append(OpsIter, N->op_end());
2129 SDNode *Intrinsic;
2130 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2131 SDValue SuperReg = SDValue(Intrinsic, 0);
2132 for (unsigned i = 0; i < NumVecs; ++i)
2133 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2134 AArch64::zsub0 + i, DL, VT, SuperReg));
2135
2136 CurDAG->RemoveDeadNode(N);
2137}
2138
2139void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2140 unsigned Scale, unsigned Opc_ri,
2141 unsigned Opc_rr, bool IsIntr) {
2142 assert(Scale < 5 && "Invalid scaling value.");
2143 SDLoc DL(N);
2144 EVT VT = N->getValueType(0);
2145 SDValue Chain = N->getOperand(0);
2146
2147 // Optimize addressing mode.
2149 unsigned Opc;
2150 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2151 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2152 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2153
2154 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2155 Base, // Memory operand
2156 Offset, Chain};
2157
2158 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2159
2160 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2161 SDValue SuperReg = SDValue(Load, 0);
2162 for (unsigned i = 0; i < NumVecs; ++i)
2163 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2164 AArch64::zsub0 + i, DL, VT, SuperReg));
2165
2166 // Copy chain
2167 unsigned ChainIdx = NumVecs;
2168 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2169 CurDAG->RemoveDeadNode(N);
2170}
2171
2172void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2173 unsigned NumVecs,
2174 unsigned Scale,
2175 unsigned Opc_ri,
2176 unsigned Opc_rr) {
2177 assert(Scale < 4 && "Invalid scaling value.");
2178 SDLoc DL(N);
2179 EVT VT = N->getValueType(0);
2180 SDValue Chain = N->getOperand(0);
2181
2182 SDValue PNg = N->getOperand(2);
2183 SDValue Base = N->getOperand(3);
2184 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2185 unsigned Opc;
2186 std::tie(Opc, Base, Offset) =
2187 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2188
2189 SDValue Ops[] = {PNg, // Predicate-as-counter
2190 Base, // Memory operand
2191 Offset, Chain};
2192
2193 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2194
2195 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2196 SDValue SuperReg = SDValue(Load, 0);
2197 for (unsigned i = 0; i < NumVecs; ++i)
2198 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2199 AArch64::zsub0 + i, DL, VT, SuperReg));
2200
2201 // Copy chain
2202 unsigned ChainIdx = NumVecs;
2203 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2204 CurDAG->RemoveDeadNode(N);
2205}
2206
2207void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2208 unsigned Opcode) {
2209 if (N->getValueType(0) != MVT::nxv4f32)
2210 return;
2211 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2212}
2213
2214void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2215 unsigned NumOutVecs,
2216 unsigned Opc,
2217 uint32_t MaxImm) {
2218 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2219 if (Imm->getZExtValue() > MaxImm)
2220 return;
2221
2222 SDValue ZtValue;
2223 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2224 return;
2225
2226 SDValue Chain = Node->getOperand(0);
2227 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2228 SDLoc DL(Node);
2229 EVT VT = Node->getValueType(0);
2230
2231 SDNode *Instruction =
2232 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2233 SDValue SuperReg = SDValue(Instruction, 0);
2234
2235 for (unsigned I = 0; I < NumOutVecs; ++I)
2236 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2237 AArch64::zsub0 + I, DL, VT, SuperReg));
2238
2239 // Copy chain
2240 unsigned ChainIdx = NumOutVecs;
2241 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2242 CurDAG->RemoveDeadNode(Node);
2243}
2244
2245void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2246 unsigned NumOutVecs,
2247 unsigned Opc) {
2248 SDValue ZtValue;
2249 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2250 return;
2251
2252 SDValue Chain = Node->getOperand(0);
2253 SDValue Ops[] = {ZtValue,
2254 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2255 Chain};
2256
2257 SDLoc DL(Node);
2258 EVT VT = Node->getValueType(0);
2259
2260 SDNode *Instruction =
2261 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2262 SDValue SuperReg = SDValue(Instruction, 0);
2263
2264 for (unsigned I = 0; I < NumOutVecs; ++I)
2265 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2266 AArch64::zsub0 + I, DL, VT, SuperReg));
2267
2268 // Copy chain
2269 unsigned ChainIdx = NumOutVecs;
2270 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2271 CurDAG->RemoveDeadNode(Node);
2272}
2273
2274void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2275 unsigned Op) {
2276 SDLoc DL(N);
2277 EVT VT = N->getValueType(0);
2278
2279 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2280 SDValue Zd = createZMulTuple(Regs);
2281 SDValue Zn = N->getOperand(1 + NumVecs);
2282 SDValue Zm = N->getOperand(2 + NumVecs);
2283
2284 SDValue Ops[] = {Zd, Zn, Zm};
2285
2286 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2287 SDValue SuperReg = SDValue(Intrinsic, 0);
2288 for (unsigned i = 0; i < NumVecs; ++i)
2289 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2290 AArch64::zsub0 + i, DL, VT, SuperReg));
2291
2292 CurDAG->RemoveDeadNode(N);
2293}
2294
2295bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2296 switch (BaseReg) {
2297 default:
2298 return false;
2299 case AArch64::ZA:
2300 case AArch64::ZAB0:
2301 if (TileNum == 0)
2302 break;
2303 return false;
2304 case AArch64::ZAH0:
2305 if (TileNum <= 1)
2306 break;
2307 return false;
2308 case AArch64::ZAS0:
2309 if (TileNum <= 3)
2310 break;
2311 return false;
2312 case AArch64::ZAD0:
2313 if (TileNum <= 7)
2314 break;
2315 return false;
2316 }
2317
2318 BaseReg += TileNum;
2319 return true;
2320}
2321
/// Select an SME tile/array-to-vector move intrinsic. \p MaxIdx and \p Scale
/// constrain the slice index encoding; NumVecs vector results are extracted
/// from the instruction's tuple result. Bails out silently (leaving N for
/// other patterns) if the tile number or slice index cannot be encoded.
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  // The plain ZA-array form carries no tile operand; otherwise operand 2 is
  // the tile number.
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = N->getConstantOperandVal(2);

  // Fold the tile number into BaseReg; fails on out-of-range tiles.
  if (!SelectSMETile(BaseReg, TileNum))
    return;

  // The slice operand follows the tile operand when one is present.
  SDValue SliceBase, Base, Offset;
  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(2);
  else
    SliceBase = N->getOperand(3);

  // Split the slice index into base register + in-range immediate.
  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;

  SDLoc DL(N);
  SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
  SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  // Split the result tuple into the node's NumVecs vector results.
  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));
  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}
2356
2357void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2358 unsigned Op, unsigned MaxIdx,
2359 unsigned Scale, unsigned BaseReg) {
2360 // Slice can be in different positions
2361 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2362 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2363 SDValue SliceBase = N->getOperand(2);
2364 if (BaseReg != AArch64::ZA)
2365 SliceBase = N->getOperand(3);
2366
2368 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2369 return;
2370 // The correct Za tile number is computed in Machine Instruction
2371 // See EmitZAInstr
2372 // DAG cannot select Za tile as an output register with ZReg
2373 SDLoc DL(N);
2375 if (BaseReg != AArch64::ZA )
2376 Ops.push_back(N->getOperand(2));
2377 Ops.push_back(Base);
2378 Ops.push_back(Offset);
2379 Ops.push_back(N->getOperand(0)); //Chain
2380 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2381
2382 EVT VT = N->getValueType(0);
2383 for (unsigned I = 0; I < NumVecs; ++I)
2384 ReplaceUses(SDValue(N, I),
2385 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2386 SDValue(Mov, 0)));
2387
2388 // Copy chain
2389 unsigned ChainIdx = NumVecs;
2390 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2391 CurDAG->RemoveDeadNode(N);
2392}
2393
2394void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2395 unsigned NumOutVecs,
2396 bool IsTupleInput,
2397 unsigned Opc) {
2398 SDLoc DL(N);
2399 EVT VT = N->getValueType(0);
2400 unsigned NumInVecs = N->getNumOperands() - 1;
2401
2403 if (IsTupleInput) {
2404 assert((NumInVecs == 2 || NumInVecs == 4) &&
2405 "Don't know how to handle multi-register input!");
2406 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2407 Ops.push_back(createZMulTuple(Regs));
2408 } else {
2409 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2410 for (unsigned I = 0; I < NumInVecs; I++)
2411 Ops.push_back(N->getOperand(1 + I));
2412 }
2413
2414 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2415 SDValue SuperReg = SDValue(Res, 0);
2416
2417 for (unsigned I = 0; I < NumOutVecs; I++)
2418 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2419 AArch64::zsub0 + I, DL, VT, SuperReg));
2420 CurDAG->RemoveDeadNode(N);
2421}
2422
2423void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2424 unsigned Opc) {
2425 SDLoc dl(N);
2426 EVT VT = N->getOperand(2)->getValueType(0);
2427
2428 // Form a REG_SEQUENCE to force register allocation.
2429 bool Is128Bit = VT.getSizeInBits() == 128;
2430 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2431 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2432
2433 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2434 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2435
2436 // Transfer memoperands.
2437 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2438 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2439
2440 ReplaceNode(N, St);
2441}
2442
2443void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2444 unsigned Scale, unsigned Opc_rr,
2445 unsigned Opc_ri) {
2446 SDLoc dl(N);
2447
2448 // Form a REG_SEQUENCE to force register allocation.
2449 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2450 SDValue RegSeq = createZTuple(Regs);
2451
2452 // Optimize addressing mode.
2453 unsigned Opc;
2455 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2456 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2457 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2458
2459 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2460 Base, // address
2461 Offset, // offset
2462 N->getOperand(0)}; // chain
2463 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2464
2465 ReplaceNode(N, St);
2466}
2467
2468bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2469 SDValue &OffImm) {
2470 SDLoc dl(N);
2471 const DataLayout &DL = CurDAG->getDataLayout();
2472 const TargetLowering *TLI = getTargetLowering();
2473
2474 // Try to match it for the frame address
2475 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2476 int FI = FINode->getIndex();
2477 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2478 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2479 return true;
2480 }
2481
2482 return false;
2483}
2484
2485void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2486 unsigned Opc) {
2487 SDLoc dl(N);
2488 EVT VT = N->getOperand(2)->getValueType(0);
2489 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2490 MVT::Other}; // Type for the Chain
2491
2492 // Form a REG_SEQUENCE to force register allocation.
2493 bool Is128Bit = VT.getSizeInBits() == 128;
2494 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2495 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2496
2497 SDValue Ops[] = {RegSeq,
2498 N->getOperand(NumVecs + 1), // base register
2499 N->getOperand(NumVecs + 2), // Incremental
2500 N->getOperand(0)}; // Chain
2501 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2502
2503 ReplaceNode(N, St);
2504}
2505
namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  /// Insert \p V64Reg into the dsub subregister of an IMPLICIT_DEF with
  /// twice the element count, yielding the 128-bit equivalent.
  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    // The upper half is undefined; only the dsub lane carries the value.
    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace
2528
2529/// NarrowVector - Given a value in the V128 register class, produce the
2530/// equivalent value in the V64 register class.
2532 EVT VT = V128Reg.getValueType();
2533 unsigned WideSize = VT.getVectorNumElements();
2534 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2535 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2536
2537 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2538 V128Reg);
2539}
2540
/// Select a single-lane structure load (LD1..LD4 lane form). 64-bit source
/// vectors are widened to Q registers first, since the lane instructions
/// only operate on Q-register tuples, and the results are narrowed back.
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  // Operands: tuple, lane number, address, chain.
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  // Extract each Q-sized result and narrow back to 64 bits if needed.
  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  // Rewire the chain result.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}
2578
/// Select a post-incrementing single-lane structure load. As with
/// SelectLoadLane, 64-bit vectors are widened to Q registers around the
/// instruction; the machine node's results are (write-back base : i64,
/// vector tuple, chain).
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2), // Base register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    // Extract each Q-sized result and narrow back to 64 bits if needed.
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
2632
2633void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2634 unsigned Opc) {
2635 SDLoc dl(N);
2636 EVT VT = N->getOperand(2)->getValueType(0);
2637 bool Narrow = VT.getSizeInBits() == 64;
2638
2639 // Form a REG_SEQUENCE to force register allocation.
2640 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2641
2642 if (Narrow)
2643 transform(Regs, Regs.begin(),
2644 WidenVector(*CurDAG));
2645
2646 SDValue RegSeq = createQTuple(Regs);
2647
2648 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2649
2650 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2651 N->getOperand(NumVecs + 3), N->getOperand(0)};
2652 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2653
2654 // Transfer memoperands.
2655 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2656 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2657
2658 ReplaceNode(N, St);
2659}
2660
2661void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2662 unsigned Opc) {
2663 SDLoc dl(N);
2664 EVT VT = N->getOperand(2)->getValueType(0);
2665 bool Narrow = VT.getSizeInBits() == 64;
2666
2667 // Form a REG_SEQUENCE to force register allocation.
2668 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2669
2670 if (Narrow)
2671 transform(Regs, Regs.begin(),
2672 WidenVector(*CurDAG));
2673
2674 SDValue RegSeq = createQTuple(Regs);
2675
2676 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2677 MVT::Other};
2678
2679 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2680
2681 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2682 N->getOperand(NumVecs + 2), // Base Register
2683 N->getOperand(NumVecs + 3), // Incremental
2684 N->getOperand(0)};
2685 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2686
2687 // Transfer memoperands.
2688 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2689 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2690
2691 ReplaceNode(N, St);
2692}
2693
2695 unsigned &Opc, SDValue &Opd0,
2696 unsigned &LSB, unsigned &MSB,
2697 unsigned NumberOfIgnoredLowBits,
2698 bool BiggerPattern) {
// Matches "(and (srl x, imm), mask)"-style trees and reports the equivalent
// UBFM operands (Opc, source Opd0, LSB, MSB). Returns false if no bitfield
// extract can be formed.
2699 assert(N->getOpcode() == ISD::AND &&
2700 "N must be a AND operation to call this function");
2701
2702 EVT VT = N->getValueType(0);
2703
2704 // Here we can test the type of VT and return false when the type does not
2705 // match, but since it is done prior to that call in the current context
2706 // we turned that into an assert to avoid redundant code.
2707 assert((VT == MVT::i32 || VT == MVT::i64) &&
2708 "Type checking must have been done before calling this function");
2709
2710 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2711 // changed the AND node to a 32-bit mask operation. We'll have to
2712 // undo that as part of the transform here if we want to catch all
2713 // the opportunities.
2714 // Currently the NumberOfIgnoredLowBits argument helps to recover
2715 // from these situations when matching bigger pattern (bitfield insert).
2716
2717 // For unsigned extracts, check for a shift right and mask
2718 uint64_t AndImm = 0;
2719 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2720 return false;
2721
2722 const SDNode *Op0 = N->getOperand(0).getNode();
2723
2724 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2725 // simplified. Try to undo that
2726 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2727
2728 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2729 if (AndImm & (AndImm + 1))
2730 return false;
2731
// ClampMSB records that the (any_)extend was hoisted above the right shift,
// so the computed MSB may need to be limited to the narrow type's top bit.
2732 bool ClampMSB = false;
2733 uint64_t SrlImm = 0;
2734 // Handle the SRL + ANY_EXTEND case.
2735 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2736 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2737 // Extend the incoming operand of the SRL to 64-bit.
2738 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2739 // Make sure to clamp the MSB so that we preserve the semantics of the
2740 // original operations.
2741 ClampMSB = true;
2742 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
// NOTE(review): a line of this condition (an isOpcWithIntImmediate(...,
// ISD::SRL, ...) call on the truncate's operand) appears truncated in this
// extract — confirm against upstream.
2744 SrlImm)) {
2745 // If the shift result was truncated, we can still combine them.
2746 Opd0 = Op0->getOperand(0).getOperand(0);
2747
2748 // Use the type of SRL node.
2749 VT = Opd0->getValueType(0);
2750 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2751 Opd0 = Op0->getOperand(0);
2752 ClampMSB = (VT == MVT::i32);
2753 } else if (BiggerPattern) {
2754 // Let's pretend a 0 shift right has been performed.
2755 // The resulting code will be at least as good as the original one
2756 // plus it may expose more opportunities for bitfield insert pattern.
2757 // FIXME: Currently we limit this to the bigger pattern, because
2758 // some optimizations expect AND and not UBFM.
2759 Opd0 = N->getOperand(0);
2760 } else
2761 return false;
2762
2763 // Bail out on large immediates. This happens when no proper
2764 // combining/constant folding was performed.
2765 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2766 LLVM_DEBUG(
2767 (dbgs() << N
2768 << ": Found large shift immediate, this should not happen\n"));
2769 return false;
2770 }
2771
// UBFM extracts bits [MSB:LSB]; LSB is the shift amount and the width comes
// from the number of trailing ones in the mask.
2772 LSB = SrlImm;
2773 MSB = SrlImm +
2774 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2775 : llvm::countr_one<uint64_t>(AndImm)) -
2776 1;
2777 if (ClampMSB)
2778 // Since we're moving the extend before the right shift operation, we need
2779 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2780 // the zeros which would get shifted in with the original right shift
2781 // operation.
2782 MSB = MSB > 31 ? 31 : MSB;
2783
2784 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2785 return true;
2786}
2787
2789 SDValue &Opd0, unsigned &Immr,
2790 unsigned &Imms) {
// Matches "sign_extend_inreg (sra/srl x, imm)" and reports the equivalent
// SBFM operands. Returns false if the pattern does not apply.
2791 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2792
2793 EVT VT = N->getValueType(0);
2794 unsigned BitWidth = VT.getSizeInBits();
2795 assert((VT == MVT::i32 || VT == MVT::i64) &&
2796 "Type checking must have been done before calling this function");
2797
2798 SDValue Op = N->getOperand(0);
// Look through a truncate: operate on the wider pre-truncate value so the
// shift amount is interpreted in the wider type.
2799 if (Op->getOpcode() == ISD::TRUNCATE) {
2800 Op = Op->getOperand(0);
2801 VT = Op->getValueType(0);
2802 BitWidth = VT.getSizeInBits();
2803 }
2804
2805 uint64_t ShiftImm;
2806 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2807 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2808 return false;
2809
// The extracted field must lie entirely within the value being shifted.
2810 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2811 if (ShiftImm + Width > BitWidth)
2812 return false;
2813
2814 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2815 Opd0 = Op.getOperand(0);
2816 Immr = ShiftImm;
2817 Imms = ShiftImm + Width - 1;
2818 return true;
2819}
2820
2822 SDValue &Opd0, unsigned &LSB,
2823 unsigned &MSB) {
2824 // We are looking for the following pattern which basically extracts several
2825 // continuous bits from the source value and places it from the LSB of the
2826 // destination value, all other bits of the destination value or set to zero:
2827 //
2828 // Value2 = AND Value, MaskImm
2829 // SRL Value2, ShiftImm
2830 //
2831 // with MaskImm >> ShiftImm to search for the bit width.
2832 //
2833 // This gets selected into a single UBFM:
2834 //
2835 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2836 //
2837
2838 if (N->getOpcode() != ISD::SRL)
2839 return false;
2840
2841 uint64_t AndMask = 0;
2842 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2843 return false;
2844
2845 Opd0 = N->getOperand(0).getOperand(0);
2846
2847 uint64_t SrlImm = 0;
2848 if (!isIntImmediate(N->getOperand(1), SrlImm))
2849 return false;
2850
2851 // Check whether we really have several bits extract here.
// The shifted-down mask must be all-ones, i.e. the AND selects a contiguous
// field starting at bit SrlImm.
2852 if (!isMask_64(AndMask >> SrlImm))
2853 return false;
2854
2855 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2856 LSB = SrlImm;
2857 MSB = llvm::Log2_64(AndMask);
2858 return true;
2859}
2860
2861static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2862 unsigned &Immr, unsigned &Imms,
2863 bool BiggerPattern) {
2864 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2865 "N must be a SHR/SRA operation to call this function");
2866
2867 EVT VT = N->getValueType(0);
2868
2869 // Here we can test the type of VT and return false when the type does not
2870 // match, but since it is done prior to that call in the current context
2871 // we turned that into an assert to avoid redundant code.
2872 assert((VT == MVT::i32 || VT == MVT::i64) &&
2873 "Type checking must have been done before calling this function");
2874
2875 // Check for AND + SRL doing several bits extract.
2876 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2877 return true;
2878
2879 // We're looking for a shift of a shift.
2880 uint64_t ShlImm = 0;
2881 uint64_t TruncBits = 0;
2882 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2883 Opd0 = N->getOperand(0).getOperand(0);
2884 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2885 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2886 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2887 // be considered as setting high 32 bits as zero. Our strategy here is to
2888 // always generate 64bit UBFM. This consistency will help the CSE pass
2889 // later find more redundancy.
2890 Opd0 = N->getOperand(0).getOperand(0);
2891 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2892 VT = Opd0.getValueType();
2893 assert(VT == MVT::i64 && "the promoted type should be i64");
2894 } else if (BiggerPattern) {
2895 // Let's pretend a 0 shift left has been performed.
2896 // FIXME: Currently we limit this to the bigger pattern case,
2897 // because some optimizations expect AND and not UBFM
2898 Opd0 = N->getOperand(0);
2899 } else
2900 return false;
2901
2902 // Missing combines/constant folding may have left us with strange
2903 // constants.
2904 if (ShlImm >= VT.getSizeInBits()) {
2905 LLVM_DEBUG(
2906 (dbgs() << N
2907 << ": Found large shift immediate, this should not happen\n"));
2908 return false;
2909 }
2910
2911 uint64_t SrlImm = 0;
2912 if (!isIntImmediate(N->getOperand(1), SrlImm))
2913 return false;
2914
2915 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2916 "bad amount in shift node!");
2917 int immr = SrlImm - ShlImm;
2918 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2919 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2920 // SRA requires a signed extraction
2921 if (VT == MVT::i32)
2922 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2923 else
2924 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2925 return true;
2926}
2927
2928bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2929 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2930
2931 EVT VT = N->getValueType(0);
2932 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2933 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2934 return false;
2935
2936 uint64_t ShiftImm;
2937 SDValue Op = N->getOperand(0);
2938 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2939 return false;
2940
2941 SDLoc dl(N);
2942 // Extend the incoming operand of the shift to 64-bits.
2943 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2944 unsigned Immr = ShiftImm;
2945 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2946 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2947 CurDAG->getTargetConstant(Imms, dl, VT)};
2948 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2949 return true;
2950}
2951
// Dispatch: recognize a bitfield extract rooted at N, either from a still
// unselected ISD node or from an already-selected SBFM/UBFM machine node.
2952static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2953 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2954 unsigned NumberOfIgnoredLowBits = 0,
2955 bool BiggerPattern = false) {
2956 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2957 return false;
2958
2959 switch (N->getOpcode()) {
2960 default:
2961 if (!N->isMachineOpcode())
2962 return false;
2963 break;
2964 case ISD::AND:
2965 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2966 NumberOfIgnoredLowBits, BiggerPattern);
2967 case ISD::SRL:
2968 case ISD::SRA:
2969 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2970
// NOTE(review): a 'case ISD::SIGN_EXTEND_INREG:' label appears truncated in
// this extract, immediately before the following return — confirm upstream.
2972 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2973 }
2974
// Already-selected BFM machine nodes: just read the operands back out.
2975 unsigned NOpc = N->getMachineOpcode();
2976 switch (NOpc) {
2977 default:
2978 return false;
2979 case AArch64::SBFMWri:
2980 case AArch64::UBFMWri:
2981 case AArch64::SBFMXri:
2982 case AArch64::UBFMXri:
2983 Opc = NOpc;
2984 Opd0 = N->getOperand(0);
2985 Immr = N->getConstantOperandVal(1);
2986 Imms = N->getConstantOperandVal(2);
2987 return true;
2988 }
2989 // Unreachable
2990 return false;
2991}
2992
2993bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2994 unsigned Opc, Immr, Imms;
2995 SDValue Opd0;
2996 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2997 return false;
2998
2999 EVT VT = N->getValueType(0);
3000 SDLoc dl(N);
3001
3002 // If the bit extract operation is 64bit but the original type is 32bit, we
3003 // need to add one EXTRACT_SUBREG.
3004 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3005 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3006 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3007
3008 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3009 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3010 MVT::i32, SDValue(BFM, 0));
3011 ReplaceNode(N, Inner.getNode());
3012 return true;
3013 }
3014
3015 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3016 CurDAG->getTargetConstant(Imms, dl, VT)};
3017 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3018 return true;
3019}
3020
3021/// Does DstMask form a complementary pair with the mask provided by
3022/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3023/// this asks whether DstMask zeroes precisely those bits that will be set by
3024/// the other half.
3025static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3026 unsigned NumberOfIgnoredHighBits, EVT VT) {
3027 assert((VT == MVT::i32 || VT == MVT::i64) &&
3028 "i32 or i64 mask type expected!");
3029 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3030
3031 // Enable implicitTrunc as we're intentionally ignoring high bits.
3032 APInt SignificantDstMask =
3033 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3034 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3035
3036 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3037 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3038}
3039
3040// Look for bits that will be useful for later uses.
3041// A bit is consider useless as soon as it is dropped and never used
3042// before it as been dropped.
3043// E.g., looking for useful bit of x
3044// 1. y = x & 0x7
3045// 2. z = y >> 2
3046// After #1, x useful bits are 0x7, then the useful bits of x, live through
3047// y.
3048// After #2, the useful bits of x are 0x4.
3049// However, if x is used on an unpredictable instruction, then all its bits
3050// are useful.
3051// E.g.
3052// 1. y = x & 0x7
3053// 2. z = y >> 2
3054// 3. str x, [@x]
3055static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3056
3058 unsigned Depth) {
// AND with a logical immediate: only the bits kept by the mask can be useful.
3059 uint64_t Imm =
3060 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
// Decode the AArch64 logical-immediate encoding into a plain bit mask of the
// same width as UsefulBits.
3061 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3062 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
// Continue the walk through this node's users.
3063 getUsefulBits(Op, UsefulBits, Depth + 1);
3064}
3065
3067 uint64_t Imm, uint64_t MSB,
3068 unsigned Depth) {
// Propagate useful bits through a bitfield-move (UBFM-style) operand, where
// Imm is the immr rotate and MSB is the imms field of the instruction.
3069 // inherit the bitwidth value
3070 APInt OpUsefulBits(UsefulBits);
3071 OpUsefulBits = 1;
3072
// MSB >= Imm: the move extracts bits [MSB:Imm] down to bit 0 (UBFX-like).
3073 if (MSB >= Imm) {
3074 OpUsefulBits <<= MSB - Imm + 1;
3075 --OpUsefulBits;
3076 // The interesting part will be in the lower part of the result
3077 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3078 // The interesting part was starting at Imm in the argument
3079 OpUsefulBits <<= Imm;
3080 } else {
// MSB < Imm: the move places the low MSB+1 bits higher up (UBFIZ/LSL-like).
3081 OpUsefulBits <<= MSB + 1;
3082 --OpUsefulBits;
3083 // The interesting part will be shifted in the result
3084 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3085 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3086 // The interesting part was at zero in the argument
3087 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3088 }
3089
3090 UsefulBits &= OpUsefulBits;
3091}
3092
3093static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3094 unsigned Depth) {
3095 uint64_t Imm =
3096 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3097 uint64_t MSB =
3098 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3099
3100 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3101}
3102
3104 unsigned Depth) {
// Propagate useful bits through the shifted operand of an ORR (shifted
// register) instruction; operand 2 encodes both shift type and amount.
3105 uint64_t ShiftTypeAndValue =
3106 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
// Start from an all-ones mask of the same width.
3107 APInt Mask(UsefulBits);
3108 Mask.clearAllBits();
3109 Mask.flipAllBits();
3110
3111 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3112 // Shift Left
3113 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3114 Mask <<= ShiftAmt;
3115 getUsefulBits(Op, Mask, Depth + 1);
3116 Mask.lshrInPlace(ShiftAmt);
3117 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3118 // Shift Right
3119 // We do not handle AArch64_AM::ASR, because the sign will change the
3120 // number of useful bits
3121 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3122 Mask.lshrInPlace(ShiftAmt);
3123 getUsefulBits(Op, Mask, Depth + 1);
3124 Mask <<= ShiftAmt;
3125 } else
// Any other shift type: conservatively leave UsefulBits unchanged.
3126 return;
3127
3128 UsefulBits &= Mask;
3129}
3130
// Propagate useful bits through a BFM (bitfield insert) whose operand Orig is
// either the insertion destination (operand 0) or the inserted value
// (operand 1). Imm/MSB are the instruction's immr/imms fields.
3131static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3132 unsigned Depth) {
3133 uint64_t Imm =
3134 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3135 uint64_t MSB =
3136 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3137
3138 APInt OpUsefulBits(UsefulBits);
3139 OpUsefulBits = 1;
3140
// ResultUsefulBits: which bits of the BFM's own result are useful, computed
// by walking the BFM's users.
3141 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3142 ResultUsefulBits.flipAllBits();
3143 APInt Mask(UsefulBits.getBitWidth(), 0);
3144
3145 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3146
3147 if (MSB >= Imm) {
3148 // The instruction is a BFXIL.
3149 uint64_t Width = MSB - Imm + 1;
3150 uint64_t LSB = Imm;
3151
3152 OpUsefulBits <<= Width;
3153 --OpUsefulBits;
3154
3155 if (Op.getOperand(1) == Orig) {
3156 // Copy the low bits from the result to bits starting from LSB.
3157 Mask = ResultUsefulBits & OpUsefulBits;
3158 Mask <<= LSB;
3159 }
3160
3161 if (Op.getOperand(0) == Orig)
3162 // Bits starting from LSB in the input contribute to the result.
3163 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3164 } else {
3165 // The instruction is a BFI.
3166 uint64_t Width = MSB + 1;
3167 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3168
3169 OpUsefulBits <<= Width;
3170 --OpUsefulBits;
3171 OpUsefulBits <<= LSB;
3172
3173 if (Op.getOperand(1) == Orig) {
3174 // Copy the bits from the result to the zero bits.
3175 Mask = ResultUsefulBits & OpUsefulBits;
3176 Mask.lshrInPlace(LSB);
3177 }
3178
3179 if (Op.getOperand(0) == Orig)
// Destination bits outside the inserted field flow through unchanged.
3180 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3181 }
3182
3183 UsefulBits &= Mask;
3184}
3185
3186static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3187 SDValue Orig, unsigned Depth) {
3188
3189 // Users of this node should have already been instruction selected
3190 // FIXME: Can we turn that into an assert?
3191 if (!UserNode->isMachineOpcode())
3192 return;
3193
3194 switch (UserNode->getMachineOpcode()) {
3195 default:
3196 return;
3197 case AArch64::ANDSWri:
3198 case AArch64::ANDSXri:
3199 case AArch64::ANDWri:
3200 case AArch64::ANDXri:
3201 // We increment Depth only when we call the getUsefulBits
3202 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3203 Depth);
3204 case AArch64::UBFMWri:
3205 case AArch64::UBFMXri:
3206 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3207
3208 case AArch64::ORRWrs:
3209 case AArch64::ORRXrs:
3210 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3211 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3212 Depth);
3213 return;
3214 case AArch64::BFMWri:
3215 case AArch64::BFMXri:
3216 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3217
3218 case AArch64::STRBBui:
3219 case AArch64::STURBBi:
3220 if (UserNode->getOperand(0) != Orig)
3221 return;
3222 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3223 return;
3224
3225 case AArch64::STRHHui:
3226 case AArch64::STURHHi:
3227 if (UserNode->getOperand(0) != Orig)
3228 return;
3229 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3230 return;
3231 }
3232}
3233
// Compute which bits of Op are actually consumed by its users, intersecting
// the per-user useful-bit sets.
3234static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
// NOTE(review): the recursion-depth guard condition that should precede this
// early return appears truncated in this extract — confirm against upstream.
3236 return;
3237 // Initialize UsefulBits
3238 if (!Depth) {
3239 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3240 // At the beginning, assume every produced bits is useful
3241 UsefulBits = APInt(Bitwidth, 0);
3242 UsefulBits.flipAllBits();
3243 }
3244 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3245
3246 for (SDNode *Node : Op.getNode()->users()) {
3247 // A use cannot produce useful bits
3248 APInt UsefulBitsForUse = APInt(UsefulBits);
3249 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3250 UsersUsefulBits |= UsefulBitsForUse;
3251 }
3252 // UsefulBits contains the produced bits that are meaningful for the
3253 // current definition, thus a user cannot make a bit meaningful at
3254 // this point
3255 UsefulBits &= UsersUsefulBits;
3256}
3257
3258/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3259/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3260/// 0, return Op unchanged.
3261static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3262 if (ShlAmount == 0)
3263 return Op;
3264
3265 EVT VT = Op.getValueType();
3266 SDLoc dl(Op);
3267 unsigned BitWidth = VT.getSizeInBits();
3268 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3269
3270 SDNode *ShiftNode;
3271 if (ShlAmount > 0) {
3272 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3273 ShiftNode = CurDAG->getMachineNode(
3274 UBFMOpc, dl, VT, Op,
3275 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3276 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3277 } else {
3278 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3279 assert(ShlAmount < 0 && "expected right shift");
3280 int ShrAmount = -ShlAmount;
3281 ShiftNode = CurDAG->getMachineNode(
3282 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3283 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3284 }
3285
3286 return SDValue(ShiftNode, 0);
3287}
3288
3289// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3290static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3291 bool BiggerPattern,
3292 const uint64_t NonZeroBits,
3293 SDValue &Src, int &DstLSB,
3294 int &Width);
3295
3296// For bit-field-positioning pattern "shl VAL, N)".
3297static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3298 bool BiggerPattern,
3299 const uint64_t NonZeroBits,
3300 SDValue &Src, int &DstLSB,
3301 int &Width);
3302
3303/// Does this tree qualify as an attempt to move a bitfield into position,
3304/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3306 bool BiggerPattern, SDValue &Src,
3307 int &DstLSB, int &Width) {
3308 EVT VT = Op.getValueType();
3309 unsigned BitWidth = VT.getSizeInBits();
3310 (void)BitWidth;
3311 assert(BitWidth == 32 || BitWidth == 64);
3312
3313 KnownBits Known = CurDAG->computeKnownBits(Op);
3314
3315 // Non-zero in the sense that they're not provably zero, which is the key
3316 // point if we want to use this value
3317 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
// The possibly-set bits must form one contiguous run for a bitfield move.
3318 if (!isShiftedMask_64(NonZeroBits))
3319 return false;
3320
// Dispatch on the root opcode; each helper fills in Src/DstLSB/Width.
3321 switch (Op.getOpcode()) {
3322 default:
3323 break;
3324 case ISD::AND:
3325 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3326 NonZeroBits, Src, DstLSB, Width);
3327 case ISD::SHL:
3328 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3329 NonZeroBits, Src, DstLSB, Width);
3330 }
3331
3332 return false;
3333}
3334
3336 bool BiggerPattern,
3337 const uint64_t NonZeroBits,
3338 SDValue &Src, int &DstLSB,
3339 int &Width) {
// Matches "(and (shl val, N), shifted-mask)" (possibly through any_extend)
// and reports the positioned source plus destination LSB/width.
3340 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3341
3342 EVT VT = Op.getValueType();
3343 assert((VT == MVT::i32 || VT == MVT::i64) &&
3344 "Caller guarantees VT is one of i32 or i64");
3345 (void)VT;
3346
3347 uint64_t AndImm;
3348 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3349 return false;
3350
3351 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3352 // 1) (AndImm & (1 << POS) == 0)
3353 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3354 //
3355 // 1) and 2) don't agree so something must be wrong (e.g., in
3356 // 'SelectionDAG::computeKnownBits')
3357 assert((~AndImm & NonZeroBits) == 0 &&
3358 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3359
3360 SDValue AndOp0 = Op.getOperand(0);
3361
3362 uint64_t ShlImm;
3363 SDValue ShlOp0;
3364 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3365 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3366 ShlOp0 = AndOp0.getOperand(0);
3367 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
// NOTE(review): a line of this condition (an isOpcWithIntImmediate(...,
// ISD::SHL, ...) call on the any_extend's operand) appears truncated in this
// extract — confirm against upstream.
3369 ShlImm)) {
3370 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3371
3372 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3373 SDValue ShlVal = AndOp0.getOperand(0);
3374
3375 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3376 // expect VT to be MVT::i32.
3377 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3378
3379 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3380 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3381 } else
3382 return false;
3383
3384 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3385 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3386 // AndOp0+AND.
3387 if (!BiggerPattern && !AndOp0.hasOneUse())
3388 return false;
3389
3390 DstLSB = llvm::countr_zero(NonZeroBits);
3391 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3392
3393 // Bail out on large Width. This happens when no proper combining / constant
3394 // folding was performed.
3395 if (Width >= (int)VT.getSizeInBits()) {
3396 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3397 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3398 // "val".
3399 // If VT is i32, what Width >= 32 means:
3400 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3401 // demands at least 'Width' bits (after dag-combiner). This together with
3402 // `any_extend` Op (undefined higher bits) indicates missed combination
3403 // when lowering the 'and' IR instruction to an machine IR instruction.
3404 LLVM_DEBUG(
3405 dbgs()
3406 << "Found large Width in bit-field-positioning -- this indicates no "
3407 "proper combining / constant folding was performed\n");
3408 return false;
3409 }
3410
3411 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3412 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3413 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3414 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3415 // which case it is not profitable to insert an extra shift.
3416 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3417 return false;
3418
3419 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3420 return true;
3421}
3422
3423// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3424// UBFIZ.
3426 SDValue &Src, int &DstLSB,
3427 int &Width) {
3428 // Caller should have verified that N is a left shift with constant shift
3429 // amount; asserts that.
3430 assert(Op.getOpcode() == ISD::SHL &&
3431 "Op.getNode() should be a SHL node to call this function");
3432 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3433 "Op.getNode() should shift ShlImm to call this function");
3434
3435 uint64_t AndImm = 0;
3436 SDValue Op0 = Op.getOperand(0);
3437 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3438 return false;
3439
// Shifting the mask left by ShlImm and back right discards mask bits that
// would be shifted out of the register anyway.
3440 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3441 if (isMask_64(ShiftedAndImm)) {
3442 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3443 // should end with Mask, and could be prefixed with random bits if those
3444 // bits are shifted out.
3445 //
3446 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3447 // the AND result corresponding to those bits are shifted out, so it's fine
3448 // to not extract them.
3449 Width = llvm::countr_one(ShiftedAndImm);
3450 DstLSB = ShlImm;
3451 Src = Op0.getOperand(0);
3452 return true;
3453 }
3454 return false;
3455}
3456
3458 bool BiggerPattern,
3459 const uint64_t NonZeroBits,
3460 SDValue &Src, int &DstLSB,
3461 int &Width) {
// Matches "shl val, N" (optionally with an inner AND handled by
// isSeveralBitsPositioningOpFromShl) and reports Src/DstLSB/Width.
3462 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3463
3464 EVT VT = Op.getValueType();
3465 assert((VT == MVT::i32 || VT == MVT::i64) &&
3466 "Caller guarantees that type is i32 or i64");
3467 (void)VT;
3468
3469 uint64_t ShlImm;
3470 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3471 return false;
3472
// For the smaller pattern (UBFIZ), only fold a single-use shift.
3473 if (!BiggerPattern && !Op.hasOneUse())
3474 return false;
3475
3476 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3477 return true;
3478
3479 DstLSB = llvm::countr_zero(NonZeroBits);
3480 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3481
// Inserting a compensating extra shift is only profitable for BFI.
3482 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3483 return false;
3484
3485 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3486 return true;
3487}
3488
3489static bool isShiftedMask(uint64_t Mask, EVT VT) {
3490 assert(VT == MVT::i32 || VT == MVT::i64);
3491 if (VT == MVT::i32)
3492 return isShiftedMask_32(Mask);
3493 return isShiftedMask_64(Mask);
3494}
3495
3496// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3497// inserted only sets known zero bits.
3499 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3500
3501 EVT VT = N->getValueType(0);
3502 if (VT != MVT::i32 && VT != MVT::i64)
3503 return false;
3504
3505 unsigned BitWidth = VT.getSizeInBits();
3506
3507 uint64_t OrImm;
3508 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3509 return false;
3510
3511 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3512 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3513 // performance neutral.
// NOTE(review): the guarding condition (an AArch64_AM::isLogicalImmediate
// check on OrImm) appears truncated in this extract — confirm upstream.
3515 return false;
3516
3517 uint64_t MaskImm;
3518 SDValue And = N->getOperand(0);
3519 // Must be a single use AND with an immediate operand.
3520 if (!And.hasOneUse() ||
3521 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3522 return false;
3523
3524 // Compute the Known Zero for the AND as this allows us to catch more general
3525 // cases than just looking for AND with imm.
3526 KnownBits Known = CurDAG->computeKnownBits(And);
3527
3528 // Non-zero in the sense that they're not provably zero, which is the key
3529 // point if we want to use this value.
3530 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3531
3532 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3533 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3534 return false;
3535
3536 // The bits being inserted must only set those bits that are known to be zero.
3537 if ((OrImm & NotKnownZero) != 0) {
3538 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3539 // currently handle this case.
3540 return false;
3541 }
3542
3543 // BFI/BFXIL dst, src, #lsb, #width.
3544 int LSB = llvm::countr_one(NotKnownZero);
3545 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3546
3547 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3548 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3549 unsigned ImmS = Width - 1;
3550
3551 // If we're creating a BFI instruction avoid cases where we need more
3552 // instructions to materialize the BFI constant as compared to the original
3553 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3554 // should be no worse in this case.
3555 bool IsBFI = LSB != 0;
3556 uint64_t BFIImm = OrImm >> LSB;
3557 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3558 // We have a BFI instruction and we know the constant can't be materialized
3559 // with a ORR-immediate with the zero register.
// Compare MOVZ/MOVK chunk counts: bail if materializing the shifted BFI
// constant takes more 16-bit chunks than the original OR constant.
3560 unsigned OrChunks = 0, BFIChunks = 0;
3561 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3562 if (((OrImm >> Shift) & 0xFFFF) != 0)
3563 ++OrChunks;
3564 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3565 ++BFIChunks;
3566 }
3567 if (BFIChunks > OrChunks)
3568 return false;
3569 }
3570
3571 // Materialize the constant to be inserted.
3572 SDLoc DL(N);
3573 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3574 SDNode *MOVI = CurDAG->getMachineNode(
3575 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3576
3577 // Create the BFI/BFXIL instruction.
3578 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3579 CurDAG->getTargetConstant(ImmR, DL, VT),
3580 CurDAG->getTargetConstant(ImmS, DL, VT)};
3581 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3582 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3583 return true;
3584}
3585
3587 SDValue &ShiftedOperand,
3588 uint64_t &EncodedShiftImm) {
// Decide whether Dst can be folded into an ORR-with-shifted-register operand;
// on success sets ShiftedOperand and the encoded shift immediate.
3589 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3590 if (!Dst.hasOneUse())
3591 return false;
3592
3593 EVT VT = Dst.getValueType();
3594 assert((VT == MVT::i32 || VT == MVT::i64) &&
3595 "Caller should guarantee that VT is one of i32 or i64");
3596 const unsigned SizeInBits = VT.getSizeInBits();
3597
3598 SDLoc DL(Dst.getNode());
3599 uint64_t AndImm, ShlImm;
// Case 1: Dst is "and (srl x, imm), shifted-mask" -> UBFX + shifted ORR.
3600 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3601 isShiftedMask_64(AndImm)) {
3602 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3603 SDValue DstOp0 = Dst.getOperand(0);
3604 if (!DstOp0.hasOneUse())
3605 return false;
3606
3607 // An example to illustrate the transformation
3608 // From:
3609 // lsr x8, x1, #1
3610 // and x8, x8, #0x3f80
3611 // bfxil x8, x1, #0, #7
3612 // To:
3613 // and x8, x23, #0x7f
3614 // ubfx x9, x23, #8, #7
3615 // orr x23, x8, x9, lsl #7
3616 //
3617 // The number of instructions remains the same, but ORR is faster than BFXIL
3618 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3619 // the dependency chain is improved after the transformation.
3620 uint64_t SrlImm;
3621 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3622 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3623 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3624 unsigned MaskWidth =
3625 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3626 unsigned UBFMOpc =
3627 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3628 SDNode *UBFMNode = CurDAG->getMachineNode(
3629 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3630 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3631 VT),
3632 CurDAG->getTargetConstant(
3633 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3634 ShiftedOperand = SDValue(UBFMNode, 0);
3635 EncodedShiftImm = AArch64_AM::getShifterImm(
3636 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3637 return true;
3638 }
3639 }
3640 return false;
3641 }
3642
// Case 2: a plain SHL folds directly as an LSL shifted operand.
3643 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3644 ShiftedOperand = Dst.getOperand(0);
3645 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3646 return true;
3647 }
3648
// Case 3: a plain SRL folds directly as an LSR shifted operand.
3649 uint64_t SrlImm;
3650 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3651 ShiftedOperand = Dst.getOperand(0);
3652 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3653 return true;
3654 }
3655 return false;
3656}
3657
3658 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3659 // the operands and select it to AArch64::ORR with shifted registers if
3660 // that's more efficient. Returns true iff selection to AArch64::ORR happens.
// Preconditions (asserted below): N is an ISD::OR whose two operands are
// {OrOpd0, OrOpd1} in some order, and its result type is i32 or i64.
3661 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3662                             SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3663                             const bool BiggerPattern) {
3664   EVT VT = N->getValueType(0);
3665   assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3666   assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3667           (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3668          "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3669   assert((VT == MVT::i32 || VT == MVT::i64) &&
3670          "Expect result type to be i32 or i64 since N is combinable to BFM");
3671   SDLoc DL(N);
3672 
3673   // Bail out if BFM simplifies away one node in BFM Dst.
3674   if (OrOpd1 != Dst)
3675     return false;
3676 
3677   const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3678   // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3679   // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3680   if (BiggerPattern) {
3681     uint64_t SrcAndImm;
3682     if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3683         isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3684       // OrOpd0 = AND Src, #Mask
3685       // So BFM simplifies away one AND node from Src and doesn't simplify away
3686       // nodes from Dst. If ORR with left-shifted operand also simplifies away
3687       // one node (from Rd), ORR is better since it has higher throughput and
3688       // smaller latency than BFM on many AArch64 processors (and for the rest
3689       // ORR is at least as good as BFM).
3690       SDValue ShiftedOperand;
3691       uint64_t EncodedShiftImm;
3692       if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3693                                          EncodedShiftImm)) {
3694         SDValue Ops[] = {OrOpd0, ShiftedOperand,
3695                          CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3696         CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3697         return true;
3698       }
3699     }
3700     return false;
3701   }
3702 
3703   assert((!BiggerPattern) && "BiggerPattern should be handled above");
3704 
3705   uint64_t ShlImm;
3706   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
     // OrOpd0 = shl Src, #ShlImm: fold the shift into the ORR operand.
3707     if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3708       SDValue Ops[] = {
3709           Dst, Src,
3710           CurDAG->getTargetConstant(
// NOTE(review): original line 3711 — the encoded shift-immediate argument that
// closes this getTargetConstant call and the Ops initializer — is missing from
// this rendering; restore it from the repository source.
3712       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3713       return true;
3714     }
3715 
3716     // Select the following pattern to left-shifted operand rather than BFI.
3717     // %val1 = op ..
3718     // %val2 = shl %val1, #imm
3719     // %res = or %val1, %val2
3720     //
3721     // If N is selected to be BFI, we know that
3722     // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3723     // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3724     //
3725     // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3726     if (OrOpd0.getOperand(0) == OrOpd1) {
3727       SDValue Ops[] = {
3728           OrOpd1, OrOpd1,
3729           CurDAG->getTargetConstant(
// NOTE(review): original line 3730 — the encoded shift-immediate argument that
// closes this getTargetConstant call — is missing from this rendering.
3731       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3732       return true;
3733     }
3734   }
3735 
3736   uint64_t SrlImm;
3737   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3738     // Select the following pattern to right-shifted operand rather than BFXIL.
3739     // %val1 = op ..
3740     // %val2 = lshr %val1, #imm
3741     // %res = or %val1, %val2
3742     //
3743     // If N is selected to be BFXIL, we know that
3744     // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3745     // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3746     //
3747     // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3748     if (OrOpd0.getOperand(0) == OrOpd1) {
3749       SDValue Ops[] = {
3750           OrOpd1, OrOpd1,
3751           CurDAG->getTargetConstant(
// NOTE(review): original line 3752 — the encoded shift-immediate argument that
// closes this getTargetConstant call — is missing from this rendering.
3753       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3754       return true;
3755     }
3756   }
3757 
3758   return false;
3759 }
3760
// Try to select an ISD::OR node as a bitfield insert/move (BFM/BFI/BFXIL),
// or — when more efficient — as an ORR with a shifted register operand.
// 'UsefulBits' describes which result bits are observed by N's users; its
// leading/trailing zero counts relax the matching below. Returns true iff N
// was replaced.
3761 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3762                                       SelectionDAG *CurDAG) {
3763   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3764 
3765   EVT VT = N->getValueType(0);
3766   if (VT != MVT::i32 && VT != MVT::i64)
3767     return false;
3768 
3769   unsigned BitWidth = VT.getSizeInBits();
3770 
3771   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3772   // have the expected shape. Try to undo that.
3773 
3774   unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3775   unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3776 
3777   // Given a OR operation, check if we have the following pattern
3778   // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3779   //                       isBitfieldExtractOp)
3780   // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3781   //                 countTrailingZeros(mask2) == imm2 - imm + 1
3782   // f = d | c
3783   // if yes, replace the OR instruction with:
3784   // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3785 
3786   // OR is commutative, check all combinations of operand order and values of
3787   // BiggerPattern, i.e.
3788   //     Opd0, Opd1, BiggerPattern=false
3789   //     Opd1, Opd0, BiggerPattern=false
3790   //     Opd0, Opd1, BiggerPattern=true
3791   //     Opd1, Opd0, BiggerPattern=true
3792   // Several of these combinations may match, so check with BiggerPattern=false
3793   // first since that will produce better results by matching more instructions
3794   // and/or inserting fewer extra instructions.
3795   for (int I = 0; I < 4; ++I) {
3796 
3797     SDValue Dst, Src;
3798     unsigned ImmR, ImmS;
     // I = 0,1 -> BiggerPattern=false; I = 2,3 -> BiggerPattern=true.
     // I % 2 swaps which operand plays the "extract" role.
3799     bool BiggerPattern = I / 2;
3800     SDValue OrOpd0Val = N->getOperand(I % 2);
3801     SDNode *OrOpd0 = OrOpd0Val.getNode();
3802     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3803     SDNode *OrOpd1 = OrOpd1Val.getNode();
3804 
3805     unsigned BFXOpc;
3806     int DstLSB, Width;
3807     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3808                             NumberOfIgnoredLowBits, BiggerPattern)) {
3809       // Check that the returned opcode is compatible with the pattern,
3810       // i.e., same type and zero extended (U and not S)
3811       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3812           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3813         continue;
3814 
3815       // Compute the width of the bitfield insertion
3816       DstLSB = 0;
3817       Width = ImmS - ImmR + 1;
3818       // FIXME: This constraint is to catch bitfield insertion we may
3819       // want to widen the pattern if we want to grab general bitfield
3820       // move case
3821       if (Width <= 0)
3822         continue;
3823 
3824       // If the mask on the insertee is correct, we have a BFXIL operation. We
3825       // can share the ImmR and ImmS values from the already-computed UBFM.
3826     } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3827                                        BiggerPattern,
3828                                        Src, DstLSB, Width)) {
       // BFM encodes the destination LSB as a rotate-right amount.
3829       ImmR = (BitWidth - DstLSB) % BitWidth;
3830       ImmS = Width - 1;
3831     } else
3832       continue;
3833 
3834     // Check the second part of the pattern
     // NOTE(review): this local VT shadows the function-level VT declared
     // above; both are i32/i64 here (asserted next line).
3835     EVT VT = OrOpd1Val.getValueType();
3836     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3837 
3838     // Compute the Known Zero for the candidate of the first operand.
3839     // This allows to catch more general case than just looking for
3840     // AND with imm. Indeed, simplify-demanded-bits may have removed
3841     // the AND instruction because it proves it was useless.
3842     KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3843 
3844     // Check if there is enough room for the second operand to appear
3845     // in the first one
3846     APInt BitsToBeInserted =
3847         APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3848 
3849     if ((BitsToBeInserted & ~Known.Zero) != 0)
3850       continue;
3851 
3852     // Set the first operand
3853     uint64_t Imm;
3854     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3855         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3856       // In that case, we can eliminate the AND
3857       Dst = OrOpd1->getOperand(0);
3858     else
3859       // Maybe the AND has been removed by simplify-demanded-bits
3860       // or is useful because it discards more bits
3861       Dst = OrOpd1Val;
3862 
3863     // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3864     // with shifted operand is more efficient.
3865     if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3866                         BiggerPattern))
3867       return true;
3868 
3869     // both parts match
3870     SDLoc DL(N);
3871     SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3872                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3873     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3874     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3875     return true;
3876   }
3877 
3878   // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3879   // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3880   // mask (e.g., 0x000ffff0).
3881   uint64_t Mask0Imm, Mask1Imm;
3882   SDValue And0 = N->getOperand(0);
3883   SDValue And1 = N->getOperand(1);
3884   if (And0.hasOneUse() && And1.hasOneUse() &&
3885       isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3886       isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3887       APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3888       (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3889 
3890     // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3891     // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3892     // bits to be inserted.
3893     if (isShiftedMask(Mask0Imm, VT)) {
3894       std::swap(And0, And1);
3895       std::swap(Mask0Imm, Mask1Imm);
3896     }
3897 
3898     SDValue Src = And1->getOperand(0);
3899     SDValue Dst = And0->getOperand(0);
3900     unsigned LSB = llvm::countr_zero(Mask1Imm);
3901     int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3902 
3903     // The BFXIL inserts the low-order bits from a source register, so right
3904     // shift the needed bits into place.
3905     SDLoc DL(N);
3906     unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3907     uint64_t LsrImm = LSB;
     // If Src is itself a single-use SRL by a constant, merge the two right
     // shifts into one UBFM.
3908     if (Src->hasOneUse() &&
3909         isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3910         (LsrImm + LSB) < BitWidth) {
3911       Src = Src->getOperand(0);
3912       LsrImm += LSB;
3913     }
3914 
3915     SDNode *LSR = CurDAG->getMachineNode(
3916         ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3917         CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3918 
3919     // BFXIL is an alias of BFM, so translate to BFM operands.
3920     unsigned ImmR = (BitWidth - LSB) % BitWidth;
3921     unsigned ImmS = Width - 1;
3922 
3923     // Create the BFXIL instruction.
3924     SDValue Ops[] = {Dst, SDValue(LSR, 0),
3925                      CurDAG->getTargetConstant(ImmR, DL, VT),
3926                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3927     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3928     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3929     return true;
3930   }
3931 
3932   return false;
3933 }
3934
3935bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3936 if (N->getOpcode() != ISD::OR)
3937 return false;
3938
3939 APInt NUsefulBits;
3940 getUsefulBits(SDValue(N, 0), NUsefulBits);
3941
3942 // If all bits are not useful, just return UNDEF.
3943 if (!NUsefulBits) {
3944 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3945 return true;
3946 }
3947
3948 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3949 return true;
3950
3951 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3952}
3953
3954/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3955/// equivalent of a left shift by a constant amount followed by an and masking
3956/// out a contiguous set of bits.
3957bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3958 if (N->getOpcode() != ISD::AND)
3959 return false;
3960
3961 EVT VT = N->getValueType(0);
3962 if (VT != MVT::i32 && VT != MVT::i64)
3963 return false;
3964
3965 SDValue Op0;
3966 int DstLSB, Width;
3967 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3968 Op0, DstLSB, Width))
3969 return false;
3970
3971 // ImmR is the rotate right amount.
3972 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3973 // ImmS is the most significant bit of the source to be moved.
3974 unsigned ImmS = Width - 1;
3975
3976 SDLoc DL(N);
3977 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3978 CurDAG->getTargetConstant(ImmS, DL, VT)};
3979 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3980 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3981 return true;
3982}
3983
3984 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3985 /// variable shift/rotate instructions.
// The variable-shift instructions selected below only read the low Bits bits
// of the shift-amount register, so ADD/SUB-by-multiple-of-Size, masking ANDs,
// and extends of the amount can be elided. Returns true iff N was replaced.
3986 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3987   EVT VT = N->getValueType(0);
3988 
3989   unsigned Opc;
3990   switch (N->getOpcode()) {
3991   case ISD::ROTR:
3992     Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3993     break;
3994   case ISD::SHL:
3995     Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3996     break;
3997   case ISD::SRL:
3998     Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3999     break;
4000   case ISD::SRA:
4001     Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
4002     break;
4003   default:
4004     return false;
4005   }
4006 
   // Size = register width; Bits = number of significant shift-amount bits.
4007   uint64_t Size;
4008   uint64_t Bits;
4009   if (VT == MVT::i32) {
4010     Bits = 5;
4011     Size = 32;
4012   } else if (VT == MVT::i64) {
4013     Bits = 6;
4014     Size = 64;
4015   } else
4016     return false;
4017 
4018   SDValue ShiftAmt = N->getOperand(1);
4019   SDLoc DL(N);
4020   SDValue NewShiftAmt;
4021 
4022   // Skip over an extend of the shift amount.
4023   if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
4024       ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
4025     ShiftAmt = ShiftAmt->getOperand(0);
4026 
4027   if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
4028     SDValue Add0 = ShiftAmt->getOperand(0);
4029     SDValue Add1 = ShiftAmt->getOperand(1);
4030     uint64_t Add0Imm;
4031     uint64_t Add1Imm;
4032     if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
4033       // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
4034       // to avoid the ADD/SUB.
4035       NewShiftAmt = Add0;
4036     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4037                isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
4038                (Add0Imm % Size == 0)) {
4039       // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
4040       // to generate a NEG instead of a SUB from a constant.
4041       unsigned NegOpc;
4042       unsigned ZeroReg;
4043       EVT SubVT = ShiftAmt->getValueType(0);
4044       if (SubVT == MVT::i32) {
4045         NegOpc = AArch64::SUBWrr;
4046         ZeroReg = AArch64::WZR;
4047       } else {
4048         assert(SubVT == MVT::i64);
4049         NegOpc = AArch64::SUBXrr;
4050         ZeroReg = AArch64::XZR;
4051       }
       // NEG is SUB from the zero register.
4052       SDValue Zero =
4053           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4054       MachineSDNode *Neg =
4055           CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
4056       NewShiftAmt = SDValue(Neg, 0);
4057     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4058                isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
4059       // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
4060       // to generate a NOT instead of a SUB from a constant.
4061       unsigned NotOpc;
4062       unsigned ZeroReg;
4063       EVT SubVT = ShiftAmt->getValueType(0);
4064       if (SubVT == MVT::i32) {
4065         NotOpc = AArch64::ORNWrr;
4066         ZeroReg = AArch64::WZR;
4067       } else {
4068         assert(SubVT == MVT::i64);
4069         NotOpc = AArch64::ORNXrr;
4070         ZeroReg = AArch64::XZR;
4071       }
       // NOT is ORN with the zero register.
4072       SDValue Zero =
4073           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4074       MachineSDNode *Not =
4075           CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
4076       NewShiftAmt = SDValue(Not, 0);
4077     } else
4078       return false;
4079   } else {
4080     // If the shift amount is masked with an AND, check that the mask covers the
4081     // bits that are implicitly ANDed off by the above opcodes and if so, skip
4082     // the AND.
4083     uint64_t MaskImm;
4084     if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
4085         !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
4086       return false;
4087 
4088     if ((unsigned)llvm::countr_one(MaskImm) < Bits)
4089       return false;
4090 
4091     NewShiftAmt = ShiftAmt->getOperand(0);
4092   }
4093 
4094   // Narrow/widen the shift amount to match the size of the shift operation.
4095   if (VT == MVT::i32)
4096     NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
4097   else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
4098     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
4099     MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
4100                                                 NewShiftAmt, SubReg);
4101     NewShiftAmt = SDValue(Ext, 0);
4102   }
4103 
4104   SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
4105   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4106   return true;
4107 }
4108
// checkCVTFixedPointOperandWithFBits
// NOTE(review): the first line of this signature (the function name plus its
// leading parameters, original line 4109) is missing from this rendering; the
// name is taken from the two wrapper methods below — confirm against the
// repository source.
//
// Checks whether N is a floating-point constant (directly, or loaded from a
// constant pool via ADDlow) usable as a fixed-point conversion scale. On
// success, sets FixedPos to the fractional-bit count as an i32 target
// constant and returns true.
4110                                                SDValue &FixedPos,
4111                                                unsigned RegWidth,
4112                                                bool isReciprocal) {
4113   APFloat FVal(0.0);
// NOTE(review): original line 4114 is missing from this rendering; given the
// 'CN' referenced on the next line it presumably matched a ConstantFPSDNode —
// restore it from the repository source.
4115     FVal = CN->getValueAPF();
4116   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4117     // Some otherwise illegal constants are allowed in this case.
4118     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4119         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4120       return false;
4121 
4122     ConstantPoolSDNode *CN =
4123         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4124     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4125   } else
4126     return false;
4127 
4128   if (unsigned FBits =
4129           CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4130     FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4131     return true;
4132   }
4133 
4134   return false;
4135 }
4136
// Select a fixed-point position operand (non-reciprocal form): FixedPos
// receives the number of fractional bits when N encodes a valid scale.
// Thin wrapper over checkCVTFixedPointOperandWithFBits above.
4137 bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4138                                                    unsigned RegWidth) {
4139   return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4140                                             /*isReciprocal*/ false);
4141 }
4142
4143bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4144 unsigned RegWidth) {
4145 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4146 N.getValueType().getScalarSizeInBits() ==
4147 N.getOperand(0).getValueType().getScalarSizeInBits())
4148 N = N.getOperand(0);
4149
4150 auto ImmToFloat = [RegWidth](APInt Imm) {
4151 switch (RegWidth) {
4152 case 16:
4153 return APFloat(APFloat::IEEEhalf(), Imm);
4154 case 32:
4155 return APFloat(APFloat::IEEEsingle(), Imm);
4156 case 64:
4157 return APFloat(APFloat::IEEEdouble(), Imm);
4158 default:
4159 llvm_unreachable("Unexpected RegWidth!");
4160 };
4161 };
4162
4163 APFloat FVal(0.0);
4164 switch (N->getOpcode()) {
4165 case AArch64ISD::MOVIshift:
4166 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
4167 << N.getConstantOperandVal(1)));
4168 break;
4169 case AArch64ISD::FMOV:
4170 FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
4171 break;
4172 case AArch64ISD::DUP:
4173 if (isa<ConstantSDNode>(N.getOperand(0)))
4174 FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
4175 else
4176 return false;
4177 break;
4178 default:
4179 return false;
4180 }
4181
4182 if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
4183 /*isReciprocal*/ false)) {
4184 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4185 return true;
4186 }
4187
4188 return false;
4189}
4190
// Select a fixed-point position operand (reciprocal form). Thin wrapper over
// checkCVTFixedPointOperandWithFBits above with isReciprocal = true.
4191 bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4192                                                         SDValue &FixedPos,
4193                                                         unsigned RegWidth) {
4194   return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4195                                             /*isReciprocal*/ true);
4196 }
4197
4198 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4199 // of the string and obtains the integer values from them and combines these
4200 // into a single value to be used in the MRS/MSR instruction.
// Returns -1 when the string has no ':' separators (i.e. it is a named
// register rather than an encoded one).
// NOTE(review): original lines 4201-4202 — the function signature (the callers
// use it as getIntOperandFromRegisterString(StringRef RegString)) and the
// declaration of 'Fields' — are missing from this rendering; restore them from
// the repository source.
4203   RegString.split(Fields, ':');
4204 
4205   if (Fields.size() == 1)
4206     return -1;
4207 
4208   assert(Fields.size() == 5
4209             && "Invalid number of fields in read register string");
4210 
// NOTE(review): original line 4211 — the declaration of 'Ops', the container
// populated below — is also missing from this rendering.
4212   bool AllIntFields = true;
4213 
4214   for (StringRef Field : Fields) {
4215     unsigned IntField;
4216     AllIntFields &= !Field.getAsInteger(10, IntField);
4217     Ops.push_back(IntField);
4218   }
4219 
4220   assert(AllIntFields &&
4221           "Unexpected non-integer value in special register string.");
4222   (void)AllIntFields;
4223 
4224   // Need to combine the integer fields of the string into a single value
4225   // based on the bit encoding of MRS/MSR instruction.
4226   return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4227          (Ops[3] << 3) | (Ops[4]);
4228 }
4229
4230 // Lower the read_register intrinsic to an MRS instruction node if the special
4231 // register string argument is either of the form detailed in the ALCE (the
4232 // form described in getIntOperandsFromRegisterString) or is a named register
4233 // known by the MRS SysReg mapper.
// Also handles the 128-bit form (AArch64ISD::MRRS), which is lowered to MRRS
// and split into low/high i64 halves. Returns true iff N was replaced.
4234 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
   // Operand 1 carries the register name as metadata (an MDString).
4235   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4236   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4237   SDLoc DL(N);
4238 
4239   bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4240 
4241   unsigned Opcode64Bit = AArch64::MRS;
4242   int Imm = getIntOperandFromRegisterString(RegString->getString());
4243   if (Imm == -1) {
4244     // No match, Use the sysreg mapper to map the remaining possible strings to
4245     // the value for the register to be used for the instruction operand.
4246     const auto *TheReg =
4247         AArch64SysReg::lookupSysRegByName(RegString->getString());
4248     if (TheReg && TheReg->Readable &&
4249         TheReg->haveFeatures(Subtarget->getFeatureBits()))
4250       Imm = TheReg->Encoding;
4251     else
4252       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4253 
4254     if (Imm == -1) {
4255       // Still no match, see if this is "pc" or give up.
4256       if (!ReadIs128Bit && RegString->getString() == "pc") {
4257         Opcode64Bit = AArch64::ADR;
4258         Imm = 0;
4259       } else {
4260         return false;
4261       }
4262     }
4263   }
4264 
4265   SDValue InChain = N->getOperand(0);
4266   SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4267   if (!ReadIs128Bit) {
4268     CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4269                          {SysRegImm, InChain});
4270   } else {
4271     SDNode *MRRS = CurDAG->getMachineNode(
4272         AArch64::MRRS, DL,
4273         {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4274         {SysRegImm, InChain});
4275 
4276     // Sysregs are not endian. The even register always contains the low half
4277     // of the register.
4278     SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4279                                                 SDValue(MRRS, 0));
4280     SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4281                                                 SDValue(MRRS, 0));
4282     SDValue OutChain = SDValue(MRRS, 1);
4283 
4284     ReplaceUses(SDValue(N, 0), Lo);
4285     ReplaceUses(SDValue(N, 1), Hi);
4286     ReplaceUses(SDValue(N, 2), OutChain);
4287   };
4288   return true;
4289 }
4290
4291 // Lower the write_register intrinsic to an MSR instruction node if the special
4292 // register string argument is either of the form detailed in the ALCE (the
4293 // form described in getIntOperandsFromRegisterString) or is a named register
4294 // known by the MSR SysReg mapper.
// Also handles pstate fields (MSR immediate forms) and the 128-bit form
// (AArch64ISD::MSRR). Returns true iff N was replaced.
4295 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
   // Operand 1 carries the register name as metadata (an MDString).
4296   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4297   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4298   SDLoc DL(N);
4299 
4300   bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4301 
4302   if (!WriteIs128Bit) {
4303     // Check if the register was one of those allowed as the pstatefield value
4304     // in the MSR (immediate) instruction. To accept the values allowed in the
4305     // pstatefield for the MSR (immediate) instruction, we also require that an
4306     // immediate value has been provided as an argument, we know that this is
4307     // the case as it has been ensured by semantic checking.
4308     auto trySelectPState = [&](auto PMapper, unsigned State) {
4309       if (PMapper) {
4310         assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4311                "Expected a constant integer expression.");
4312         unsigned Reg = PMapper->Encoding;
4313         uint64_t Immed = N->getConstantOperandVal(2);
4314         CurDAG->SelectNodeTo(
4315             N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4316             CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4317         return true;
4318       }
4319       return false;
4320     };
4321 
     // Try the two pstate immediate forms before falling back to MSR (reg).
4322     if (trySelectPState(
4323             AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4324             AArch64::MSRpstateImm4))
4325       return true;
4326     if (trySelectPState(
4327             AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4328             AArch64::MSRpstateImm1))
4329       return true;
4330   }
4331 
4332   int Imm = getIntOperandFromRegisterString(RegString->getString());
4333   if (Imm == -1) {
4334     // Use the sysreg mapper to attempt to map the remaining possible strings
4335     // to the value for the register to be used for the MSR (register)
4336     // instruction operand.
4337     auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4338     if (TheReg && TheReg->Writeable &&
4339         TheReg->haveFeatures(Subtarget->getFeatureBits()))
4340       Imm = TheReg->Encoding;
4341     else
4342       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4343 
4344     if (Imm == -1)
4345       return false;
4346   }
4347 
4348   SDValue InChain = N->getOperand(0);
4349   if (!WriteIs128Bit) {
4350     CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4351                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4352                          N->getOperand(2), InChain);
4353   } else {
4354     // No endian swap. The lower half always goes into the even subreg, and the
4355     // higher half always into the odd supreg.
4356     SDNode *Pair = CurDAG->getMachineNode(
4357         TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4358         {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4359                                    MVT::i32),
4360          N->getOperand(2),
4361          CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4362          N->getOperand(3),
4363          CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4364 
4365     CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4366                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4367                          SDValue(Pair, 0), InChain);
4368   }
4369 
4370   return true;
4371 }
4372
4373/// We've got special pseudo-instructions for these
4374bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4375 unsigned Opcode;
4376 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4377
4378 // Leave IR for LSE if subtarget supports it.
4379 if (Subtarget->hasLSE()) return false;
4380
4381 if (MemTy == MVT::i8)
4382 Opcode = AArch64::CMP_SWAP_8;
4383 else if (MemTy == MVT::i16)
4384 Opcode = AArch64::CMP_SWAP_16;
4385 else if (MemTy == MVT::i32)
4386 Opcode = AArch64::CMP_SWAP_32;
4387 else if (MemTy == MVT::i64)
4388 Opcode = AArch64::CMP_SWAP_64;
4389 else
4390 llvm_unreachable("Unknown AtomicCmpSwap type");
4391
4392 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4393 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4394 N->getOperand(0)};
4395 SDNode *CmpSwap = CurDAG->getMachineNode(
4396 Opcode, SDLoc(N),
4397 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4398
4399 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4400 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4401
4402 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4403 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4404 CurDAG->RemoveDeadNode(N);
4405
4406 return true;
4407}
4408
4409bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4410 SDValue &Shift, bool Negate) {
4411 if (!isa<ConstantSDNode>(N))
4412 return false;
4413
4414 APInt Val =
4415 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4416
4417 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4418}
4419
4420bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
4421 SDValue &Imm, SDValue &Shift,
4422 bool Negate) {
4423 if (Negate)
4424 Val = -Val;
4425
4426 switch (VT.SimpleTy) {
4427 case MVT::i8:
4428 // All immediates are supported.
4429 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4430 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4431 return true;
4432 case MVT::i16:
4433 case MVT::i32:
4434 case MVT::i64:
4435 // Support 8bit unsigned immediates.
4436 if ((Val & ~0xff) == 0) {
4437 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4438 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4439 return true;
4440 }
4441 // Support 16bit unsigned immediates that are a multiple of 256.
4442 if ((Val & ~0xff00) == 0) {
4443 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4444 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4445 return true;
4446 }
4447 break;
4448 default:
4449 break;
4450 }
4451
4452 return false;
4453}
4454
// Select an immediate for the SVE signed-saturating ADD/SUB instructions.
// On success sets Imm/Shift to i32 target constants and returns true.
4455 bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4456                                                  SDValue &Imm, SDValue &Shift,
4457                                                  bool Negate) {
4458   if (!isa<ConstantSDNode>(N))
4459     return false;
4460 
4461   SDLoc DL(N);
4462   int64_t Val = cast<ConstantSDNode>(N)
4463                     ->getAPIntValue()
// NOTE(review): original line 4464 is missing from this rendering; by analogy
// with SelectSVECpyDupImm below it is presumably
// ".trunc(VT.getFixedSizeInBits())" — confirm against the repository source.
4465                     .getSExtValue();
4466 
4467   if (Negate)
4468     Val = -Val;
4469 
4470   // Signed saturating instructions treat their immediate operand as unsigned,
4471   // whereas the related intrinsics define their operands to be signed. This
4472   // means we can only use the immediate form when the operand is non-negative.
4473   if (Val < 0)
4474     return false;
4475 
4476   switch (VT.SimpleTy) {
4477   case MVT::i8:
4478     // All positive immediates are supported.
4479     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4480     Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4481     return true;
4482   case MVT::i16:
4483   case MVT::i32:
4484   case MVT::i64:
4485     // Support 8bit positive immediates.
4486     if (Val <= 255) {
4487       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4488       Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4489       return true;
4490     }
4491     // Support 16bit positive immediates that are a multiple of 256.
4492     if (Val <= 65280 && Val % 256 == 0) {
4493       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4494       Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4495       return true;
4496     }
4497     break;
4498   default:
4499     break;
4500   }
4501 
4502   return false;
4503 }
4504
4505bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4506 SDValue &Shift) {
4507 if (!isa<ConstantSDNode>(N))
4508 return false;
4509
4510 SDLoc DL(N);
4511 int64_t Val = cast<ConstantSDNode>(N)
4512 ->getAPIntValue()
4513 .trunc(VT.getFixedSizeInBits())
4514 .getSExtValue();
4515 int32_t ImmVal, ShiftVal;
4516 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4517 ShiftVal))
4518 return false;
4519
4520 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4521 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4522 return true;
4523}
4524
4525bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4526 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4527 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4528 return false;
4529}
4530
4531bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4532 SDValue &Imm) {
4533 int64_t ImmVal = Val.getSExtValue();
4534 if (ImmVal >= -128 && ImmVal < 128) {
4535 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4536 return true;
4537 }
4538 return false;
4539}
4540
4541bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4542 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4543 uint64_t ImmVal = CNode->getZExtValue();
4544
4545 switch (VT.SimpleTy) {
4546 case MVT::i8:
4547 ImmVal &= 0xFF;
4548 break;
4549 case MVT::i16:
4550 ImmVal &= 0xFFFF;
4551 break;
4552 case MVT::i32:
4553 ImmVal &= 0xFFFFFFFF;
4554 break;
4555 case MVT::i64:
4556 break;
4557 default:
4558 llvm_unreachable("Unexpected type");
4559 }
4560
4561 if (ImmVal < 256) {
4562 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4563 return true;
4564 }
4565 }
4566 return false;
4567}
4568
4569bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4570 bool Invert) {
4571 uint64_t ImmVal;
4572 if (auto CI = dyn_cast<ConstantSDNode>(N))
4573 ImmVal = CI->getZExtValue();
4574 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4575 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4576 else
4577 return false;
4578
4579 if (Invert)
4580 ImmVal = ~ImmVal;
4581
4582 uint64_t encoding;
4583 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4584 return false;
4585
4586 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4587 return true;
4588}
4589
4590// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4591// Rather than attempt to normalise everything we can sometimes saturate the
4592// shift amount during selection. This function also allows for consistent
4593// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4594// required by the instructions.
4595bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4596 uint64_t High, bool AllowSaturation,
4597 SDValue &Imm) {
4598 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4599 uint64_t ImmVal = CN->getZExtValue();
4600
4601 // Reject shift amounts that are too small.
4602 if (ImmVal < Low)
4603 return false;
4604
4605 // Reject or saturate shift amounts that are too big.
4606 if (ImmVal > High) {
4607 if (!AllowSaturation)
4608 return false;
4609 ImmVal = High;
4610 }
4611
4612 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4613 return true;
4614 }
4615
4616 return false;
4617}
4618
4619bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4620 // tagp(FrameIndex, IRGstack, tag_offset):
4621 // since the offset between FrameIndex and IRGstack is a compile-time
4622 // constant, this can be lowered to a single ADDG instruction.
4623 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4624 return false;
4625 }
4626
4627 SDValue IRG_SP = N->getOperand(2);
4628 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4629 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4630 return false;
4631 }
4632
4633 const TargetLowering *TLI = getTargetLowering();
4634 SDLoc DL(N);
4635 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4636 SDValue FiOp = CurDAG->getTargetFrameIndex(
4637 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4638 int TagOffset = N->getConstantOperandVal(3);
4639
4640 SDNode *Out = CurDAG->getMachineNode(
4641 AArch64::TAGPstack, DL, MVT::i64,
4642 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4643 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4644 ReplaceNode(N, Out);
4645 return true;
4646}
4647
4648void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4649 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4650 "llvm.aarch64.tagp third argument must be an immediate");
4651 if (trySelectStackSlotTagP(N))
4652 return;
4653 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4654 // compile-time constant, not just for stack allocations.
4655
4656 // General case for unrelated pointers in Op1 and Op2.
4657 SDLoc DL(N);
4658 int TagOffset = N->getConstantOperandVal(3);
4659 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4660 {N->getOperand(1), N->getOperand(2)});
4661 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4662 {SDValue(N1, 0), N->getOperand(2)});
4663 SDNode *N3 = CurDAG->getMachineNode(
4664 AArch64::ADDG, DL, MVT::i64,
4665 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4666 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4667 ReplaceNode(N, N3);
4668}
4669
4670bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4671 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4672
4673 // Bail when not a "cast" like insert_subvector.
4674 if (N->getConstantOperandVal(2) != 0)
4675 return false;
4676 if (!N->getOperand(0).isUndef())
4677 return false;
4678
4679 // Bail when normal isel should do the job.
4680 EVT VT = N->getValueType(0);
4681 EVT InVT = N->getOperand(1).getValueType();
4682 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4683 return false;
4684 if (InVT.getSizeInBits() <= 128)
4685 return false;
4686
4687 // NOTE: We can only get here when doing fixed length SVE code generation.
4688 // We do manual selection because the types involved are not linked to real
4689 // registers (despite being legal) and must be coerced into SVE registers.
4690
4692 "Expected to insert into a packed scalable vector!");
4693
4694 SDLoc DL(N);
4695 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4696 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4697 N->getOperand(1), RC));
4698 return true;
4699}
4700
4701bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4702 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4703
4704 // Bail when not a "cast" like extract_subvector.
4705 if (N->getConstantOperandVal(1) != 0)
4706 return false;
4707
4708 // Bail when normal isel can do the job.
4709 EVT VT = N->getValueType(0);
4710 EVT InVT = N->getOperand(0).getValueType();
4711 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4712 return false;
4713 if (VT.getSizeInBits() <= 128)
4714 return false;
4715
4716 // NOTE: We can only get here when doing fixed length SVE code generation.
4717 // We do manual selection because the types involved are not linked to real
4718 // registers (despite being legal) and must be coerced into SVE registers.
4719
4721 "Expected to extract from a packed scalable vector!");
4722
4723 SDLoc DL(N);
4724 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4725 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4726 N->getOperand(0), RC));
4727 return true;
4728}
4729
4730bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4731 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4732
4733 SDValue N0 = N->getOperand(0);
4734 SDValue N1 = N->getOperand(1);
4735
4736 EVT VT = N->getValueType(0);
4737 SDLoc DL(N);
4738
4739 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4740 // Rotate by a constant is a funnel shift in IR which is expanded to
4741 // an OR with shifted operands.
4742 // We do the following transform:
4743 // OR N0, N1 -> xar (x, y, imm)
4744 // Where:
4745 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4746 // N0 = SHL_PRED true, V, splat(bits-imm)
4747 // V = (xor x, y)
4748 if (VT.isScalableVector() &&
4749 (Subtarget->hasSVE2() ||
4750 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4751 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4752 N1.getOpcode() != AArch64ISD::SRL_PRED)
4753 std::swap(N0, N1);
4754 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4755 N1.getOpcode() != AArch64ISD::SRL_PRED)
4756 return false;
4757
4758 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4759 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4760 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4761 return false;
4762
4763 if (N0.getOperand(1) != N1.getOperand(1))
4764 return false;
4765
4766 SDValue R1, R2;
4767 bool IsXOROperand = true;
4768 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4769 IsXOROperand = false;
4770 } else {
4771 R1 = N0.getOperand(1).getOperand(0);
4772 R2 = N1.getOperand(1).getOperand(1);
4773 }
4774
4775 APInt ShlAmt, ShrAmt;
4776 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4778 return false;
4779
4780 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4781 return false;
4782
4783 if (!IsXOROperand) {
4784 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4785 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4786 SDValue MOVIV = SDValue(MOV, 0);
4787
4788 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4789 SDNode *SubRegToReg =
4790 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4791
4792 R1 = N1->getOperand(1);
4793 R2 = SDValue(SubRegToReg, 0);
4794 }
4795
4796 SDValue Imm =
4797 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4798
4799 SDValue Ops[] = {R1, R2, Imm};
4801 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4802 AArch64::XAR_ZZZI_D})) {
4803 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4804 return true;
4805 }
4806 return false;
4807 }
4808
4809 // We have Neon SHA3 XAR operation for v2i64 but for types
4810 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4811 // is available.
4812 EVT SVT;
4813 switch (VT.getSimpleVT().SimpleTy) {
4814 case MVT::v4i32:
4815 case MVT::v2i32:
4816 SVT = MVT::nxv4i32;
4817 break;
4818 case MVT::v8i16:
4819 case MVT::v4i16:
4820 SVT = MVT::nxv8i16;
4821 break;
4822 case MVT::v16i8:
4823 case MVT::v8i8:
4824 SVT = MVT::nxv16i8;
4825 break;
4826 case MVT::v2i64:
4827 case MVT::v1i64:
4828 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4829 break;
4830 default:
4831 return false;
4832 }
4833
4834 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4835 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4836 return false;
4837
4838 if (N0->getOpcode() != AArch64ISD::VSHL ||
4839 N1->getOpcode() != AArch64ISD::VLSHR)
4840 return false;
4841
4842 if (N0->getOperand(0) != N1->getOperand(0))
4843 return false;
4844
4845 SDValue R1, R2;
4846 bool IsXOROperand = true;
4847 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4848 IsXOROperand = false;
4849 } else {
4850 SDValue XOR = N0.getOperand(0);
4851 R1 = XOR.getOperand(0);
4852 R2 = XOR.getOperand(1);
4853 }
4854
4855 unsigned HsAmt = N0.getConstantOperandVal(1);
4856 unsigned ShAmt = N1.getConstantOperandVal(1);
4857
4858 SDValue Imm = CurDAG->getTargetConstant(
4859 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4860
4861 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4862 if (ShAmt + HsAmt != VTSizeInBits)
4863 return false;
4864
4865 if (!IsXOROperand) {
4866 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4867 SDNode *MOV =
4868 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4869 SDValue MOVIV = SDValue(MOV, 0);
4870
4871 R1 = N1->getOperand(0);
4872 R2 = MOVIV;
4873 }
4874
4875 if (SVT != VT) {
4876 SDValue Undef =
4877 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4878
4879 if (SVT.isScalableVector() && VT.is64BitVector()) {
4880 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4881
4882 SDValue UndefQ = SDValue(
4883 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4884 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4885
4886 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4887 UndefQ, R1, DSub),
4888 0);
4889 if (R2.getValueType() == VT)
4890 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4891 UndefQ, R2, DSub),
4892 0);
4893 }
4894
4895 SDValue SubReg = CurDAG->getTargetConstant(
4896 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4897
4898 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4899 R1, SubReg),
4900 0);
4901
4902 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4903 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4904 Undef, R2, SubReg),
4905 0);
4906 }
4907
4908 SDValue Ops[] = {R1, R2, Imm};
4909 SDNode *XAR = nullptr;
4910
4911 if (SVT.isScalableVector()) {
4913 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4914 AArch64::XAR_ZZZI_D}))
4915 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4916 } else {
4917 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4918 }
4919
4920 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4921
4922 if (SVT != VT) {
4923 if (VT.is64BitVector() && SVT.isScalableVector()) {
4924 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4925
4926 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4927 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4928 SDValue(XAR, 0), ZSub);
4929
4930 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4931 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4932 SDValue(Q, 0), DSub);
4933 } else {
4934 SDValue SubReg = CurDAG->getTargetConstant(
4935 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4936 MVT::i32);
4937 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4938 SDValue(XAR, 0), SubReg);
4939 }
4940 }
4941 ReplaceNode(N, XAR);
4942 return true;
4943}
4944
4945void AArch64DAGToDAGISel::Select(SDNode *Node) {
4946 // If we have a custom node, we already have selected!
4947 if (Node->isMachineOpcode()) {
4948 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4949 Node->setNodeId(-1);
4950 return;
4951 }
4952
4953 // Few custom selection stuff.
4954 EVT VT = Node->getValueType(0);
4955
4956 switch (Node->getOpcode()) {
4957 default:
4958 break;
4959
4961 if (SelectCMP_SWAP(Node))
4962 return;
4963 break;
4964
4965 case ISD::READ_REGISTER:
4966 case AArch64ISD::MRRS:
4967 if (tryReadRegister(Node))
4968 return;
4969 break;
4970
4972 case AArch64ISD::MSRR:
4973 if (tryWriteRegister(Node))
4974 return;
4975 break;
4976
4977 case ISD::LOAD: {
4978 // Try to select as an indexed load. Fall through to normal processing
4979 // if we can't.
4980 if (tryIndexedLoad(Node))
4981 return;
4982 break;
4983 }
4984
4985 case ISD::SRL:
4986 case ISD::AND:
4987 case ISD::SRA:
4989 if (tryBitfieldExtractOp(Node))
4990 return;
4991 if (tryBitfieldInsertInZeroOp(Node))
4992 return;
4993 [[fallthrough]];
4994 case ISD::ROTR:
4995 case ISD::SHL:
4996 if (tryShiftAmountMod(Node))
4997 return;
4998 break;
4999
5000 case ISD::SIGN_EXTEND:
5001 if (tryBitfieldExtractOpFromSExt(Node))
5002 return;
5003 break;
5004
5005 case ISD::OR:
5006 if (tryBitfieldInsertOp(Node))
5007 return;
5008 if (trySelectXAR(Node))
5009 return;
5010 break;
5011
5013 if (trySelectCastScalableToFixedLengthVector(Node))
5014 return;
5015 break;
5016 }
5017
5018 case ISD::INSERT_SUBVECTOR: {
5019 if (trySelectCastFixedLengthToScalableVector(Node))
5020 return;
5021 break;
5022 }
5023
5024 case ISD::Constant: {
5025 // Materialize zero constants as copies from WZR/XZR. This allows
5026 // the coalescer to propagate these into other instructions.
5027 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5028 if (ConstNode->isZero()) {
5029 if (VT == MVT::i32) {
5030 SDValue New = CurDAG->getCopyFromReg(
5031 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
5032 ReplaceNode(Node, New.getNode());
5033 return;
5034 } else if (VT == MVT::i64) {
5035 SDValue New = CurDAG->getCopyFromReg(
5036 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
5037 ReplaceNode(Node, New.getNode());
5038 return;
5039 }
5040 }
5041 break;
5042 }
5043
5044 case ISD::FrameIndex: {
5045 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5046 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5047 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5048 const TargetLowering *TLI = getTargetLowering();
5049 SDValue TFI = CurDAG->getTargetFrameIndex(
5050 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5051 SDLoc DL(Node);
5052 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5053 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5054 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5055 return;
5056 }
5058 unsigned IntNo = Node->getConstantOperandVal(1);
5059 switch (IntNo) {
5060 default:
5061 break;
5062 case Intrinsic::aarch64_gcsss: {
5063 SDLoc DL(Node);
5064 SDValue Chain = Node->getOperand(0);
5065 SDValue Val = Node->getOperand(2);
5066 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
5067 SDNode *SS1 =
5068 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5069 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5070 MVT::Other, Zero, SDValue(SS1, 0));
5071 ReplaceNode(Node, SS2);
5072 return;
5073 }
5074 case Intrinsic::aarch64_ldaxp:
5075 case Intrinsic::aarch64_ldxp: {
5076 unsigned Op =
5077 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5078 SDValue MemAddr = Node->getOperand(2);
5079 SDLoc DL(Node);
5080 SDValue Chain = Node->getOperand(0);
5081
5082 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5083 MVT::Other, MemAddr, Chain);
5084
5085 // Transfer memoperands.
5086 MachineMemOperand *MemOp =
5087 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5088 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5089 ReplaceNode(Node, Ld);
5090 return;
5091 }
5092 case Intrinsic::aarch64_stlxp:
5093 case Intrinsic::aarch64_stxp: {
5094 unsigned Op =
5095 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5096 SDLoc DL(Node);
5097 SDValue Chain = Node->getOperand(0);
5098 SDValue ValLo = Node->getOperand(2);
5099 SDValue ValHi = Node->getOperand(3);
5100 SDValue MemAddr = Node->getOperand(4);
5101
5102 // Place arguments in the right order.
5103 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5104
5105 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5106 // Transfer memoperands.
5107 MachineMemOperand *MemOp =
5108 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5109 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5110
5111 ReplaceNode(Node, St);
5112 return;
5113 }
5114 case Intrinsic::aarch64_neon_ld1x2:
5115 if (VT == MVT::v8i8) {
5116 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5117 return;
5118 } else if (VT == MVT::v16i8) {
5119 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5120 return;
5121 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5122 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5123 return;
5124 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5125 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5126 return;
5127 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5128 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5129 return;
5130 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5131 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5132 return;
5133 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5134 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5135 return;
5136 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5137 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5138 return;
5139 }
5140 break;
5141 case Intrinsic::aarch64_neon_ld1x3:
5142 if (VT == MVT::v8i8) {
5143 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5144 return;
5145 } else if (VT == MVT::v16i8) {
5146 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5147 return;
5148 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5149 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5150 return;
5151 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5152 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5153 return;
5154 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5155 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5156 return;
5157 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5158 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5159 return;
5160 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5161 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5162 return;
5163 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5164 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5165 return;
5166 }
5167 break;
5168 case Intrinsic::aarch64_neon_ld1x4:
5169 if (VT == MVT::v8i8) {
5170 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5171 return;
5172 } else if (VT == MVT::v16i8) {
5173 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5174 return;
5175 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5176 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5177 return;
5178 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5179 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5180 return;
5181 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5182 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5183 return;
5184 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5185 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5186 return;
5187 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5188 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5189 return;
5190 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5191 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5192 return;
5193 }
5194 break;
5195 case Intrinsic::aarch64_neon_ld2:
5196 if (VT == MVT::v8i8) {
5197 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5198 return;
5199 } else if (VT == MVT::v16i8) {
5200 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5201 return;
5202 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5203 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5204 return;
5205 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5206 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5207 return;
5208 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5209 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5210 return;
5211 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5212 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5213 return;
5214 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5215 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5216 return;
5217 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5218 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5219 return;
5220 }
5221 break;
5222 case Intrinsic::aarch64_neon_ld3:
5223 if (VT == MVT::v8i8) {
5224 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5225 return;
5226 } else if (VT == MVT::v16i8) {
5227 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5228 return;
5229 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5230 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5231 return;
5232 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5233 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5234 return;
5235 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5236 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5237 return;
5238 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5239 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5240 return;
5241 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5242 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5243 return;
5244 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5245 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5246 return;
5247 }
5248 break;
5249 case Intrinsic::aarch64_neon_ld4:
5250 if (VT == MVT::v8i8) {
5251 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5252 return;
5253 } else if (VT == MVT::v16i8) {
5254 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5255 return;
5256 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5257 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5258 return;
5259 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5260 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5261 return;
5262 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5263 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5264 return;
5265 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5266 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5267 return;
5268 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5269 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5270 return;
5271 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5272 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5273 return;
5274 }
5275 break;
5276 case Intrinsic::aarch64_neon_ld2r:
5277 if (VT == MVT::v8i8) {
5278 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5279 return;
5280 } else if (VT == MVT::v16i8) {
5281 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5282 return;
5283 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5284 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5285 return;
5286 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5287 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5288 return;
5289 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5290 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5291 return;
5292 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5293 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5294 return;
5295 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5296 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5297 return;
5298 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5299 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5300 return;
5301 }
5302 break;
5303 case Intrinsic::aarch64_neon_ld3r:
5304 if (VT == MVT::v8i8) {
5305 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5306 return;
5307 } else if (VT == MVT::v16i8) {
5308 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5309 return;
5310 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5311 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5312 return;
5313 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5314 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5315 return;
5316 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5317 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5318 return;
5319 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5320 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5321 return;
5322 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5323 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5324 return;
5325 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5326 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5327 return;
5328 }
5329 break;
5330 case Intrinsic::aarch64_neon_ld4r:
5331 if (VT == MVT::v8i8) {
5332 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5333 return;
5334 } else if (VT == MVT::v16i8) {
5335 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5336 return;
5337 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5338 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5339 return;
5340 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5341 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5342 return;
5343 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5344 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5345 return;
5346 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5347 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5348 return;
5349 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5350 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5351 return;
5352 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5353 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5354 return;
5355 }
5356 break;
5357 case Intrinsic::aarch64_neon_ld2lane:
5358 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5359 SelectLoadLane(Node, 2, AArch64::LD2i8);
5360 return;
5361 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5362 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5363 SelectLoadLane(Node, 2, AArch64::LD2i16);
5364 return;
5365 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5366 VT == MVT::v2f32) {
5367 SelectLoadLane(Node, 2, AArch64::LD2i32);
5368 return;
5369 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5370 VT == MVT::v1f64) {
5371 SelectLoadLane(Node, 2, AArch64::LD2i64);
5372 return;
5373 }
5374 break;
5375 case Intrinsic::aarch64_neon_ld3lane:
5376 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5377 SelectLoadLane(Node, 3, AArch64::LD3i8);
5378 return;
5379 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5380 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5381 SelectLoadLane(Node, 3, AArch64::LD3i16);
5382 return;
5383 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5384 VT == MVT::v2f32) {
5385 SelectLoadLane(Node, 3, AArch64::LD3i32);
5386 return;
5387 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5388 VT == MVT::v1f64) {
5389 SelectLoadLane(Node, 3, AArch64::LD3i64);
5390 return;
5391 }
5392 break;
5393 case Intrinsic::aarch64_neon_ld4lane:
5394 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5395 SelectLoadLane(Node, 4, AArch64::LD4i8);
5396 return;
5397 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5398 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5399 SelectLoadLane(Node, 4, AArch64::LD4i16);
5400 return;
5401 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5402 VT == MVT::v2f32) {
5403 SelectLoadLane(Node, 4, AArch64::LD4i32);
5404 return;
5405 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5406 VT == MVT::v1f64) {
5407 SelectLoadLane(Node, 4, AArch64::LD4i64);
5408 return;
5409 }
5410 break;
5411 case Intrinsic::aarch64_ld64b:
5412 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5413 return;
5414 case Intrinsic::aarch64_sve_ld2q_sret: {
5415 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5416 return;
5417 }
5418 case Intrinsic::aarch64_sve_ld3q_sret: {
5419 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5420 return;
5421 }
5422 case Intrinsic::aarch64_sve_ld4q_sret: {
5423 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5424 return;
5425 }
5426 case Intrinsic::aarch64_sve_ld2_sret: {
5427 if (VT == MVT::nxv16i8) {
5428 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5429 true);
5430 return;
5431 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5432 VT == MVT::nxv8bf16) {
5433 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5434 true);
5435 return;
5436 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5437 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5438 true);
5439 return;
5440 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5441 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5442 true);
5443 return;
5444 }
5445 break;
5446 }
5447 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5448 if (VT == MVT::nxv16i8) {
5449 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5450 SelectContiguousMultiVectorLoad(
5451 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5452 else if (Subtarget->hasSVE2p1())
5453 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5454 AArch64::LD1B_2Z);
5455 else
5456 break;
5457 return;
5458 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5459 VT == MVT::nxv8bf16) {
5460 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5461 SelectContiguousMultiVectorLoad(
5462 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5463 else if (Subtarget->hasSVE2p1())
5464 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5465 AArch64::LD1H_2Z);
5466 else
5467 break;
5468 return;
5469 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5470 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5471 SelectContiguousMultiVectorLoad(
5472 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5473 else if (Subtarget->hasSVE2p1())
5474 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5475 AArch64::LD1W_2Z);
5476 else
5477 break;
5478 return;
5479 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5480 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5481 SelectContiguousMultiVectorLoad(
5482 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5483 else if (Subtarget->hasSVE2p1())
5484 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5485 AArch64::LD1D_2Z);
5486 else
5487 break;
5488 return;
5489 }
5490 break;
5491 }
5492 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5493 if (VT == MVT::nxv16i8) {
5494 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5495 SelectContiguousMultiVectorLoad(
5496 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5497 else if (Subtarget->hasSVE2p1())
5498 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5499 AArch64::LD1B_4Z);
5500 else
5501 break;
5502 return;
5503 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5504 VT == MVT::nxv8bf16) {
5505 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5506 SelectContiguousMultiVectorLoad(
5507 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5508 else if (Subtarget->hasSVE2p1())
5509 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5510 AArch64::LD1H_4Z);
5511 else
5512 break;
5513 return;
5514 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5515 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5516 SelectContiguousMultiVectorLoad(
5517 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5518 else if (Subtarget->hasSVE2p1())
5519 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5520 AArch64::LD1W_4Z);
5521 else
5522 break;
5523 return;
5524 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5525 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5526 SelectContiguousMultiVectorLoad(
5527 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5528 else if (Subtarget->hasSVE2p1())
5529 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5530 AArch64::LD1D_4Z);
5531 else
5532 break;
5533 return;
5534 }
5535 break;
5536 }
5537 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5538 if (VT == MVT::nxv16i8) {
5539 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5540 SelectContiguousMultiVectorLoad(Node, 2, 0,
5541 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5542 AArch64::LDNT1B_2Z_PSEUDO);
5543 else if (Subtarget->hasSVE2p1())
5544 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5545 AArch64::LDNT1B_2Z);
5546 else
5547 break;
5548 return;
5549 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5550 VT == MVT::nxv8bf16) {
5551 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5552 SelectContiguousMultiVectorLoad(Node, 2, 1,
5553 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5554 AArch64::LDNT1H_2Z_PSEUDO);
5555 else if (Subtarget->hasSVE2p1())
5556 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5557 AArch64::LDNT1H_2Z);
5558 else
5559 break;
5560 return;
5561 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5562 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5563 SelectContiguousMultiVectorLoad(Node, 2, 2,
5564 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5565 AArch64::LDNT1W_2Z_PSEUDO);
5566 else if (Subtarget->hasSVE2p1())
5567 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5568 AArch64::LDNT1W_2Z);
5569 else
5570 break;
5571 return;
5572 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5573 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5574 SelectContiguousMultiVectorLoad(Node, 2, 3,
5575 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5576 AArch64::LDNT1D_2Z_PSEUDO);
5577 else if (Subtarget->hasSVE2p1())
5578 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5579 AArch64::LDNT1D_2Z);
5580 else
5581 break;
5582 return;
5583 }
5584 break;
5585 }
5586 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5587 if (VT == MVT::nxv16i8) {
5588 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5589 SelectContiguousMultiVectorLoad(Node, 4, 0,
5590 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5591 AArch64::LDNT1B_4Z_PSEUDO);
5592 else if (Subtarget->hasSVE2p1())
5593 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5594 AArch64::LDNT1B_4Z);
5595 else
5596 break;
5597 return;
5598 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5599 VT == MVT::nxv8bf16) {
5600 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5601 SelectContiguousMultiVectorLoad(Node, 4, 1,
5602 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5603 AArch64::LDNT1H_4Z_PSEUDO);
5604 else if (Subtarget->hasSVE2p1())
5605 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5606 AArch64::LDNT1H_4Z);
5607 else
5608 break;
5609 return;
5610 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5611 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5612 SelectContiguousMultiVectorLoad(Node, 4, 2,
5613 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5614 AArch64::LDNT1W_4Z_PSEUDO);
5615 else if (Subtarget->hasSVE2p1())
5616 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5617 AArch64::LDNT1W_4Z);
5618 else
5619 break;
5620 return;
5621 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5622 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5623 SelectContiguousMultiVectorLoad(Node, 4, 3,
5624 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5625 AArch64::LDNT1D_4Z_PSEUDO);
5626 else if (Subtarget->hasSVE2p1())
5627 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5628 AArch64::LDNT1D_4Z);
5629 else
5630 break;
5631 return;
5632 }
5633 break;
5634 }
5635 case Intrinsic::aarch64_sve_ld3_sret: {
5636 if (VT == MVT::nxv16i8) {
5637 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5638 true);
5639 return;
5640 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5641 VT == MVT::nxv8bf16) {
5642 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5643 true);
5644 return;
5645 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5646 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5647 true);
5648 return;
5649 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5650 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5651 true);
5652 return;
5653 }
5654 break;
5655 }
5656 case Intrinsic::aarch64_sve_ld4_sret: {
5657 if (VT == MVT::nxv16i8) {
5658 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5659 true);
5660 return;
5661 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5662 VT == MVT::nxv8bf16) {
5663 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5664 true);
5665 return;
5666 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5667 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5668 true);
5669 return;
5670 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5671 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5672 true);
5673 return;
5674 }
5675 break;
5676 }
5677 case Intrinsic::aarch64_sme_read_hor_vg2: {
5678 if (VT == MVT::nxv16i8) {
5679 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5680 AArch64::MOVA_2ZMXI_H_B);
5681 return;
5682 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5683 VT == MVT::nxv8bf16) {
5684 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5685 AArch64::MOVA_2ZMXI_H_H);
5686 return;
5687 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5688 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5689 AArch64::MOVA_2ZMXI_H_S);
5690 return;
5691 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5692 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5693 AArch64::MOVA_2ZMXI_H_D);
5694 return;
5695 }
5696 break;
5697 }
5698 case Intrinsic::aarch64_sme_read_ver_vg2: {
5699 if (VT == MVT::nxv16i8) {
5700 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5701 AArch64::MOVA_2ZMXI_V_B);
5702 return;
5703 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5704 VT == MVT::nxv8bf16) {
5705 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5706 AArch64::MOVA_2ZMXI_V_H);
5707 return;
5708 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5709 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5710 AArch64::MOVA_2ZMXI_V_S);
5711 return;
5712 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5713 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5714 AArch64::MOVA_2ZMXI_V_D);
5715 return;
5716 }
5717 break;
5718 }
5719 case Intrinsic::aarch64_sme_read_hor_vg4: {
5720 if (VT == MVT::nxv16i8) {
5721 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5722 AArch64::MOVA_4ZMXI_H_B);
5723 return;
5724 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5725 VT == MVT::nxv8bf16) {
5726 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5727 AArch64::MOVA_4ZMXI_H_H);
5728 return;
5729 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5730 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5731 AArch64::MOVA_4ZMXI_H_S);
5732 return;
5733 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5734 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5735 AArch64::MOVA_4ZMXI_H_D);
5736 return;
5737 }
5738 break;
5739 }
5740 case Intrinsic::aarch64_sme_read_ver_vg4: {
5741 if (VT == MVT::nxv16i8) {
5742 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5743 AArch64::MOVA_4ZMXI_V_B);
5744 return;
5745 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5746 VT == MVT::nxv8bf16) {
5747 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5748 AArch64::MOVA_4ZMXI_V_H);
5749 return;
5750 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5751 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5752 AArch64::MOVA_4ZMXI_V_S);
5753 return;
5754 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5755 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5756 AArch64::MOVA_4ZMXI_V_D);
5757 return;
5758 }
5759 break;
5760 }
5761 case Intrinsic::aarch64_sme_read_vg1x2: {
5762 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5763 AArch64::MOVA_VG2_2ZMXI);
5764 return;
5765 }
5766 case Intrinsic::aarch64_sme_read_vg1x4: {
5767 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5768 AArch64::MOVA_VG4_4ZMXI);
5769 return;
5770 }
5771 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5772 if (VT == MVT::nxv16i8) {
5773 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5774 return;
5775 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5776 VT == MVT::nxv8bf16) {
5777 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5778 return;
5779 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5780 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5781 return;
5782 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5783 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5784 return;
5785 }
5786 break;
5787 }
5788 case Intrinsic::aarch64_sme_readz_vert_x2: {
5789 if (VT == MVT::nxv16i8) {
5790 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5791 return;
5792 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5793 VT == MVT::nxv8bf16) {
5794 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5795 return;
5796 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5797 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5798 return;
5799 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5800 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5801 return;
5802 }
5803 break;
5804 }
5805 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5806 if (VT == MVT::nxv16i8) {
5807 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5808 return;
5809 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5810 VT == MVT::nxv8bf16) {
5811 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5812 return;
5813 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5814 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5815 return;
5816 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5817 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5818 return;
5819 }
5820 break;
5821 }
5822 case Intrinsic::aarch64_sme_readz_vert_x4: {
5823 if (VT == MVT::nxv16i8) {
5824 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5825 return;
5826 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5827 VT == MVT::nxv8bf16) {
5828 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5829 return;
5830 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5831 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5832 return;
5833 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5834 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5835 return;
5836 }
5837 break;
5838 }
5839 case Intrinsic::aarch64_sme_readz_x2: {
5840 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5841 AArch64::ZA);
5842 return;
5843 }
5844 case Intrinsic::aarch64_sme_readz_x4: {
5845 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5846 AArch64::ZA);
5847 return;
5848 }
5849 case Intrinsic::swift_async_context_addr: {
5850 SDLoc DL(Node);
5851 SDValue Chain = Node->getOperand(0);
5852 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5853 SDValue Res = SDValue(
5854 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5855 CurDAG->getTargetConstant(8, DL, MVT::i32),
5856 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5857 0);
5858 ReplaceUses(SDValue(Node, 0), Res);
5859 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5860 CurDAG->RemoveDeadNode(Node);
5861
5862 auto &MF = CurDAG->getMachineFunction();
5863 MF.getFrameInfo().setFrameAddressIsTaken(true);
5864 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5865 return;
5866 }
5867 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5869 Node->getValueType(0),
5870 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5871 AArch64::LUTI2_4ZTZI_S}))
5872 // Second Immediate must be <= 3:
5873 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5874 return;
5875 }
5876 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5878 Node->getValueType(0),
5879 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5880 // Second Immediate must be <= 1:
5881 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5882 return;
5883 }
5884 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5886 Node->getValueType(0),
5887 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5888 AArch64::LUTI2_2ZTZI_S}))
5889 // Second Immediate must be <= 7:
5890 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5891 return;
5892 }
5893 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5895 Node->getValueType(0),
5896 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5897 AArch64::LUTI4_2ZTZI_S}))
5898 // Second Immediate must be <= 3:
5899 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5900 return;
5901 }
5902 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5903 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5904 return;
5905 }
5906 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5908 Node->getValueType(0),
5909 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5910 SelectCVTIntrinsicFP8(Node, 2, Opc);
5911 return;
5912 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5914 Node->getValueType(0),
5915 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5916 SelectCVTIntrinsicFP8(Node, 2, Opc);
5917 return;
5918 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5920 Node->getValueType(0),
5921 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5922 SelectCVTIntrinsicFP8(Node, 2, Opc);
5923 return;
5924 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5926 Node->getValueType(0),
5927 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5928 SelectCVTIntrinsicFP8(Node, 2, Opc);
5929 return;
5930 case Intrinsic::ptrauth_resign_load_relative:
5931 SelectPtrauthResign(Node);
5932 return;
5933 }
5934 } break;
5936 unsigned IntNo = Node->getConstantOperandVal(0);
5937 switch (IntNo) {
5938 default:
5939 break;
5940 case Intrinsic::aarch64_tagp:
5941 SelectTagP(Node);
5942 return;
5943
5944 case Intrinsic::ptrauth_auth:
5945 SelectPtrauthAuth(Node);
5946 return;
5947
5948 case Intrinsic::ptrauth_resign:
5949 SelectPtrauthResign(Node);
5950 return;
5951
5952 case Intrinsic::aarch64_neon_tbl2:
5953 SelectTable(Node, 2,
5954 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5955 false);
5956 return;
5957 case Intrinsic::aarch64_neon_tbl3:
5958 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5959 : AArch64::TBLv16i8Three,
5960 false);
5961 return;
5962 case Intrinsic::aarch64_neon_tbl4:
5963 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5964 : AArch64::TBLv16i8Four,
5965 false);
5966 return;
5967 case Intrinsic::aarch64_neon_tbx2:
5968 SelectTable(Node, 2,
5969 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5970 true);
5971 return;
5972 case Intrinsic::aarch64_neon_tbx3:
5973 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5974 : AArch64::TBXv16i8Three,
5975 true);
5976 return;
5977 case Intrinsic::aarch64_neon_tbx4:
5978 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5979 : AArch64::TBXv16i8Four,
5980 true);
5981 return;
5982 case Intrinsic::aarch64_sve_srshl_single_x2:
5984 Node->getValueType(0),
5985 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5986 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5987 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5988 return;
5989 case Intrinsic::aarch64_sve_srshl_single_x4:
5991 Node->getValueType(0),
5992 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5993 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5994 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5995 return;
5996 case Intrinsic::aarch64_sve_urshl_single_x2:
5998 Node->getValueType(0),
5999 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6000 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6001 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6002 return;
6003 case Intrinsic::aarch64_sve_urshl_single_x4:
6005 Node->getValueType(0),
6006 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6007 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6008 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6009 return;
6010 case Intrinsic::aarch64_sve_srshl_x2:
6012 Node->getValueType(0),
6013 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6014 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6015 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6016 return;
6017 case Intrinsic::aarch64_sve_srshl_x4:
6019 Node->getValueType(0),
6020 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6021 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6022 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6023 return;
6024 case Intrinsic::aarch64_sve_urshl_x2:
6026 Node->getValueType(0),
6027 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6028 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6029 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6030 return;
6031 case Intrinsic::aarch64_sve_urshl_x4:
6033 Node->getValueType(0),
6034 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6035 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6036 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6037 return;
6038 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6040 Node->getValueType(0),
6041 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6042 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6043 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6044 return;
6045 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6047 Node->getValueType(0),
6048 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6049 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6050 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6051 return;
6052 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6054 Node->getValueType(0),
6055 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6056 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6057 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6058 return;
6059 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6061 Node->getValueType(0),
6062 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6063 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6064 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6065 return;
6066 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6068 Node->getValueType(0),
6069 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6070 AArch64::FSCALE_2ZZ_D}))
6071 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6072 return;
6073 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6075 Node->getValueType(0),
6076 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6077 AArch64::FSCALE_4ZZ_D}))
6078 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6079 return;
6080 case Intrinsic::aarch64_sme_fp8_scale_x2:
6082 Node->getValueType(0),
6083 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6084 AArch64::FSCALE_2Z2Z_D}))
6085 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6086 return;
6087 case Intrinsic::aarch64_sme_fp8_scale_x4:
6089 Node->getValueType(0),
6090 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6091 AArch64::FSCALE_4Z4Z_D}))
6092 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6093 return;
6094 case Intrinsic::aarch64_sve_whilege_x2:
6096 Node->getValueType(0),
6097 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6098 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6099 SelectWhilePair(Node, Op);
6100 return;
6101 case Intrinsic::aarch64_sve_whilegt_x2:
6103 Node->getValueType(0),
6104 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6105 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6106 SelectWhilePair(Node, Op);
6107 return;
6108 case Intrinsic::aarch64_sve_whilehi_x2:
6110 Node->getValueType(0),
6111 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6112 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6113 SelectWhilePair(Node, Op);
6114 return;
6115 case Intrinsic::aarch64_sve_whilehs_x2:
6117 Node->getValueType(0),
6118 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6119 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6120 SelectWhilePair(Node, Op);
6121 return;
6122 case Intrinsic::aarch64_sve_whilele_x2:
6124 Node->getValueType(0),
6125 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6126 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6127 SelectWhilePair(Node, Op);
6128 return;
6129 case Intrinsic::aarch64_sve_whilelo_x2:
6131 Node->getValueType(0),
6132 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6133 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6134 SelectWhilePair(Node, Op);
6135 return;
6136 case Intrinsic::aarch64_sve_whilels_x2:
6138 Node->getValueType(0),
6139 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6140 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6141 SelectWhilePair(Node, Op);
6142 return;
6143 case Intrinsic::aarch64_sve_whilelt_x2:
6145 Node->getValueType(0),
6146 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6147 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6148 SelectWhilePair(Node, Op);
6149 return;
6150 case Intrinsic::aarch64_sve_smax_single_x2:
6152 Node->getValueType(0),
6153 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6154 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6155 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6156 return;
6157 case Intrinsic::aarch64_sve_umax_single_x2:
6159 Node->getValueType(0),
6160 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6161 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6162 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6163 return;
6164 case Intrinsic::aarch64_sve_fmax_single_x2:
6166 Node->getValueType(0),
6167 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6168 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6169 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6170 return;
6171 case Intrinsic::aarch64_sve_smax_single_x4:
6173 Node->getValueType(0),
6174 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6175 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6176 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6177 return;
6178 case Intrinsic::aarch64_sve_umax_single_x4:
6180 Node->getValueType(0),
6181 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6182 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6183 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6184 return;
6185 case Intrinsic::aarch64_sve_fmax_single_x4:
6187 Node->getValueType(0),
6188 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6189 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6190 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6191 return;
6192 case Intrinsic::aarch64_sve_smin_single_x2:
6194 Node->getValueType(0),
6195 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6196 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6197 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6198 return;
6199 case Intrinsic::aarch64_sve_umin_single_x2:
6201 Node->getValueType(0),
6202 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6203 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6204 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6205 return;
6206 case Intrinsic::aarch64_sve_fmin_single_x2:
6208 Node->getValueType(0),
6209 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6210 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6211 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6212 return;
6213 case Intrinsic::aarch64_sve_smin_single_x4:
6215 Node->getValueType(0),
6216 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6217 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6218 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6219 return;
6220 case Intrinsic::aarch64_sve_umin_single_x4:
6222 Node->getValueType(0),
6223 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6224 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6225 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6226 return;
6227 case Intrinsic::aarch64_sve_fmin_single_x4:
6229 Node->getValueType(0),
6230 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6231 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6232 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6233 return;
6234 case Intrinsic::aarch64_sve_smax_x2:
6236 Node->getValueType(0),
6237 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6238 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6239 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6240 return;
6241 case Intrinsic::aarch64_sve_umax_x2:
6243 Node->getValueType(0),
6244 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6245 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6246 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6247 return;
6248 case Intrinsic::aarch64_sve_fmax_x2:
6250 Node->getValueType(0),
6251 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6252 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6253 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6254 return;
6255 case Intrinsic::aarch64_sve_smax_x4:
6257 Node->getValueType(0),
6258 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6259 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6260 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6261 return;
6262 case Intrinsic::aarch64_sve_umax_x4:
6264 Node->getValueType(0),
6265 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6266 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6267 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6268 return;
6269 case Intrinsic::aarch64_sve_fmax_x4:
6271 Node->getValueType(0),
6272 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6273 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6274 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6275 return;
6276 case Intrinsic::aarch64_sme_famax_x2:
6278 Node->getValueType(0),
6279 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6280 AArch64::FAMAX_2Z2Z_D}))
6281 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6282 return;
6283 case Intrinsic::aarch64_sme_famax_x4:
6285 Node->getValueType(0),
6286 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6287 AArch64::FAMAX_4Z4Z_D}))
6288 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6289 return;
6290 case Intrinsic::aarch64_sme_famin_x2:
6292 Node->getValueType(0),
6293 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6294 AArch64::FAMIN_2Z2Z_D}))
6295 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6296 return;
6297 case Intrinsic::aarch64_sme_famin_x4:
6299 Node->getValueType(0),
6300 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6301 AArch64::FAMIN_4Z4Z_D}))
6302 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6303 return;
6304 case Intrinsic::aarch64_sve_smin_x2:
6306 Node->getValueType(0),
6307 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6308 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6309 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6310 return;
6311 case Intrinsic::aarch64_sve_umin_x2:
6313 Node->getValueType(0),
6314 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6315 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6316 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6317 return;
6318 case Intrinsic::aarch64_sve_fmin_x2:
6320 Node->getValueType(0),
6321 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6322 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6323 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6324 return;
6325 case Intrinsic::aarch64_sve_smin_x4:
6327 Node->getValueType(0),
6328 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6329 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6330 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6331 return;
6332 case Intrinsic::aarch64_sve_umin_x4:
6334 Node->getValueType(0),
6335 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6336 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6337 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6338 return;
6339 case Intrinsic::aarch64_sve_fmin_x4:
6341 Node->getValueType(0),
6342 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6343 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6344 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6345 return;
6346 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6348 Node->getValueType(0),
6349 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6350 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6351 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6352 return;
6353 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6355 Node->getValueType(0),
6356 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6357 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6358 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6359 return;
6360 case Intrinsic::aarch64_sve_fminnm_single_x2:
6362 Node->getValueType(0),
6363 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6364 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6365 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6366 return;
6367 case Intrinsic::aarch64_sve_fminnm_single_x4:
6369 Node->getValueType(0),
6370 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6371 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6372 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6373 return;
6374 case Intrinsic::aarch64_sve_fscale_single_x4:
6375 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6376 return;
6377 case Intrinsic::aarch64_sve_fscale_single_x2:
6378 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6379 return;
6380 case Intrinsic::aarch64_sve_fmul_single_x4:
6382 Node->getValueType(0),
6383 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6384 AArch64::FMUL_4ZZ_D}))
6385 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6386 return;
6387 case Intrinsic::aarch64_sve_fmul_single_x2:
6389 Node->getValueType(0),
6390 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6391 AArch64::FMUL_2ZZ_D}))
6392 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6393 return;
6394 case Intrinsic::aarch64_sve_fmaxnm_x2:
6396 Node->getValueType(0),
6397 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6398 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6399 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6400 return;
6401 case Intrinsic::aarch64_sve_fmaxnm_x4:
6403 Node->getValueType(0),
6404 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6405 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6406 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6407 return;
6408 case Intrinsic::aarch64_sve_fminnm_x2:
6410 Node->getValueType(0),
6411 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6412 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6413 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6414 return;
6415 case Intrinsic::aarch64_sve_fminnm_x4:
6417 Node->getValueType(0),
6418 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6419 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6420 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6421 return;
6422 case Intrinsic::aarch64_sve_aese_lane_x2:
6423 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6424 return;
6425 case Intrinsic::aarch64_sve_aesd_lane_x2:
6426 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6427 return;
6428 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6429 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6430 return;
6431 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6432 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6433 return;
6434 case Intrinsic::aarch64_sve_aese_lane_x4:
6435 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6436 return;
6437 case Intrinsic::aarch64_sve_aesd_lane_x4:
6438 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6439 return;
6440 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6441 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6442 return;
6443 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6444 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6445 return;
6446 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6447 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6448 return;
6449 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6450 SDLoc DL(Node);
6451 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6452 SDNode *Res =
6453 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6454 SDValue SuperReg = SDValue(Res, 0);
6455 for (unsigned I = 0; I < 2; I++)
6456 ReplaceUses(SDValue(Node, I),
6457 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6458 SuperReg));
6459 CurDAG->RemoveDeadNode(Node);
6460 return;
6461 }
6462 case Intrinsic::aarch64_sve_fscale_x4:
6463 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6464 return;
6465 case Intrinsic::aarch64_sve_fscale_x2:
6466 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6467 return;
6468 case Intrinsic::aarch64_sve_fmul_x4:
6470 Node->getValueType(0),
6471 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6472 AArch64::FMUL_4Z4Z_D}))
6473 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6474 return;
6475 case Intrinsic::aarch64_sve_fmul_x2:
6477 Node->getValueType(0),
6478 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6479 AArch64::FMUL_2Z2Z_D}))
6480 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6481 return;
6482 case Intrinsic::aarch64_sve_fcvtzs_x2:
6483 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6484 return;
6485 case Intrinsic::aarch64_sve_scvtf_x2:
6486 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6487 return;
6488 case Intrinsic::aarch64_sve_fcvtzu_x2:
6489 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6490 return;
6491 case Intrinsic::aarch64_sve_ucvtf_x2:
6492 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6493 return;
6494 case Intrinsic::aarch64_sve_fcvtzs_x4:
6495 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6496 return;
6497 case Intrinsic::aarch64_sve_scvtf_x4:
6498 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6499 return;
6500 case Intrinsic::aarch64_sve_fcvtzu_x4:
6501 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6502 return;
6503 case Intrinsic::aarch64_sve_ucvtf_x4:
6504 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6505 return;
6506 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6507 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6508 return;
6509 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6510 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6511 return;
6512 case Intrinsic::aarch64_sve_sclamp_single_x2:
6514 Node->getValueType(0),
6515 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6516 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6517 SelectClamp(Node, 2, Op);
6518 return;
6519 case Intrinsic::aarch64_sve_uclamp_single_x2:
6521 Node->getValueType(0),
6522 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6523 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6524 SelectClamp(Node, 2, Op);
6525 return;
6526 case Intrinsic::aarch64_sve_fclamp_single_x2:
6528 Node->getValueType(0),
6529 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6530 AArch64::FCLAMP_VG2_2Z2Z_D}))
6531 SelectClamp(Node, 2, Op);
6532 return;
6533 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6534 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6535 return;
6536 case Intrinsic::aarch64_sve_sclamp_single_x4:
6538 Node->getValueType(0),
6539 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6540 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6541 SelectClamp(Node, 4, Op);
6542 return;
6543 case Intrinsic::aarch64_sve_uclamp_single_x4:
6545 Node->getValueType(0),
6546 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6547 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6548 SelectClamp(Node, 4, Op);
6549 return;
6550 case Intrinsic::aarch64_sve_fclamp_single_x4:
6552 Node->getValueType(0),
6553 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6554 AArch64::FCLAMP_VG4_4Z4Z_D}))
6555 SelectClamp(Node, 4, Op);
6556 return;
6557 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6558 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6559 return;
6560 case Intrinsic::aarch64_sve_add_single_x2:
6562 Node->getValueType(0),
6563 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6564 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6565 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6566 return;
6567 case Intrinsic::aarch64_sve_add_single_x4:
6569 Node->getValueType(0),
6570 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6571 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6572 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6573 return;
6574 case Intrinsic::aarch64_sve_zip_x2:
6576 Node->getValueType(0),
6577 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6578 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6579 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6580 return;
6581 case Intrinsic::aarch64_sve_zipq_x2:
6582 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6583 AArch64::ZIP_VG2_2ZZZ_Q);
6584 return;
6585 case Intrinsic::aarch64_sve_zip_x4:
6587 Node->getValueType(0),
6588 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6589 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6590 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6591 return;
6592 case Intrinsic::aarch64_sve_zipq_x4:
6593 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6594 AArch64::ZIP_VG4_4Z4Z_Q);
6595 return;
6596 case Intrinsic::aarch64_sve_uzp_x2:
6598 Node->getValueType(0),
6599 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6600 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6601 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6602 return;
6603 case Intrinsic::aarch64_sve_uzpq_x2:
6604 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6605 AArch64::UZP_VG2_2ZZZ_Q);
6606 return;
6607 case Intrinsic::aarch64_sve_uzp_x4:
6609 Node->getValueType(0),
6610 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6611 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6612 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6613 return;
6614 case Intrinsic::aarch64_sve_uzpq_x4:
6615 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6616 AArch64::UZP_VG4_4Z4Z_Q);
6617 return;
6618 case Intrinsic::aarch64_sve_sel_x2:
6620 Node->getValueType(0),
6621 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6622 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6623 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6624 return;
6625 case Intrinsic::aarch64_sve_sel_x4:
6627 Node->getValueType(0),
6628 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6629 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6630 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6631 return;
6632 case Intrinsic::aarch64_sve_frinta_x2:
6633 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6634 return;
6635 case Intrinsic::aarch64_sve_frinta_x4:
6636 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6637 return;
6638 case Intrinsic::aarch64_sve_frintm_x2:
6639 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6640 return;
6641 case Intrinsic::aarch64_sve_frintm_x4:
6642 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6643 return;
6644 case Intrinsic::aarch64_sve_frintn_x2:
6645 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6646 return;
6647 case Intrinsic::aarch64_sve_frintn_x4:
6648 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6649 return;
6650 case Intrinsic::aarch64_sve_frintp_x2:
6651 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6652 return;
6653 case Intrinsic::aarch64_sve_frintp_x4:
6654 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6655 return;
6656 case Intrinsic::aarch64_sve_sunpk_x2:
6658 Node->getValueType(0),
6659 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6660 AArch64::SUNPK_VG2_2ZZ_D}))
6661 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6662 return;
6663 case Intrinsic::aarch64_sve_uunpk_x2:
6665 Node->getValueType(0),
6666 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6667 AArch64::UUNPK_VG2_2ZZ_D}))
6668 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6669 return;
6670 case Intrinsic::aarch64_sve_sunpk_x4:
6672 Node->getValueType(0),
6673 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6674 AArch64::SUNPK_VG4_4Z2Z_D}))
6675 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6676 return;
6677 case Intrinsic::aarch64_sve_uunpk_x4:
6679 Node->getValueType(0),
6680 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6681 AArch64::UUNPK_VG4_4Z2Z_D}))
6682 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6683 return;
6684 case Intrinsic::aarch64_sve_pext_x2: {
6686 Node->getValueType(0),
6687 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6688 AArch64::PEXT_2PCI_D}))
6689 SelectPExtPair(Node, Op);
6690 return;
6691 }
6692 }
6693 break;
6694 }
6695 case ISD::INTRINSIC_VOID: {
6696 unsigned IntNo = Node->getConstantOperandVal(1);
6697 if (Node->getNumOperands() >= 3)
6698 VT = Node->getOperand(2)->getValueType(0);
6699 switch (IntNo) {
6700 default:
6701 break;
6702 case Intrinsic::aarch64_neon_st1x2: {
6703 if (VT == MVT::v8i8) {
6704 SelectStore(Node, 2, AArch64::ST1Twov8b);
6705 return;
6706 } else if (VT == MVT::v16i8) {
6707 SelectStore(Node, 2, AArch64::ST1Twov16b);
6708 return;
6709 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6710 VT == MVT::v4bf16) {
6711 SelectStore(Node, 2, AArch64::ST1Twov4h);
6712 return;
6713 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6714 VT == MVT::v8bf16) {
6715 SelectStore(Node, 2, AArch64::ST1Twov8h);
6716 return;
6717 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6718 SelectStore(Node, 2, AArch64::ST1Twov2s);
6719 return;
6720 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6721 SelectStore(Node, 2, AArch64::ST1Twov4s);
6722 return;
6723 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6724 SelectStore(Node, 2, AArch64::ST1Twov2d);
6725 return;
6726 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6727 SelectStore(Node, 2, AArch64::ST1Twov1d);
6728 return;
6729 }
6730 break;
6731 }
6732 case Intrinsic::aarch64_neon_st1x3: {
6733 if (VT == MVT::v8i8) {
6734 SelectStore(Node, 3, AArch64::ST1Threev8b);
6735 return;
6736 } else if (VT == MVT::v16i8) {
6737 SelectStore(Node, 3, AArch64::ST1Threev16b);
6738 return;
6739 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6740 VT == MVT::v4bf16) {
6741 SelectStore(Node, 3, AArch64::ST1Threev4h);
6742 return;
6743 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6744 VT == MVT::v8bf16) {
6745 SelectStore(Node, 3, AArch64::ST1Threev8h);
6746 return;
6747 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6748 SelectStore(Node, 3, AArch64::ST1Threev2s);
6749 return;
6750 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6751 SelectStore(Node, 3, AArch64::ST1Threev4s);
6752 return;
6753 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6754 SelectStore(Node, 3, AArch64::ST1Threev2d);
6755 return;
6756 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6757 SelectStore(Node, 3, AArch64::ST1Threev1d);
6758 return;
6759 }
6760 break;
6761 }
6762 case Intrinsic::aarch64_neon_st1x4: {
6763 if (VT == MVT::v8i8) {
6764 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6765 return;
6766 } else if (VT == MVT::v16i8) {
6767 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6768 return;
6769 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6770 VT == MVT::v4bf16) {
6771 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6772 return;
6773 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6774 VT == MVT::v8bf16) {
6775 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6776 return;
6777 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6778 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6779 return;
6780 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6781 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6782 return;
6783 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6784 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6785 return;
6786 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6787 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6788 return;
6789 }
6790 break;
6791 }
6792 case Intrinsic::aarch64_neon_st2: {
6793 if (VT == MVT::v8i8) {
6794 SelectStore(Node, 2, AArch64::ST2Twov8b);
6795 return;
6796 } else if (VT == MVT::v16i8) {
6797 SelectStore(Node, 2, AArch64::ST2Twov16b);
6798 return;
6799 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6800 VT == MVT::v4bf16) {
6801 SelectStore(Node, 2, AArch64::ST2Twov4h);
6802 return;
6803 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6804 VT == MVT::v8bf16) {
6805 SelectStore(Node, 2, AArch64::ST2Twov8h);
6806 return;
6807 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6808 SelectStore(Node, 2, AArch64::ST2Twov2s);
6809 return;
6810 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6811 SelectStore(Node, 2, AArch64::ST2Twov4s);
6812 return;
6813 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6814 SelectStore(Node, 2, AArch64::ST2Twov2d);
6815 return;
6816 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6817 SelectStore(Node, 2, AArch64::ST1Twov1d);
6818 return;
6819 }
6820 break;
6821 }
6822 case Intrinsic::aarch64_neon_st3: {
6823 if (VT == MVT::v8i8) {
6824 SelectStore(Node, 3, AArch64::ST3Threev8b);
6825 return;
6826 } else if (VT == MVT::v16i8) {
6827 SelectStore(Node, 3, AArch64::ST3Threev16b);
6828 return;
6829 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6830 VT == MVT::v4bf16) {
6831 SelectStore(Node, 3, AArch64::ST3Threev4h);
6832 return;
6833 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6834 VT == MVT::v8bf16) {
6835 SelectStore(Node, 3, AArch64::ST3Threev8h);
6836 return;
6837 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6838 SelectStore(Node, 3, AArch64::ST3Threev2s);
6839 return;
6840 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6841 SelectStore(Node, 3, AArch64::ST3Threev4s);
6842 return;
6843 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6844 SelectStore(Node, 3, AArch64::ST3Threev2d);
6845 return;
6846 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6847 SelectStore(Node, 3, AArch64::ST1Threev1d);
6848 return;
6849 }
6850 break;
6851 }
6852 case Intrinsic::aarch64_neon_st4: {
6853 if (VT == MVT::v8i8) {
6854 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6855 return;
6856 } else if (VT == MVT::v16i8) {
6857 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6858 return;
6859 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6860 VT == MVT::v4bf16) {
6861 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6862 return;
6863 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6864 VT == MVT::v8bf16) {
6865 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6866 return;
6867 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6868 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6869 return;
6870 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6871 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6872 return;
6873 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6874 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6875 return;
6876 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6877 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6878 return;
6879 }
6880 break;
6881 }
6882 case Intrinsic::aarch64_neon_st2lane: {
6883 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6884 SelectStoreLane(Node, 2, AArch64::ST2i8);
6885 return;
6886 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6887 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6888 SelectStoreLane(Node, 2, AArch64::ST2i16);
6889 return;
6890 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6891 VT == MVT::v2f32) {
6892 SelectStoreLane(Node, 2, AArch64::ST2i32);
6893 return;
6894 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6895 VT == MVT::v1f64) {
6896 SelectStoreLane(Node, 2, AArch64::ST2i64);
6897 return;
6898 }
6899 break;
6900 }
6901 case Intrinsic::aarch64_neon_st3lane: {
6902 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6903 SelectStoreLane(Node, 3, AArch64::ST3i8);
6904 return;
6905 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6906 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6907 SelectStoreLane(Node, 3, AArch64::ST3i16);
6908 return;
6909 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6910 VT == MVT::v2f32) {
6911 SelectStoreLane(Node, 3, AArch64::ST3i32);
6912 return;
6913 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6914 VT == MVT::v1f64) {
6915 SelectStoreLane(Node, 3, AArch64::ST3i64);
6916 return;
6917 }
6918 break;
6919 }
6920 case Intrinsic::aarch64_neon_st4lane: {
6921 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6922 SelectStoreLane(Node, 4, AArch64::ST4i8);
6923 return;
6924 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6925 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6926 SelectStoreLane(Node, 4, AArch64::ST4i16);
6927 return;
6928 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6929 VT == MVT::v2f32) {
6930 SelectStoreLane(Node, 4, AArch64::ST4i32);
6931 return;
6932 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6933 VT == MVT::v1f64) {
6934 SelectStoreLane(Node, 4, AArch64::ST4i64);
6935 return;
6936 }
6937 break;
6938 }
6939 case Intrinsic::aarch64_sve_st2q: {
6940 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6941 return;
6942 }
6943 case Intrinsic::aarch64_sve_st3q: {
6944 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6945 return;
6946 }
6947 case Intrinsic::aarch64_sve_st4q: {
6948 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6949 return;
6950 }
6951 case Intrinsic::aarch64_sve_st2: {
6952 if (VT == MVT::nxv16i8) {
6953 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6954 return;
6955 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6956 VT == MVT::nxv8bf16) {
6957 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6958 return;
6959 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6960 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6961 return;
6962 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6963 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6964 return;
6965 }
6966 break;
6967 }
6968 case Intrinsic::aarch64_sve_st3: {
6969 if (VT == MVT::nxv16i8) {
6970 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6971 return;
6972 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6973 VT == MVT::nxv8bf16) {
6974 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6975 return;
6976 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6977 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6978 return;
6979 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6980 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6981 return;
6982 }
6983 break;
6984 }
6985 case Intrinsic::aarch64_sve_st4: {
6986 if (VT == MVT::nxv16i8) {
6987 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6988 return;
6989 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6990 VT == MVT::nxv8bf16) {
6991 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6992 return;
6993 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6994 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6995 return;
6996 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6997 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6998 return;
6999 }
7000 break;
7001 }
7002 }
7003 break;
7004 }
7005 case AArch64ISD::LD2post: {
7006 if (VT == MVT::v8i8) {
7007 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7008 return;
7009 } else if (VT == MVT::v16i8) {
7010 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7011 return;
7012 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7013 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7014 return;
7015 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7016 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7017 return;
7018 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7019 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7020 return;
7021 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7022 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7023 return;
7024 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7025 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7026 return;
7027 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7028 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7029 return;
7030 }
7031 break;
7032 }
7033 case AArch64ISD::LD3post: {
7034 if (VT == MVT::v8i8) {
7035 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7036 return;
7037 } else if (VT == MVT::v16i8) {
7038 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7039 return;
7040 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7041 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7042 return;
7043 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7044 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7045 return;
7046 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7047 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7048 return;
7049 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7050 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7051 return;
7052 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7053 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7054 return;
7055 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7056 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7057 return;
7058 }
7059 break;
7060 }
7061 case AArch64ISD::LD4post: {
7062 if (VT == MVT::v8i8) {
7063 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7064 return;
7065 } else if (VT == MVT::v16i8) {
7066 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7067 return;
7068 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7069 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7070 return;
7071 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7072 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7073 return;
7074 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7075 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7076 return;
7077 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7078 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7079 return;
7080 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7081 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7082 return;
7083 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7084 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7085 return;
7086 }
7087 break;
7088 }
7089 case AArch64ISD::LD1x2post: {
7090 if (VT == MVT::v8i8) {
7091 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7092 return;
7093 } else if (VT == MVT::v16i8) {
7094 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7095 return;
7096 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7097 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7098 return;
7099 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7100 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7101 return;
7102 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7103 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7104 return;
7105 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7106 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7107 return;
7108 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7109 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7110 return;
7111 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7112 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7113 return;
7114 }
7115 break;
7116 }
7117 case AArch64ISD::LD1x3post: {
7118 if (VT == MVT::v8i8) {
7119 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7120 return;
7121 } else if (VT == MVT::v16i8) {
7122 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7123 return;
7124 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7125 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7126 return;
7127 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7128 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7129 return;
7130 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7131 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7132 return;
7133 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7134 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7135 return;
7136 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7137 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7138 return;
7139 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7140 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7141 return;
7142 }
7143 break;
7144 }
7145 case AArch64ISD::LD1x4post: {
7146 if (VT == MVT::v8i8) {
7147 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7148 return;
7149 } else if (VT == MVT::v16i8) {
7150 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7151 return;
7152 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7153 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7154 return;
7155 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7156 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7157 return;
7158 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7159 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7160 return;
7161 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7162 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7163 return;
7164 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7165 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7166 return;
7167 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7168 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7169 return;
7170 }
7171 break;
7172 }
7173 case AArch64ISD::LD1DUPpost: {
7174 if (VT == MVT::v8i8) {
7175 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7176 return;
7177 } else if (VT == MVT::v16i8) {
7178 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7179 return;
7180 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7181 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7182 return;
7183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7184 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7185 return;
7186 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7187 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7188 return;
7189 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7190 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7191 return;
7192 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7193 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7194 return;
7195 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7196 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7197 return;
7198 }
7199 break;
7200 }
7201 case AArch64ISD::LD2DUPpost: {
7202 if (VT == MVT::v8i8) {
7203 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7204 return;
7205 } else if (VT == MVT::v16i8) {
7206 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7207 return;
7208 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7209 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7210 return;
7211 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7212 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7213 return;
7214 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7215 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7216 return;
7217 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7218 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7219 return;
7220 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7221 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7222 return;
7223 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7224 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7225 return;
7226 }
7227 break;
7228 }
7229 case AArch64ISD::LD3DUPpost: {
7230 if (VT == MVT::v8i8) {
7231 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7232 return;
7233 } else if (VT == MVT::v16i8) {
7234 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7235 return;
7236 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7237 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7238 return;
7239 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7240 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7241 return;
7242 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7243 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7244 return;
7245 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7246 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7247 return;
7248 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7249 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7250 return;
7251 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7252 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7253 return;
7254 }
7255 break;
7256 }
7257 case AArch64ISD::LD4DUPpost: {
7258 if (VT == MVT::v8i8) {
7259 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7260 return;
7261 } else if (VT == MVT::v16i8) {
7262 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7263 return;
7264 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7265 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7266 return;
7267 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7268 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7269 return;
7270 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7271 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7272 return;
7273 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7274 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7275 return;
7276 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7277 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7278 return;
7279 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7280 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7281 return;
7282 }
7283 break;
7284 }
7285 case AArch64ISD::LD1LANEpost: {
7286 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7287 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7288 return;
7289 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7290 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7291 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7292 return;
7293 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7294 VT == MVT::v2f32) {
7295 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7296 return;
7297 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7298 VT == MVT::v1f64) {
7299 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7300 return;
7301 }
7302 break;
7303 }
7304 case AArch64ISD::LD2LANEpost: {
7305 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7306 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7307 return;
7308 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7309 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7310 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7311 return;
7312 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7313 VT == MVT::v2f32) {
7314 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7315 return;
7316 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7317 VT == MVT::v1f64) {
7318 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7319 return;
7320 }
7321 break;
7322 }
7323 case AArch64ISD::LD3LANEpost: {
7324 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7325 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7326 return;
7327 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7328 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7329 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7330 return;
7331 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7332 VT == MVT::v2f32) {
7333 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7334 return;
7335 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7336 VT == MVT::v1f64) {
7337 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7338 return;
7339 }
7340 break;
7341 }
7342 case AArch64ISD::LD4LANEpost: {
7343 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7344 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7345 return;
7346 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7347 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7348 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7349 return;
7350 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7351 VT == MVT::v2f32) {
7352 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7353 return;
7354 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7355 VT == MVT::v1f64) {
7356 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7357 return;
7358 }
7359 break;
7360 }
7361 case AArch64ISD::ST2post: {
7362 VT = Node->getOperand(1).getValueType();
7363 if (VT == MVT::v8i8) {
7364 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7365 return;
7366 } else if (VT == MVT::v16i8) {
7367 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7368 return;
7369 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7370 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7371 return;
7372 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7373 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7374 return;
7375 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7376 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7377 return;
7378 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7379 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7380 return;
7381 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7382 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7383 return;
7384 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7385 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7386 return;
7387 }
7388 break;
7389 }
7390 case AArch64ISD::ST3post: {
7391 VT = Node->getOperand(1).getValueType();
7392 if (VT == MVT::v8i8) {
7393 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7394 return;
7395 } else if (VT == MVT::v16i8) {
7396 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7397 return;
7398 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7399 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7400 return;
7401 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7402 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7403 return;
7404 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7405 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7406 return;
7407 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7408 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7409 return;
7410 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7411 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7412 return;
7413 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7414 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7415 return;
7416 }
7417 break;
7418 }
7419 case AArch64ISD::ST4post: {
7420 VT = Node->getOperand(1).getValueType();
7421 if (VT == MVT::v8i8) {
7422 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7423 return;
7424 } else if (VT == MVT::v16i8) {
7425 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7426 return;
7427 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7428 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7429 return;
7430 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7431 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7432 return;
7433 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7434 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7435 return;
7436 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7437 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7438 return;
7439 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7440 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7441 return;
7442 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7443 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7444 return;
7445 }
7446 break;
7447 }
7448 case AArch64ISD::ST1x2post: {
7449 VT = Node->getOperand(1).getValueType();
7450 if (VT == MVT::v8i8) {
7451 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7452 return;
7453 } else if (VT == MVT::v16i8) {
7454 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7455 return;
7456 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7457 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7458 return;
7459 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7460 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7461 return;
7462 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7463 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7464 return;
7465 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7466 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7467 return;
7468 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7469 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7470 return;
7471 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7472 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7473 return;
7474 }
7475 break;
7476 }
7477 case AArch64ISD::ST1x3post: {
7478 VT = Node->getOperand(1).getValueType();
7479 if (VT == MVT::v8i8) {
7480 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7481 return;
7482 } else if (VT == MVT::v16i8) {
7483 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7484 return;
7485 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7486 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7487 return;
7488 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7489 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7490 return;
7491 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7492 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7493 return;
7494 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7495 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7496 return;
7497 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7498 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7499 return;
7500 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7501 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7502 return;
7503 }
7504 break;
7505 }
7506 case AArch64ISD::ST1x4post: {
7507 VT = Node->getOperand(1).getValueType();
7508 if (VT == MVT::v8i8) {
7509 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7510 return;
7511 } else if (VT == MVT::v16i8) {
7512 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7513 return;
7514 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7515 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7516 return;
7517 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7518 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7519 return;
7520 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7521 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7522 return;
7523 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7524 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7525 return;
7526 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7527 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7528 return;
7529 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7530 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7531 return;
7532 }
7533 break;
7534 }
7535 case AArch64ISD::ST2LANEpost: {
7536 VT = Node->getOperand(1).getValueType();
7537 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7538 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7539 return;
7540 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7541 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7542 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7543 return;
7544 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7545 VT == MVT::v2f32) {
7546 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7547 return;
7548 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7549 VT == MVT::v1f64) {
7550 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7551 return;
7552 }
7553 break;
7554 }
7555 case AArch64ISD::ST3LANEpost: {
7556 VT = Node->getOperand(1).getValueType();
7557 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7558 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7559 return;
7560 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7561 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7562 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7563 return;
7564 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7565 VT == MVT::v2f32) {
7566 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7567 return;
7568 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7569 VT == MVT::v1f64) {
7570 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7571 return;
7572 }
7573 break;
7574 }
7575 case AArch64ISD::ST4LANEpost: {
7576 VT = Node->getOperand(1).getValueType();
7577 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7578 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7579 return;
7580 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7581 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7582 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7583 return;
7584 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7585 VT == MVT::v2f32) {
7586 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7587 return;
7588 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7589 VT == MVT::v1f64) {
7590 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7591 return;
7592 }
7593 break;
7594 }
7595 }
7596
7597 // Select the default instruction
7598 SelectCode(Node);
7599}
7600
7601/// createAArch64ISelDag - This pass converts a legalized DAG into a
7602/// AArch64-specific DAG, ready for instruction scheduling.
7604 CodeGenOptLevel OptLevel) {
7605 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7606}
7607
7608/// When \p PredVT is a scalable vector predicate in the form
7609/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7610/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7611/// structured vectors (NumVec >1), the output data type is
7612/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7613/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7614/// EVT.
7616 unsigned NumVec) {
7617 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7618 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7619 return EVT();
7620
7621 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7622 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7623 return EVT();
7624
7625 ElementCount EC = PredVT.getVectorElementCount();
7626 EVT ScalarVT =
7627 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7628 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7629
7630 return MemVT;
7631}
7632
7633/// Return the EVT of the data associated to a memory operation in \p
7634/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7636 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7637 return MemIntr->getMemoryVT();
7638
7639 if (isa<MemSDNode>(Root)) {
7640 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7641
7642 EVT DataVT;
7643 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7644 DataVT = Load->getValueType(0);
7645 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7646 DataVT = Load->getValueType(0);
7647 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7648 DataVT = Store->getValue().getValueType();
7649 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7650 DataVT = Store->getValue().getValueType();
7651 else
7652 llvm_unreachable("Unexpected MemSDNode!");
7653
7654 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7655 }
7656
7657 const unsigned Opcode = Root->getOpcode();
7658 // For custom ISD nodes, we have to look at them individually to extract the
7659 // type of the data moved to/from memory.
7660 switch (Opcode) {
7661 case AArch64ISD::LD1_MERGE_ZERO:
7662 case AArch64ISD::LD1S_MERGE_ZERO:
7663 case AArch64ISD::LDNF1_MERGE_ZERO:
7664 case AArch64ISD::LDNF1S_MERGE_ZERO:
7665 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7666 case AArch64ISD::ST1_PRED:
7667 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7668 default:
7669 break;
7670 }
7671
7672 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7673 return EVT();
7674
7675 switch (Root->getConstantOperandVal(1)) {
7676 default:
7677 return EVT();
7678 case Intrinsic::aarch64_sme_ldr:
7679 case Intrinsic::aarch64_sme_str:
7680 return MVT::nxv16i8;
7681 case Intrinsic::aarch64_sve_prf:
7682 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7683 // width of the predicate.
7685 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7686 case Intrinsic::aarch64_sve_ld2_sret:
7687 case Intrinsic::aarch64_sve_ld2q_sret:
7689 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7690 case Intrinsic::aarch64_sve_st2q:
7692 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7693 case Intrinsic::aarch64_sve_ld3_sret:
7694 case Intrinsic::aarch64_sve_ld3q_sret:
7696 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7697 case Intrinsic::aarch64_sve_st3q:
7699 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7700 case Intrinsic::aarch64_sve_ld4_sret:
7701 case Intrinsic::aarch64_sve_ld4q_sret:
7703 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7704 case Intrinsic::aarch64_sve_st4q:
7706 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7707 case Intrinsic::aarch64_sve_ld1udq:
7708 case Intrinsic::aarch64_sve_st1dq:
7709 return EVT(MVT::nxv1i64);
7710 case Intrinsic::aarch64_sve_ld1uwq:
7711 case Intrinsic::aarch64_sve_st1wq:
7712 return EVT(MVT::nxv1i32);
7713 }
7714}
7715
7716/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7717/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
7718/// where Root is the memory access using N for its address.
7719template <int64_t Min, int64_t Max>
7720bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7721 SDValue &Base,
7722 SDValue &OffImm) {
7723 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7724 const DataLayout &DL = CurDAG->getDataLayout();
7725 const MachineFrameInfo &MFI = MF->getFrameInfo();
7726
7727 if (N.getOpcode() == ISD::FrameIndex) {
7728 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7729 // We can only encode VL scaled offsets, so only fold in frame indexes
7730 // referencing SVE objects.
7731 if (MFI.hasScalableStackID(FI)) {
7732 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7733 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7734 return true;
7735 }
7736
7737 return false;
7738 }
7739
7740 if (MemVT == EVT())
7741 return false;
7742
7743 if (N.getOpcode() != ISD::ADD)
7744 return false;
7745
7746 SDValue VScale = N.getOperand(1);
7747 int64_t MulImm = std::numeric_limits<int64_t>::max();
7748 if (VScale.getOpcode() == ISD::VSCALE) {
7749 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7750 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7751 int64_t ByteOffset = C->getSExtValue();
7752 const auto KnownVScale =
7754
7755 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7756 return false;
7757
7758 MulImm = ByteOffset / KnownVScale;
7759 } else
7760 return false;
7761
7762 TypeSize TS = MemVT.getSizeInBits();
7763 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7764
7765 if ((MulImm % MemWidthBytes) != 0)
7766 return false;
7767
7768 int64_t Offset = MulImm / MemWidthBytes;
7770 return false;
7771
7772 Base = N.getOperand(0);
7773 if (Base.getOpcode() == ISD::FrameIndex) {
7774 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7775 // We can only encode VL scaled offsets, so only fold in frame indexes
7776 // referencing SVE objects.
7777 if (MFI.hasScalableStackID(FI))
7778 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7779 }
7780
7781 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7782 return true;
7783}
7784
7785/// Select register plus register addressing mode for SVE, with scaled
7786/// offset.
7787bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7788 SDValue &Base,
7789 SDValue &Offset) {
7790 if (N.getOpcode() != ISD::ADD)
7791 return false;
7792
7793 // Process an ADD node.
7794 const SDValue LHS = N.getOperand(0);
7795 const SDValue RHS = N.getOperand(1);
7796
7797 // 8 bit data does not come with the SHL node, so it is treated
7798 // separately.
7799 if (Scale == 0) {
7800 Base = LHS;
7801 Offset = RHS;
7802 return true;
7803 }
7804
7805 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7806 int64_t ImmOff = C->getSExtValue();
7807 unsigned Size = 1 << Scale;
7808
7809 // To use the reg+reg addressing mode, the immediate must be a multiple of
7810 // the vector element's byte size.
7811 if (ImmOff % Size)
7812 return false;
7813
7814 SDLoc DL(N);
7815 Base = LHS;
7816 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7817 SDValue Ops[] = {Offset};
7818 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7819 Offset = SDValue(MI, 0);
7820 return true;
7821 }
7822
7823 // Check if the RHS is a shift node with a constant.
7824 if (RHS.getOpcode() != ISD::SHL)
7825 return false;
7826
7827 const SDValue ShiftRHS = RHS.getOperand(1);
7828 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7829 if (C->getZExtValue() == Scale) {
7830 Base = LHS;
7831 Offset = RHS.getOperand(0);
7832 return true;
7833 }
7834
7835 return false;
7836}
7837
7838bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7839 const AArch64TargetLowering *TLI =
7840 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7841
7842 return TLI->isAllActivePredicate(*CurDAG, N);
7843}
7844
7845bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7846 EVT VT = N.getValueType();
7847 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7848}
7849
7850bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7852 unsigned Scale) {
7853 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7854 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7855 int64_t ImmOff = C->getSExtValue();
7856 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7857 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7858 }
7859 return SDValue();
7860 };
7861
7862 if (SDValue C = MatchConstantOffset(N)) {
7863 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7864 Offset = C;
7865 return true;
7866 }
7867
7868 // Try to untangle an ADD node into a 'reg + offset'
7869 if (CurDAG->isBaseWithConstantOffset(N)) {
7870 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7871 Base = N.getOperand(0);
7872 Offset = C;
7873 return true;
7874 }
7875 }
7876
7877 // By default, just match reg + 0.
7878 Base = N;
7879 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7880 return true;
7881}
7882
7883bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7884 SDValue &Imm) {
7886 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7887 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7888 // Check conservatively if the immediate fits the valid range [0, 64).
7889 // Immediate variants for GE and HS definitely need to be decremented
7890 // when lowering the pseudos later, so an immediate of 1 would become 0.
7891 // For the inverse conditions LT and LO we don't know for sure if they
7892 // will need a decrement but should the decision be made to reverse the
7893 // branch condition, we again end up with the need to decrement.
7894 // The same argument holds for LE, LS, GT and HI and possibly
7895 // incremented immediates. This can lead to slightly less optimal
7896 // codegen, e.g. we never codegen the legal case
7897 // cblt w0, #63, A
7898 // because we could end up with the illegal case
7899 // cbge w0, #64, B
7900 // should the decision to reverse the branch direction be made. For the
7901 // lower bound cases this is no problem since we can express comparisons
7902 // against 0 with either tbz/tnbz or using wzr/xzr.
7903 uint64_t LowerBound = 0, UpperBound = 64;
7904 switch (CC) {
7905 case AArch64CC::GE:
7906 case AArch64CC::HS:
7907 case AArch64CC::LT:
7908 case AArch64CC::LO:
7909 LowerBound = 1;
7910 break;
7911 case AArch64CC::LE:
7912 case AArch64CC::LS:
7913 case AArch64CC::GT:
7914 case AArch64CC::HI:
7915 UpperBound = 63;
7916 break;
7917 default:
7918 break;
7919 }
7920
7921 if (CN->getAPIntValue().uge(LowerBound) &&
7922 CN->getAPIntValue().ult(UpperBound)) {
7923 SDLoc DL(N);
7924 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7925 return true;
7926 }
7927 }
7928
7929 return false;
7930}
7931
7932template <bool MatchCBB>
7933bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7934 SDValue &ExtType) {
7935
7936 // Use an invalid shift-extend value to indicate we don't need to extend later
7937 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7938 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7939 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7940 return false;
7941 Reg = N.getOperand(0);
7942 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7943 SDLoc(N), MVT::i32);
7944 return true;
7945 }
7946
7948
7949 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7950 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7951 Reg = N.getOperand(0);
7952 ExtType =
7953 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7954 return true;
7955 }
7956
7957 return false;
7958}
7959
7960void AArch64DAGToDAGISel::PreprocessISelDAG() {
7961 bool MadeChange = false;
7962 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
7963 if (N.use_empty())
7964 continue;
7965
7967 switch (N.getOpcode()) {
7968 case ISD::SCALAR_TO_VECTOR: {
7969 EVT ScalarTy = N.getValueType(0).getVectorElementType();
7970 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
7971 ScalarTy == N.getOperand(0).getValueType())
7972 Result = addBitcastHints(*CurDAG, N);
7973
7974 break;
7975 }
7976 default:
7977 break;
7978 }
7979
7980 if (Result) {
7981 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
7982 LLVM_DEBUG(N.dump(CurDAG));
7983 LLVM_DEBUG(dbgs() << "\nNew: ");
7984 LLVM_DEBUG(Result.dump(CurDAG));
7985 LLVM_DEBUG(dbgs() << "\n");
7986
7987 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
7988 MadeChange = true;
7989 }
7990 }
7991
7992 if (MadeChange)
7993 CurDAG->RemoveDeadNodes();
7994
7996}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static std::optional< APInt > DecodeNEONSplat(SDValue N)
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1685
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1613
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1467
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:210
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.