LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
61 }
62
63 void Select(SDNode *Node) override;
64 void PreprocessISelDAG() override;
65
66 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
67 /// inline asm expressions.
68 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
69 InlineAsm::ConstraintCode ConstraintID,
70 std::vector<SDValue> &OutOps) override;
71
72 template <signed Low, signed High, signed Scale>
73 bool SelectRDVLImm(SDValue N, SDValue &Imm);
74
75 template <signed Low, signed High>
76 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
77
78 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
83 return SelectShiftedRegister(N, false, Reg, Shift);
84 }
85 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
86 return SelectShiftedRegister(N, true, Reg, Shift);
87 }
88 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
93 }
94 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
96 }
97 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
102 }
103 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
105 }
106 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 1, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 2, Base, OffImm);
114 }
115 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeIndexed(N, 4, Base, OffImm);
117 }
118 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeIndexed(N, 8, Base, OffImm);
120 }
121 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeIndexed(N, 16, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
129 }
130 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
131 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
132 }
133 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
134 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
135 }
136 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
137 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
138 }
139 template <unsigned Size, unsigned Max>
140 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
141 // Test if there is an appropriate addressing mode and check if the
142 // immediate fits.
143 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
144 if (Found) {
145 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
146 int64_t C = CI->getSExtValue();
147 if (C <= Max)
148 return true;
149 }
150 }
151
152 // Otherwise, base only, materialize address in register.
153 Base = N;
154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
155 return true;
156 }
157
158 template<int Width>
159 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
160 SDValue &SignExtend, SDValue &DoShift) {
161 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
162 }
163
164 template<int Width>
165 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
166 SDValue &SignExtend, SDValue &DoShift) {
167 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
168 }
169
170 bool SelectExtractHigh(SDValue N, SDValue &Res) {
171 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
172 N = N->getOperand(0);
173 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
174 !isa<ConstantSDNode>(N->getOperand(1)))
175 return false;
176 EVT VT = N->getValueType(0);
177 EVT LVT = N->getOperand(0).getValueType();
178 unsigned Index = N->getConstantOperandVal(1);
179 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
180 Index != VT.getVectorNumElements())
181 return false;
182 Res = N->getOperand(0);
183 return true;
184 }
185
186 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
187 if (N.getOpcode() != AArch64ISD::VLSHR)
188 return false;
189 SDValue Op = N->getOperand(0);
190 EVT VT = Op.getValueType();
191 unsigned ShtAmt = N->getConstantOperandVal(1);
192 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
193 return false;
194
195 APInt Imm;
196 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0)
199 << Op.getOperand(1).getConstantOperandVal(1));
200 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
201 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
202 Imm = APInt(VT.getScalarSizeInBits(),
203 Op.getOperand(1).getConstantOperandVal(0));
204 else
205 return false;
206
207 if (Imm != 1ULL << (ShtAmt - 1))
208 return false;
209
210 Res1 = Op.getOperand(0);
211 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
212 return true;
213 }
214
215 bool SelectDupZeroOrUndef(SDValue N) {
216 switch(N->getOpcode()) {
217 case ISD::UNDEF:
218 return true;
219 case AArch64ISD::DUP:
220 case ISD::SPLAT_VECTOR: {
221 auto Opnd0 = N->getOperand(0);
222 if (isNullConstant(Opnd0))
223 return true;
224 if (isNullFPConstant(Opnd0))
225 return true;
226 break;
227 }
228 default:
229 break;
230 }
231
232 return false;
233 }
234
235 bool SelectAny(SDValue) { return true; }
236
237 bool SelectDupZero(SDValue N) {
238 switch(N->getOpcode()) {
239 case AArch64ISD::DUP:
240 case ISD::SPLAT_VECTOR: {
241 auto Opnd0 = N->getOperand(0);
242 if (isNullConstant(Opnd0))
243 return true;
244 if (isNullFPConstant(Opnd0))
245 return true;
246 break;
247 }
248 }
249
250 return false;
251 }
252
253 template <MVT::SimpleValueType VT, bool Negate>
254 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
255 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
256 }
257
258 template <MVT::SimpleValueType VT, bool Negate>
259 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
260 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
261 }
262
263 template <MVT::SimpleValueType VT>
264 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
265 return SelectSVECpyDupImm(N, VT, Imm, Shift);
266 }
267
268 template <MVT::SimpleValueType VT, bool Invert = false>
269 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
270 return SelectSVELogicalImm(N, VT, Imm, Invert);
271 }
272
273 template <MVT::SimpleValueType VT>
274 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
275 return SelectSVEArithImm(N, VT, Imm);
276 }
277
278 template <unsigned Low, unsigned High, bool AllowSaturation = false>
279 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
280 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
281 }
282
283 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
284 if (N->getOpcode() != ISD::SPLAT_VECTOR)
285 return false;
286
287 EVT EltVT = N->getValueType(0).getVectorElementType();
288 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
289 /* High */ EltVT.getFixedSizeInBits(),
290 /* AllowSaturation */ true, Imm);
291 }
292
293 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
294 template<signed Min, signed Max, signed Scale, bool Shift>
295 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 return false;
298
299 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
300 if (Shift)
301 MulImm = 1LL << MulImm;
302
303 if ((MulImm % std::abs(Scale)) != 0)
304 return false;
305
306 MulImm /= Scale;
307 if ((MulImm >= Min) && (MulImm <= Max)) {
308 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
309 return true;
310 }
311
312 return false;
313 }
314
315 template <signed Max, signed Scale>
316 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 return false;
319
320 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
321
322 if (MulImm >= 0 && MulImm <= Max) {
323 MulImm *= Scale;
324 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
325 return true;
326 }
327
328 return false;
329 }
330
331 template <unsigned BaseReg, unsigned Max>
332 bool ImmToReg(SDValue N, SDValue &Imm) {
333 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
334 uint64_t C = CI->getZExtValue();
335
336 if (C > Max)
337 return false;
338
339 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
340 return true;
341 }
342 return false;
343 }
344
345 /// Form sequences of consecutive 64/128-bit registers for use in NEON
346 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
347 /// between 1 and 4 elements. If it contains a single element that is returned
348 /// unchanged; otherwise a REG_SEQUENCE value is returned.
351 // Form a sequence of SVE registers for instructions using list of vectors,
352 // e.g. structured loads and stores (ldN, stN).
353 SDValue createZTuple(ArrayRef<SDValue> Vecs);
354
355 // Similar to above, except the register must start at a multiple of the
356 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
357 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
358
359 /// Generic helper for the createDTuple/createQTuple
360 /// functions. Those should almost always be called instead.
361 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
362 const unsigned SubRegs[]);
363
364 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
365
366 bool tryIndexedLoad(SDNode *N);
367
368 void SelectPtrauthAuth(SDNode *N);
369 void SelectPtrauthResign(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395 bool IsTupleInput, unsigned Opc);
396 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397
398 template <unsigned MaxIdx, unsigned Scale>
399 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400 unsigned Op);
401 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402 unsigned Op, unsigned MaxIdx, unsigned Scale,
403 unsigned BaseReg = 0);
404 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405 /// SVE Reg+Imm addressing mode.
406 template <int64_t Min, int64_t Max>
407 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408 SDValue &OffImm);
409 /// SVE Reg+Reg address mode.
410 template <unsigned Scale>
411 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413 }
414
415 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
416 unsigned Opc, uint32_t MaxImm);
417
418 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
419
420 template <unsigned MaxIdx, unsigned Scale>
421 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
422 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
423 }
424
425 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
430 unsigned Opc_rr, unsigned Opc_ri);
431 std::tuple<unsigned, SDValue, SDValue>
432 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
433 const SDValue &OldBase, const SDValue &OldOffset,
434 unsigned Scale);
435
436 bool tryBitfieldExtractOp(SDNode *N);
437 bool tryBitfieldExtractOpFromSExt(SDNode *N);
438 bool tryBitfieldInsertOp(SDNode *N);
439 bool tryBitfieldInsertInZeroOp(SDNode *N);
440 bool tryShiftAmountMod(SDNode *N);
441
442 bool tryReadRegister(SDNode *N);
443 bool tryWriteRegister(SDNode *N);
444
445 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
446 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
447
448 bool trySelectXAR(SDNode *N);
449
450// Include the pieces autogenerated from the target description.
451#include "AArch64GenDAGISel.inc"
452
453private:
454 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
455 SDValue &Shift);
456 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
457 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
458 SDValue &OffImm) {
459 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
460 }
461 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
462 unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &OffImm);
466 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &OffImm);
468 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
469 SDValue &Offset, SDValue &SignExtend,
470 SDValue &DoShift);
471 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
472 SDValue &Offset, SDValue &SignExtend,
473 SDValue &DoShift);
474 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
475 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
476 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
477 SDValue &Offset, SDValue &SignExtend);
478
479 template<unsigned RegWidth>
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
481 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
482 }
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template <unsigned RegWidth>
486 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
488 }
489 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
490
491 template<unsigned RegWidth>
492 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
493 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
494 }
495
496 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
497 unsigned Width);
498
499 bool SelectCMP_SWAP(SDNode *N);
500
501 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
502 bool Negate);
503 bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
504 SDValue &Shift, bool Negate);
505 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
506 bool Negate);
507 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
508 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
509
510 // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
511 bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
512 bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
513 bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);
514
515 bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
516 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
517 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
518 bool AllowSaturation, SDValue &Imm);
519
520 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
521 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
522 SDValue &Offset);
523 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
524 SDValue &Offset, unsigned Scale = 1);
525
526 bool SelectAllActivePredicate(SDValue N);
527 bool SelectAnyPredicate(SDValue N);
528
529 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
530
531 template <bool MatchCBB>
532 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
533};
534
535class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
536public:
537 static char ID;
538 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
539 CodeGenOptLevel OptLevel)
541 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
542};
543} // end anonymous namespace
544
545char AArch64DAGToDAGISelLegacy::ID = 0;
546
547INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
548
549/// addBitcastHints - This method adds bitcast hints to the operands of a node
550/// to help instruction selector determine which operands are in Neon registers.
552 SDLoc DL(&N);
553 auto getFloatVT = [&](EVT VT) {
554 EVT ScalarVT = VT.getScalarType();
555 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
556 return VT.changeElementType(*(DAG.getContext()),
557 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
558 };
560 NewOps.reserve(N.getNumOperands());
561
562 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
563 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
564 N.getOperand(I));
565 NewOps.push_back(bitcasted);
566 }
567 EVT OrigVT = N.getValueType(0);
568 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
569 return DAG.getBitcast(OrigVT, OpNode);
570}
571
572/// isIntImmediate - This method tests to see if the node is a constant
573/// operand. If so Imm will receive the 32-bit value.
574static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
576 Imm = C->getZExtValue();
577 return true;
578 }
579 return false;
580}
581
582// isIntImmediate - This method tests to see if a constant operand.
583// If so Imm will receive the value.
584static bool isIntImmediate(SDValue N, uint64_t &Imm) {
585 return isIntImmediate(N.getNode(), Imm);
586}
587
588// isOpcWithIntImmediate - This method tests to see if the node is a specific
589// opcode and that it has a immediate integer right operand.
590// If so Imm will receive the 32 bit value.
591static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
592 uint64_t &Imm) {
593 return N->getOpcode() == Opc &&
594 isIntImmediate(N->getOperand(1).getNode(), Imm);
595}
596
597// isIntImmediateEq - This method tests to see if N is a constant operand that
598// is equivalent to 'ImmExpected'.
599#ifndef NDEBUG
600static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
601 uint64_t Imm;
602 if (!isIntImmediate(N.getNode(), Imm))
603 return false;
604 return Imm == ImmExpected;
605}
606#endif
607
608static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
609 assert(RegWidth == 32 || RegWidth == 64);
610 if (RegWidth == 32)
611 return APInt(RegWidth,
613 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
614}
615
616// Decodes the raw integer splat value from a NEON splat operation.
617static std::optional<APInt> DecodeNEONSplat(SDValue N) {
618 assert(N.getValueType().isInteger() && "Only integers are supported");
619 if (N->getOpcode() == AArch64ISD::NVCAST)
620 N = N->getOperand(0);
621 unsigned SplatWidth = N.getScalarValueSizeInBits();
622 if (N.getOpcode() == AArch64ISD::FMOV)
623 return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
624 if (N->getOpcode() == AArch64ISD::MOVI)
625 return APInt(SplatWidth, N.getConstantOperandVal(0));
626 if (N->getOpcode() == AArch64ISD::MOVIshift)
627 return APInt(SplatWidth, N.getConstantOperandVal(0)
628 << N.getConstantOperandVal(1));
629 if (N->getOpcode() == AArch64ISD::MVNIshift)
630 return ~APInt(SplatWidth, N.getConstantOperandVal(0)
631 << N.getConstantOperandVal(1));
632 if (N->getOpcode() == AArch64ISD::MOVIedit)
634 N.getConstantOperandVal(0)));
635 if (N->getOpcode() == AArch64ISD::DUP)
636 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
637 return Const->getAPIntValue().trunc(SplatWidth);
638 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
639 // in AArch64ISelLowering.
640 return std::nullopt;
641}
642
643// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
644// matching the element size of N.
645static std::optional<APInt> GetNEONSplatValue(SDValue N) {
646 unsigned SplatWidth = N.getScalarValueSizeInBits();
647 if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
648 if (SplatVal->getBitWidth() <= SplatWidth)
649 return APInt::getSplat(SplatWidth, *SplatVal);
650 if (SplatVal->isSplat(SplatWidth))
651 return SplatVal->trunc(SplatWidth);
652 }
653 return std::nullopt;
654}
655
656bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
657 SDValue &Imm) {
658 std::optional<APInt> ImmVal = GetNEONSplatValue(N);
659 if (!ImmVal)
660 return false;
661 uint64_t Encoding;
662 if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
663 ImmVal->getZExtValue(), Encoding))
664 return false;
665
666 Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
667 return true;
668}
669
670bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
671 SDValue &Shift) {
672 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
673 return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
674 N.getValueType().getScalarType().getSimpleVT(),
675 Imm, Shift,
676 /*Negate=*/false);
677 return false;
678}
679
680bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
681 SDValue &Imm) {
682 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
683 return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
684 return false;
685}
686
687bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
688 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
689 std::vector<SDValue> &OutOps) {
690 switch(ConstraintID) {
691 default:
692 llvm_unreachable("Unexpected asm memory constraint");
693 case InlineAsm::ConstraintCode::m:
694 case InlineAsm::ConstraintCode::o:
695 case InlineAsm::ConstraintCode::Q:
696 // We need to make sure that this one operand does not end up in XZR, thus
697 // require the address to be in a PointerRegClass register.
698 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
699 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
700 SDLoc dl(Op);
701 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
702 SDValue NewOp =
703 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
704 dl, Op.getValueType(),
705 Op, RC), 0);
706 OutOps.push_back(NewOp);
707 return false;
708 }
709 return true;
710}
711
712/// SelectArithImmed - Select an immediate value that can be represented as
713/// a 12-bit value shifted left by either 0 or 12. If so, return true with
714/// Val set to the 12-bit value and Shift set to the shifter operand.
715bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
716 SDValue &Shift) {
717 // This function is called from the addsub_shifted_imm ComplexPattern,
718 // which lists [imm] as the list of opcode it's interested in, however
719 // we still need to check whether the operand is actually an immediate
720 // here because the ComplexPattern opcode list is only used in
721 // root-level opcode matching.
722 if (!isa<ConstantSDNode>(N.getNode()))
723 return false;
724
725 uint64_t Immed = N.getNode()->getAsZExtVal();
726 unsigned ShiftAmt;
727
728 if (Immed >> 12 == 0) {
729 ShiftAmt = 0;
730 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
731 ShiftAmt = 12;
732 Immed = Immed >> 12;
733 } else
734 return false;
735
736 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
737 SDLoc dl(N);
738 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
739 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
740 return true;
741}
742
743/// SelectNegArithImmed - As above, but negates the value before trying to
744/// select it.
745bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
746 SDValue &Shift) {
747 // This function is called from the addsub_shifted_imm ComplexPattern,
748 // which lists [imm] as the list of opcode it's interested in, however
749 // we still need to check whether the operand is actually an immediate
750 // here because the ComplexPattern opcode list is only used in
751 // root-level opcode matching.
752 if (!isa<ConstantSDNode>(N.getNode()))
753 return false;
754
755 // The immediate operand must be a 24-bit zero-extended immediate.
756 uint64_t Immed = N.getNode()->getAsZExtVal();
757
758 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
759 // have the opposite effect on the C flag, so this pattern mustn't match under
760 // those circumstances.
761 if (Immed == 0)
762 return false;
763
764 if (N.getValueType() == MVT::i32)
765 Immed = ~((uint32_t)Immed) + 1;
766 else
767 Immed = ~Immed + 1ULL;
768 if (Immed & 0xFFFFFFFFFF000000ULL)
769 return false;
770
771 Immed &= 0xFFFFFFULL;
772 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
773 Shift);
774}
775
776/// getShiftTypeForNode - Translate a shift node to the corresponding
777/// ShiftType value.
779 switch (N.getOpcode()) {
780 default:
782 case ISD::SHL:
783 return AArch64_AM::LSL;
784 case ISD::SRL:
785 return AArch64_AM::LSR;
786 case ISD::SRA:
787 return AArch64_AM::ASR;
788 case ISD::ROTR:
789 return AArch64_AM::ROR;
790 }
791}
792
794 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
795}
796
797/// Determine whether it is worth it to fold SHL into the addressing
798/// mode.
800 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
801 // It is worth folding logical shift of up to three places.
802 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
803 if (!CSD)
804 return false;
805 unsigned ShiftVal = CSD->getZExtValue();
806 if (ShiftVal > 3)
807 return false;
808
809 // Check if this particular node is reused in any non-memory related
810 // operation. If yes, do not try to fold this node into the address
811 // computation, since the computation will be kept.
812 const SDNode *Node = V.getNode();
813 for (SDNode *UI : Node->users())
814 if (!isMemOpOrPrefetch(UI))
815 for (SDNode *UII : UI->users())
816 if (!isMemOpOrPrefetch(UII))
817 return false;
818 return true;
819}
820
821/// Determine whether it is worth to fold V into an extended register addressing
822/// mode.
823bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
824 // Trivial if we are optimizing for code size or if there is only
825 // one use of the value.
826 if (CurDAG->shouldOptForSize() || V.hasOneUse())
827 return true;
828
829 // If a subtarget has a slow shift, folding a shift into multiple loads
830 // costs additional micro-ops.
831 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
832 return false;
833
834 // Check whether we're going to emit the address arithmetic anyway because
835 // it's used by a non-address operation.
836 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
837 return true;
838 if (V.getOpcode() == ISD::ADD) {
839 const SDValue LHS = V.getOperand(0);
840 const SDValue RHS = V.getOperand(1);
841 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
842 return true;
843 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
844 return true;
845 }
846
847 // It hurts otherwise, since the value will be reused.
848 return false;
849}
850
851/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
852/// to select more shifted register
853bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
854 SDValue &Shift) {
855 EVT VT = N.getValueType();
856 if (VT != MVT::i32 && VT != MVT::i64)
857 return false;
858
859 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
860 return false;
861 SDValue LHS = N.getOperand(0);
862 if (!LHS->hasOneUse())
863 return false;
864
865 unsigned LHSOpcode = LHS->getOpcode();
866 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
867 return false;
868
869 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
870 if (!ShiftAmtNode)
871 return false;
872
873 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
874 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
875 if (!RHSC)
876 return false;
877
878 APInt AndMask = RHSC->getAPIntValue();
879 unsigned LowZBits, MaskLen;
880 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
881 return false;
882
883 unsigned BitWidth = N.getValueSizeInBits();
884 SDLoc DL(LHS);
885 uint64_t NewShiftC;
886 unsigned NewShiftOp;
887 if (LHSOpcode == ISD::SHL) {
888 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
889 // BitWidth != LowZBits + MaskLen doesn't match the pattern
890 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
891 return false;
892
893 NewShiftC = LowZBits - ShiftAmtC;
894 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
895 } else {
896 if (LowZBits == 0)
897 return false;
898
899 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
900 NewShiftC = LowZBits + ShiftAmtC;
901 if (NewShiftC >= BitWidth)
902 return false;
903
904 // SRA need all high bits
905 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
906 return false;
907
908 // SRL high bits can be 0 or 1
909 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
910 return false;
911
912 if (LHSOpcode == ISD::SRL)
913 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
914 else
915 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
916 }
917
918 assert(NewShiftC < BitWidth && "Invalid shift amount");
919 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
920 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
921 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
922 NewShiftAmt, BitWidthMinus1),
923 0);
924 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
925 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
926 return true;
927}
928
929/// getExtendTypeForNode - Translate an extend node to the corresponding
930/// ExtendType value.
932getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
933 if (N.getOpcode() == ISD::SIGN_EXTEND ||
934 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
935 EVT SrcVT;
936 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
937 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
938 else
939 SrcVT = N.getOperand(0).getValueType();
940
941 if (!IsLoadStore && SrcVT == MVT::i8)
942 return AArch64_AM::SXTB;
943 else if (!IsLoadStore && SrcVT == MVT::i16)
944 return AArch64_AM::SXTH;
945 else if (SrcVT == MVT::i32)
946 return AArch64_AM::SXTW;
947 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
948
950 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
951 N.getOpcode() == ISD::ANY_EXTEND) {
952 EVT SrcVT = N.getOperand(0).getValueType();
953 if (!IsLoadStore && SrcVT == MVT::i8)
954 return AArch64_AM::UXTB;
955 else if (!IsLoadStore && SrcVT == MVT::i16)
956 return AArch64_AM::UXTH;
957 else if (SrcVT == MVT::i32)
958 return AArch64_AM::UXTW;
959 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
960
962 } else if (N.getOpcode() == ISD::AND) {
963 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
964 if (!CSD)
966 uint64_t AndMask = CSD->getZExtValue();
967
968 switch (AndMask) {
969 default:
971 case 0xFF:
972 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
973 case 0xFFFF:
974 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
975 case 0xFFFFFFFF:
976 return AArch64_AM::UXTW;
977 }
978 }
979
981}
982
983/// Determine whether it is worth to fold V into an extended register of an
984/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
985/// instruction, and the shift should be treated as worth folding even if has
986/// multiple uses.
987bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
988 // Trivial if we are optimizing for code size or if there is only
989 // one use of the value.
990 if (CurDAG->shouldOptForSize() || V.hasOneUse())
991 return true;
992
993 // If a subtarget has a fastpath LSL we can fold a logical shift into
994 // the add/sub and save a cycle.
995 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
996 V.getConstantOperandVal(1) <= 4 &&
998 return true;
999
1000 // It hurts otherwise, since the value will be reused.
1001 return false;
1002}
1003
1004/// SelectShiftedRegister - Select a "shifted register" operand. If the value
1005/// is not shifted, set the Shift operand to default of "LSL 0". The logical
1006/// instructions allow the shifted register to be rotated, but the arithmetic
1007/// instructions do not. The AllowROR parameter specifies whether ROR is
1008/// supported.
1009bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
1010 SDValue &Reg, SDValue &Shift) {
1011 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
1012 return true;
1013
1015 if (ShType == AArch64_AM::InvalidShiftExtend)
1016 return false;
1017 if (!AllowROR && ShType == AArch64_AM::ROR)
1018 return false;
1019
1020 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1021 unsigned BitSize = N.getValueSizeInBits();
1022 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1023 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
1024
1025 Reg = N.getOperand(0);
1026 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
1027 return isWorthFoldingALU(N, true);
1028 }
1029
1030 return false;
1031}
1032
1033/// Instructions that accept extend modifiers like UXTW expect the register
1034/// being extended to be a GPR32, but the incoming DAG might be acting on a
1035/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1036/// this is the case.
1038 if (N.getValueType() == MVT::i32)
1039 return N;
1040
1041 SDLoc dl(N);
1042 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
1043}
1044
1045// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1046template<signed Low, signed High, signed Scale>
1047bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1048 if (!isa<ConstantSDNode>(N))
1049 return false;
1050
1051 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
1052 if ((MulImm % std::abs(Scale)) == 0) {
1053 int64_t RDVLImm = MulImm / Scale;
1054 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1055 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
1056 return true;
1057 }
1058 }
1059
1060 return false;
1061}
1062
1063// Returns a suitable RDSVL multiplier from a left shift.
1064template <signed Low, signed High>
1065bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1066 if (!isa<ConstantSDNode>(N))
1067 return false;
1068
1069 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
1070 if (MulImm >= Low && MulImm <= High) {
1071 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
1072 return true;
1073 }
1074
1075 return false;
1076}
1077
/// SelectArithExtendedRegister - Select a "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
/// NOTE(review): several lines of this function were dropped by the doc
/// extraction (marked below); the code as reproduced here is incomplete and
/// must be reconciled with upstream before use.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  // NOTE(review): a line was dropped here -- presumably the declaration
  // `AArch64_AM::ShiftExtendType Ext;`; confirm against upstream.

  if (N.getOpcode() == ISD::SHL) {
    // Fold (shl (extend x), C): the constant C becomes the arith-extend
    // shift immediate, which only allows 0-4.
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    // NOTE(review): a guard line was dropped here -- presumably
    // `if (Ext == AArch64_AM::InvalidShiftExtend)` protecting this return.
    return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    // NOTE(review): same dropped guard line here (invalid-extend check).
    return false;

    // Don't match sext of vector extracts. These can use SMOV, but if we match
    // this as an extended register, we'll always fold the extend into an ALU op
    // user of the extend (which results in a UMOV).
    // NOTE(review): a line was dropped above this statement (likely an `if`
    // opening the brace closed below); confirm against upstream.
    SDValue Op = N.getOperand(0);
    if (Op->getOpcode() == ISD::ANY_EXTEND)
      Op = Op->getOperand(0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(0).getValueType().isFixedLengthVector())
      return false;
  }

  Reg = N.getOperand(0);

  // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
  // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
  auto isDef32 = [](SDValue N) {
    unsigned Opc = N.getOpcode();
    return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
           // NOTE(review): two lines of additional opcode exclusions were
           // dropped here (likely CopyFromReg/Assert* nodes); confirm.
           Opc != ISD::FREEZE;
  };
  if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
      isDef32(Reg))
    return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}
1142
1143/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1144/// operand is referred by the instructions have SP operand
1145bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1146 SDValue &Shift) {
1147 unsigned ShiftVal = 0;
1149
1150 if (N.getOpcode() != ISD::SHL)
1151 return false;
1152
1153 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1154 if (!CSD)
1155 return false;
1156 ShiftVal = CSD->getZExtValue();
1157 if (ShiftVal > 4)
1158 return false;
1159
1160 Ext = AArch64_AM::UXTX;
1161 Reg = N.getOperand(0);
1162 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1163 MVT::i32);
1164 return isWorthFoldingALU(N);
1165}
1166
1167/// If there's a use of this ADDlow that's not itself a load/store then we'll
1168/// need to create a real ADD instruction from it anyway and there's no point in
1169/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1170/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1171/// leads to duplicated ADRP instructions.
1173 for (auto *User : N->users()) {
1174 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1175 User->getOpcode() != ISD::ATOMIC_LOAD &&
1176 User->getOpcode() != ISD::ATOMIC_STORE)
1177 return false;
1178
1179 // ldar and stlr have much more restrictive addressing modes (just a
1180 // register).
1181 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1182 return false;
1183 }
1184
1185 return true;
1186}
1187
1188/// Check if the immediate offset is valid as a scaled immediate.
1189static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1190 unsigned Size) {
1191 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1192 Offset < (Range << Log2_32(Size)))
1193 return true;
1194 return false;
1195}
1196
1197/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1198/// immediate" address. The "Size" argument is the size in bytes of the memory
1199/// reference, which determines the scale.
1200bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1201 unsigned BW, unsigned Size,
1202 SDValue &Base,
1203 SDValue &OffImm) {
1204 SDLoc dl(N);
1205 const DataLayout &DL = CurDAG->getDataLayout();
1206 const TargetLowering *TLI = getTargetLowering();
1207 if (N.getOpcode() == ISD::FrameIndex) {
1208 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1209 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1210 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1211 return true;
1212 }
1213
1214 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1215 // selected here doesn't support labels/immediates, only base+offset.
1216 if (CurDAG->isBaseWithConstantOffset(N)) {
1217 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1218 if (IsSignedImm) {
1219 int64_t RHSC = RHS->getSExtValue();
1220 unsigned Scale = Log2_32(Size);
1221 int64_t Range = 0x1LL << (BW - 1);
1222
1223 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1224 RHSC < (Range << Scale)) {
1225 Base = N.getOperand(0);
1226 if (Base.getOpcode() == ISD::FrameIndex) {
1227 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1228 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1229 }
1230 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1231 return true;
1232 }
1233 } else {
1234 // unsigned Immediate
1235 uint64_t RHSC = RHS->getZExtValue();
1236 unsigned Scale = Log2_32(Size);
1237 uint64_t Range = 0x1ULL << BW;
1238
1239 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1240 Base = N.getOperand(0);
1241 if (Base.getOpcode() == ISD::FrameIndex) {
1242 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1243 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1244 }
1245 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1246 return true;
1247 }
1248 }
1249 }
1250 }
1251 // Base only. The address will be materialized into a register before
1252 // the memory is accessed.
1253 // add x0, Xbase, #offset
1254 // stp x1, x2, [x0]
1255 Base = N;
1256 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1257 return true;
1258}
1259
1260/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1261/// immediate" address. The "Size" argument is the size in bytes of the memory
1262/// reference, which determines the scale.
1263bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1264 SDValue &Base, SDValue &OffImm) {
1265 SDLoc dl(N);
1266 const DataLayout &DL = CurDAG->getDataLayout();
1267 const TargetLowering *TLI = getTargetLowering();
1268 if (N.getOpcode() == ISD::FrameIndex) {
1269 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1270 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1271 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1272 return true;
1273 }
1274
1275 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1276 GlobalAddressSDNode *GAN =
1277 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1278 Base = N.getOperand(0);
1279 OffImm = N.getOperand(1);
1280 if (!GAN)
1281 return true;
1282
1283 if (GAN->getOffset() % Size == 0 &&
1285 return true;
1286 }
1287
1288 if (CurDAG->isBaseWithConstantOffset(N)) {
1289 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1290 int64_t RHSC = (int64_t)RHS->getZExtValue();
1291 unsigned Scale = Log2_32(Size);
1292 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1293 Base = N.getOperand(0);
1294 if (Base.getOpcode() == ISD::FrameIndex) {
1295 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1296 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1297 }
1298 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1299 return true;
1300 }
1301 }
1302 }
1303
1304 // Before falling back to our general case, check if the unscaled
1305 // instructions can handle this. If so, that's preferable.
1306 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1307 return false;
1308
1309 // Base only. The address will be materialized into a register before
1310 // the memory is accessed.
1311 // add x0, Xbase, #offset
1312 // ldr x0, [x0]
1313 Base = N;
1314 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1315 return true;
1316}
1317
1318/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1319/// immediate" address. This should only match when there is an offset that
1320/// is not valid for a scaled immediate addressing mode. The "Size" argument
1321/// is the size in bytes of the memory reference, which is needed here to know
1322/// what is valid for a scaled immediate.
1323bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1324 SDValue &Base,
1325 SDValue &OffImm) {
1326 if (!CurDAG->isBaseWithConstantOffset(N))
1327 return false;
1328 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1329 int64_t RHSC = RHS->getSExtValue();
1330 if (RHSC >= -256 && RHSC < 256) {
1331 Base = N.getOperand(0);
1332 if (Base.getOpcode() == ISD::FrameIndex) {
1333 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1334 const TargetLowering *TLI = getTargetLowering();
1335 Base = CurDAG->getTargetFrameIndex(
1336 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1337 }
1338 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1339 return true;
1340 }
1341 }
1342 return false;
1343}
1344
1346 SDLoc dl(N);
1347 SDValue ImpDef = SDValue(
1348 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1349 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1350 N);
1351}
1352
1353/// Check if the given SHL node (\p N), can be used to form an
1354/// extended register for an addressing mode.
1355bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1356 bool WantExtend, SDValue &Offset,
1357 SDValue &SignExtend) {
1358 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1359 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1360 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1361 return false;
1362
1363 SDLoc dl(N);
1364 if (WantExtend) {
1366 getExtendTypeForNode(N.getOperand(0), true);
1368 return false;
1369
1370 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1371 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1372 MVT::i32);
1373 } else {
1374 Offset = N.getOperand(0);
1375 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1376 }
1377
1378 unsigned LegalShiftVal = Log2_32(Size);
1379 unsigned ShiftVal = CSD->getZExtValue();
1380
1381 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1382 return false;
1383
1384 return isWorthFoldingAddr(N, Size);
1385}
1386
1387bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1389 SDValue &SignExtend,
1390 SDValue &DoShift) {
1391 if (N.getOpcode() != ISD::ADD)
1392 return false;
1393 SDValue LHS = N.getOperand(0);
1394 SDValue RHS = N.getOperand(1);
1395 SDLoc dl(N);
1396
1397 // We don't want to match immediate adds here, because they are better lowered
1398 // to the register-immediate addressing modes.
1400 return false;
1401
1402 // Check if this particular node is reused in any non-memory related
1403 // operation. If yes, do not try to fold this node into the address
1404 // computation, since the computation will be kept.
1405 const SDNode *Node = N.getNode();
1406 for (SDNode *UI : Node->users()) {
1407 if (!isMemOpOrPrefetch(UI))
1408 return false;
1409 }
1410
1411 // Remember if it is worth folding N when it produces extended register.
1412 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1413
1414 // Try to match a shifted extend on the RHS.
1415 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1416 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1417 Base = LHS;
1418 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1419 return true;
1420 }
1421
1422 // Try to match a shifted extend on the LHS.
1423 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1424 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1425 Base = RHS;
1426 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1427 return true;
1428 }
1429
1430 // There was no shift, whatever else we find.
1431 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1432
1434 // Try to match an unshifted extend on the LHS.
1435 if (IsExtendedRegisterWorthFolding &&
1436 (Ext = getExtendTypeForNode(LHS, true)) !=
1438 Base = RHS;
1439 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1440 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1441 MVT::i32);
1442 if (isWorthFoldingAddr(LHS, Size))
1443 return true;
1444 }
1445
1446 // Try to match an unshifted extend on the RHS.
1447 if (IsExtendedRegisterWorthFolding &&
1448 (Ext = getExtendTypeForNode(RHS, true)) !=
1450 Base = LHS;
1451 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1452 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1453 MVT::i32);
1454 if (isWorthFoldingAddr(RHS, Size))
1455 return true;
1456 }
1457
1458 return false;
1459}
1460
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Anything in [0x0, 0xfff] fits the plain ADD immediate field.
  if ((ImmOff & ~0xfffLL) == 0)
    return true;
  // Otherwise only "ADD ..., LSL #12" candidates (bits [23:12] only) remain.
  if ((ImmOff & ~0xfff000LL) != 0)
    return false;
  // As a single MOVZ is faster than a "ADD of LSL #12", reject constants a
  // lone MOVZ can produce.
  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
         (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
}
1475
1476bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1478 SDValue &SignExtend,
1479 SDValue &DoShift) {
1480 if (N.getOpcode() != ISD::ADD)
1481 return false;
1482 SDValue LHS = N.getOperand(0);
1483 SDValue RHS = N.getOperand(1);
1484 SDLoc DL(N);
1485
1486 // Check if this particular node is reused in any non-memory related
1487 // operation. If yes, do not try to fold this node into the address
1488 // computation, since the computation will be kept.
1489 const SDNode *Node = N.getNode();
1490 for (SDNode *UI : Node->users()) {
1491 if (!isMemOpOrPrefetch(UI))
1492 return false;
1493 }
1494
1495 // Watch out if RHS is a wide immediate, it can not be selected into
1496 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1497 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1498 // instructions like:
1499 // MOV X0, WideImmediate
1500 // ADD X1, BaseReg, X0
1501 // LDR X2, [X1, 0]
1502 // For such situation, using [BaseReg, XReg] addressing mode can save one
1503 // ADD/SUB:
1504 // MOV X0, WideImmediate
1505 // LDR X2, [BaseReg, X0]
1506 if (isa<ConstantSDNode>(RHS)) {
1507 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1508 // Skip the immediate can be selected by load/store addressing mode.
1509 // Also skip the immediate can be encoded by a single ADD (SUB is also
1510 // checked by using -ImmOff).
1511 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1512 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1513 return false;
1514
1515 SDValue Ops[] = { RHS };
1516 SDNode *MOVI =
1517 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1518 SDValue MOVIV = SDValue(MOVI, 0);
1519 // This ADD of two X register will be selected into [Reg+Reg] mode.
1520 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1521 }
1522
1523 // Remember if it is worth folding N when it produces extended register.
1524 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1525
1526 // Try to match a shifted extend on the RHS.
1527 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1528 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1529 Base = LHS;
1530 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1531 return true;
1532 }
1533
1534 // Try to match a shifted extend on the LHS.
1535 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1536 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1537 Base = RHS;
1538 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1539 return true;
1540 }
1541
1542 // Match any non-shifted, non-extend, non-immediate add expression.
1543 Base = LHS;
1544 Offset = RHS;
1545 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1546 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1547 // Reg1 + Reg2 is free: no check needed.
1548 return true;
1549}
1550
1551SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1552 static const unsigned RegClassIDs[] = {
1553 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1554 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1555 AArch64::dsub2, AArch64::dsub3};
1556
1557 return createTuple(Regs, RegClassIDs, SubRegs);
1558}
1559
1560SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1561 static const unsigned RegClassIDs[] = {
1562 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1563 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1564 AArch64::qsub2, AArch64::qsub3};
1565
1566 return createTuple(Regs, RegClassIDs, SubRegs);
1567}
1568
1569SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1570 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1571 AArch64::ZPR3RegClassID,
1572 AArch64::ZPR4RegClassID};
1573 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1574 AArch64::zsub2, AArch64::zsub3};
1575
1576 return createTuple(Regs, RegClassIDs, SubRegs);
1577}
1578
1579SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1580 assert(Regs.size() == 2 || Regs.size() == 4);
1581
1582 // The createTuple interface requires 3 RegClassIDs for each possible
1583 // tuple type even though we only have them for ZPR2 and ZPR4.
1584 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1585 AArch64::ZPR4Mul4RegClassID};
1586 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1587 AArch64::zsub2, AArch64::zsub3};
1588 return createTuple(Regs, RegClassIDs, SubRegs);
1589}
1590
1591SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1592 const unsigned RegClassIDs[],
1593 const unsigned SubRegs[]) {
1594 // There's no special register-class for a vector-list of 1 element: it's just
1595 // a vector.
1596 if (Regs.size() == 1)
1597 return Regs[0];
1598
1599 assert(Regs.size() >= 2 && Regs.size() <= 4);
1600
1601 SDLoc DL(Regs[0]);
1602
1604
1605 // First operand of REG_SEQUENCE is the desired RegClass.
1606 Ops.push_back(
1607 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1608
1609 // Then we get pairs of source & subregister-position for the components.
1610 for (unsigned i = 0; i < Regs.size(); ++i) {
1611 Ops.push_back(Regs[i]);
1612 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1613 }
1614
1615 SDNode *N =
1616 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1617 return SDValue(N, 0);
1618}
1619
1620void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1621 bool isExt) {
1622 SDLoc dl(N);
1623 EVT VT = N->getValueType(0);
1624
1625 unsigned ExtOff = isExt;
1626
1627 // Form a REG_SEQUENCE to force register allocation.
1628 unsigned Vec0Off = ExtOff + 1;
1629 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1630 SDValue RegSeq = createQTuple(Regs);
1631
1633 if (isExt)
1634 Ops.push_back(N->getOperand(1));
1635 Ops.push_back(RegSeq);
1636 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1637 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1638}
1639
1640static std::tuple<SDValue, SDValue>
1642 SDLoc DL(Disc);
1643 SDValue AddrDisc;
1644 SDValue ConstDisc;
1645
1646 // If this is a blend, remember the constant and address discriminators.
1647 // Otherwise, it's either a constant discriminator, or a non-blended
1648 // address discriminator.
1649 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1650 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1651 AddrDisc = Disc->getOperand(1);
1652 ConstDisc = Disc->getOperand(2);
1653 } else {
1654 ConstDisc = Disc;
1655 }
1656
1657 // If the constant discriminator (either the blend RHS, or the entire
1658 // discriminator value) isn't a 16-bit constant, bail out, and let the
1659 // discriminator be computed separately.
1660 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1661 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1662 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1663
1664 // If there's no address discriminator, use XZR directly.
1665 if (!AddrDisc)
1666 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1667
1668 return std::make_tuple(
1669 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1670 AddrDisc);
1671}
1672
1673void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1674 SDLoc DL(N);
1675 // IntrinsicID is operand #0
1676 SDValue Val = N->getOperand(1);
1677 SDValue AUTKey = N->getOperand(2);
1678 SDValue AUTDisc = N->getOperand(3);
1679
1680 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1681 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1682
1683 SDValue AUTAddrDisc, AUTConstDisc;
1684 std::tie(AUTConstDisc, AUTAddrDisc) =
1685 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1686
1687 if (!Subtarget->isX16X17Safer()) {
1688 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1689 // Copy deactivation symbol if present.
1690 if (N->getNumOperands() > 4)
1691 Ops.push_back(N->getOperand(4));
1692
1693 SDNode *AUT =
1694 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1695 ReplaceNode(N, AUT);
1696 } else {
1697 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1698 AArch64::X16, Val, SDValue());
1699 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1700
1701 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1702 ReplaceNode(N, AUT);
1703 }
1704}
1705
/// Lower ptrauth.resign (and the load-relative variant) to the AUTPAC /
/// AUTRELLOADPAC pseudos. The input value is pinned to X16 via a glued
/// CopyToReg; operand/glue ordering below is load-bearing.
void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0, if W_CHAIN it is #1
  int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
  SDValue Val = N->getOperand(OffsetBase + 1);
  SDValue AUTKey = N->getOperand(OffsetBase + 2);
  SDValue AUTDisc = N->getOperand(OffsetBase + 3);
  SDValue PACKey = N->getOperand(OffsetBase + 4);
  SDValue PACDisc = N->getOperand(OffsetBase + 5);
  uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
  // The load-relative variant dereferences the authenticated pointer before
  // re-signing, so it carries an extra addend operand and a chain.
  bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;

  // Materialize both keys as target constants for the pseudo.
  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();

  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
  PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);

  // Split each discriminator into its constant and address halves.
  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue PACAddrDisc, PACConstDisc;
  std::tie(PACConstDisc, PACAddrDisc) =
      extractPtrauthBlendDiscriminators(PACDisc, CurDAG);

  // Pin the incoming value to X16; the glue (value #1) ties this copy to the
  // pseudo emitted below.
  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val, SDValue());

  if (HasLoad) {
    SDValue Addend = N->getOperand(OffsetBase + 6);
    SDValue IncomingChain = N->getOperand(0);
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
                     PACKey, PACConstDisc, PACAddrDisc,
                     Addend, IncomingChain, X16Copy.getValue(1)};

    SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
                                                   MVT::i64, MVT::Other, Ops);
    ReplaceNode(N, AUTRELLOADPAC);
  } else {
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
                     PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};

    SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
    ReplaceNode(N, AUTPAC);
  }
}
1753
/// Try to fold a pre/post-indexed load into a single AArch64 load
/// instruction with writeback. Returns true if \p N was replaced, false if
/// the caller should fall back to normal selection.
///
/// The indexed-load node N has three results: the loaded value (0), the
/// updated base pointer (1) and the chain (2). The machine nodes built here
/// produce them in a different order: writeback (0), value (1), chain (2).
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  // When true, the i32 machine result is widened to i64 with SUBREG_TO_REG
  // after selection (zero/any-extending loads implicitly clear bits 63:32).
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      // Zero/any-extending i32 load: select the plain 32-bit load and widen.
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      // Sign-extending halfword load: pick 64- or 32-bit destination form.
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      // Sign-extending byte load: pick 64- or 32-bit destination form.
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    // bf16 shares the 16-bit FP register load with f16.
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 ||
             (VT.is64BitVector() && Subtarget->isLittleEndian())) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else if (VT.is64BitVector()) {
    // Big-endian 64-bit vector: only LD1 with its implicit post-increment of
    // the transfer size (8 bytes) can be used.
    if (IsPre || OffsetVal != 8)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev8b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev4h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev2s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev1d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else if (VT.is128BitVector()) {
    // Big-endian 128-bit vector: as above, but the post-increment is 16.
    if (IsPre || OffsetVal != 16)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev16b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev8h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev4s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev2d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDLoc dl(N);
  // LD1 encodes an immediate offset by using XZR as the offset register.
  SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
                       ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
                       : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  // Results: writeback (i64), loaded value (DstVT), chain.
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    // Widen the i32 result to i64; bits 63:32 are already zero.
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
                                               MVT::i64, LoadedVal, SubReg),
                        0);
  }

  // Map N's (value, writeback, chain) onto the machine node's results.
  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1893
1894void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1895 unsigned SubRegIdx) {
1896 SDLoc dl(N);
1897 EVT VT = N->getValueType(0);
1898 SDValue Chain = N->getOperand(0);
1899
1900 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1901 Chain};
1902
1903 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1904
1905 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1906 SDValue SuperReg = SDValue(Ld, 0);
1907 for (unsigned i = 0; i < NumVecs; ++i)
1908 ReplaceUses(SDValue(N, i),
1909 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1910
1911 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1912
1913 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1914 // because it's too simple to have needed special treatment during lowering.
1915 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1916 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1917 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1918 }
1919
1920 CurDAG->RemoveDeadNode(N);
1921}
1922
1923void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1924 unsigned Opc, unsigned SubRegIdx) {
1925 SDLoc dl(N);
1926 EVT VT = N->getValueType(0);
1927 SDValue Chain = N->getOperand(0);
1928
1929 SDValue Ops[] = {N->getOperand(1), // Mem operand
1930 N->getOperand(2), // Incremental
1931 Chain};
1932
1933 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1934 MVT::Untyped, MVT::Other};
1935
1936 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1937
1938 // Update uses of write back register
1939 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1940
1941 // Update uses of vector list
1942 SDValue SuperReg = SDValue(Ld, 1);
1943 if (NumVecs == 1)
1944 ReplaceUses(SDValue(N, 0), SuperReg);
1945 else
1946 for (unsigned i = 0; i < NumVecs; ++i)
1947 ReplaceUses(SDValue(N, i),
1948 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1949
1950 // Update the chain
1951 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1952 CurDAG->RemoveDeadNode(N);
1953}
1954
1955/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1956/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1957/// new Base and an SDValue representing the new offset.
1958std::tuple<unsigned, SDValue, SDValue>
1959AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1960 unsigned Opc_ri,
1961 const SDValue &OldBase,
1962 const SDValue &OldOffset,
1963 unsigned Scale) {
1964 SDValue NewBase = OldBase;
1965 SDValue NewOffset = OldOffset;
1966 // Detect a possible Reg+Imm addressing mode.
1967 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1968 N, OldBase, NewBase, NewOffset);
1969
1970 // Detect a possible reg+reg addressing mode, but only if we haven't already
1971 // detected a Reg+Imm one.
1972 const bool IsRegReg =
1973 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1974
1975 // Select the instruction.
1976 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1977}
1978
// Element-type constraint applied by SelectOpcodeFromVT when mapping a
// scalable-vector VT to one of the candidate opcodes.
// NOTE(review): the fourth enumerator was dropped by the documentation
// extraction; restored here (SelectOpcodeFromVT's switch handles all four).
enum class SelectTypeKind {
  Int1 = 0,    // i1 predicate elements only.
  Int = 1,     // i8/i16/i32/i64 integer elements.
  FP = 2,      // f16/bf16/f32/f64 floating-point elements.
  AnyType = 3, // No element-type restriction.
};
1985
1986/// This function selects an opcode from a list of opcodes, which is
1987/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1988/// element types, in this order.
1989template <SelectTypeKind Kind>
1990static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1991 // Only match scalable vector VTs
1992 if (!VT.isScalableVector())
1993 return 0;
1994
1995 EVT EltVT = VT.getVectorElementType();
1996 unsigned Key = VT.getVectorMinNumElements();
1997 switch (Kind) {
1999 break;
2001 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2002 EltVT != MVT::i64)
2003 return 0;
2004 break;
2006 if (EltVT != MVT::i1)
2007 return 0;
2008 break;
2009 case SelectTypeKind::FP:
2010 if (EltVT == MVT::bf16)
2011 Key = 16;
2012 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2013 EltVT != MVT::f64)
2014 return 0;
2015 break;
2016 }
2017
2018 unsigned Offset;
2019 switch (Key) {
2020 case 16: // 8-bit or bf16
2021 Offset = 0;
2022 break;
2023 case 8: // 16-bit
2024 Offset = 1;
2025 break;
2026 case 4: // 32-bit
2027 Offset = 2;
2028 break;
2029 case 2: // 64-bit
2030 Offset = 3;
2031 break;
2032 default:
2033 return 0;
2034 }
2035
2036 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2037}
2038
2039// This function is almost identical to SelectWhilePair, but has an
2040// extra check on the range of the immediate operand.
2041// TODO: Merge these two functions together at some point?
2042void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2043 // Immediate can be either 0 or 1.
2044 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2045 if (Imm->getZExtValue() > 1)
2046 return;
2047
2048 SDLoc DL(N);
2049 EVT VT = N->getValueType(0);
2050 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2051 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2052 SDValue SuperReg = SDValue(WhilePair, 0);
2053
2054 for (unsigned I = 0; I < 2; ++I)
2055 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2056 AArch64::psub0 + I, DL, VT, SuperReg));
2057
2058 CurDAG->RemoveDeadNode(N);
2059}
2060
2061void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2062 SDLoc DL(N);
2063 EVT VT = N->getValueType(0);
2064
2065 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2066
2067 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2068 SDValue SuperReg = SDValue(WhilePair, 0);
2069
2070 for (unsigned I = 0; I < 2; ++I)
2071 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2072 AArch64::psub0 + I, DL, VT, SuperReg));
2073
2074 CurDAG->RemoveDeadNode(N);
2075}
2076
2077void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2078 unsigned Opcode) {
2079 EVT VT = N->getValueType(0);
2080 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2081 SDValue Ops = createZTuple(Regs);
2082 SDLoc DL(N);
2083 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2084 SDValue SuperReg = SDValue(Intrinsic, 0);
2085 for (unsigned i = 0; i < NumVecs; ++i)
2086 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2087 AArch64::zsub0 + i, DL, VT, SuperReg));
2088
2089 CurDAG->RemoveDeadNode(N);
2090}
2091
2092void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2093 unsigned Opcode) {
2094 SDLoc DL(N);
2095 EVT VT = N->getValueType(0);
2096 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2097 Ops.push_back(/*Chain*/ N->getOperand(0));
2098
2099 SDNode *Instruction =
2100 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2101 SDValue SuperReg = SDValue(Instruction, 0);
2102
2103 for (unsigned i = 0; i < NumVecs; ++i)
2104 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2105 AArch64::zsub0 + i, DL, VT, SuperReg));
2106
2107 // Copy chain
2108 unsigned ChainIdx = NumVecs;
2109 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2110 CurDAG->RemoveDeadNode(N);
2111}
2112
2113void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2114 unsigned NumVecs,
2115 bool IsZmMulti,
2116 unsigned Opcode,
2117 bool HasPred) {
2118 assert(Opcode != 0 && "Unexpected opcode");
2119
2120 SDLoc DL(N);
2121 EVT VT = N->getValueType(0);
2122 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2124
2125 auto GetMultiVecOperand = [&]() {
2126 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2127 OpsIter += NumVecs;
2128 return createZMulTuple(Regs);
2129 };
2130
2131 if (HasPred)
2132 Ops.push_back(*OpsIter++);
2133
2134 Ops.push_back(GetMultiVecOperand());
2135 if (IsZmMulti)
2136 Ops.push_back(GetMultiVecOperand());
2137 else
2138 Ops.push_back(*OpsIter++);
2139
2140 // Append any remaining operands.
2141 Ops.append(OpsIter, N->op_end());
2142 SDNode *Intrinsic;
2143 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2144 SDValue SuperReg = SDValue(Intrinsic, 0);
2145 for (unsigned i = 0; i < NumVecs; ++i)
2146 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2147 AArch64::zsub0 + i, DL, VT, SuperReg));
2148
2149 CurDAG->RemoveDeadNode(N);
2150}
2151
2152void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2153 unsigned Scale, unsigned Opc_ri,
2154 unsigned Opc_rr, bool IsIntr) {
2155 assert(Scale < 5 && "Invalid scaling value.");
2156 SDLoc DL(N);
2157 EVT VT = N->getValueType(0);
2158 SDValue Chain = N->getOperand(0);
2159
2160 // Optimize addressing mode.
2162 unsigned Opc;
2163 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2164 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2165 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2166
2167 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2168 Base, // Memory operand
2169 Offset, Chain};
2170
2171 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2172
2173 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2174 SDValue SuperReg = SDValue(Load, 0);
2175 for (unsigned i = 0; i < NumVecs; ++i)
2176 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2177 AArch64::zsub0 + i, DL, VT, SuperReg));
2178
2179 // Copy chain
2180 unsigned ChainIdx = NumVecs;
2181 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2182 CurDAG->RemoveDeadNode(N);
2183}
2184
2185void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2186 unsigned NumVecs,
2187 unsigned Scale,
2188 unsigned Opc_ri,
2189 unsigned Opc_rr) {
2190 assert(Scale < 4 && "Invalid scaling value.");
2191 SDLoc DL(N);
2192 EVT VT = N->getValueType(0);
2193 SDValue Chain = N->getOperand(0);
2194
2195 SDValue PNg = N->getOperand(2);
2196 SDValue Base = N->getOperand(3);
2197 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2198 unsigned Opc;
2199 std::tie(Opc, Base, Offset) =
2200 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2201
2202 SDValue Ops[] = {PNg, // Predicate-as-counter
2203 Base, // Memory operand
2204 Offset, Chain};
2205
2206 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2207
2208 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2209 SDValue SuperReg = SDValue(Load, 0);
2210 for (unsigned i = 0; i < NumVecs; ++i)
2211 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2212 AArch64::zsub0 + i, DL, VT, SuperReg));
2213
2214 // Copy chain
2215 unsigned ChainIdx = NumVecs;
2216 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2217 CurDAG->RemoveDeadNode(N);
2218}
2219
2220void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2221 unsigned Opcode) {
2222 if (N->getValueType(0) != MVT::nxv4f32)
2223 return;
2224 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2225}
2226
2227void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2228 unsigned NumOutVecs,
2229 unsigned Opc,
2230 uint32_t MaxImm) {
2231 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2232 if (Imm->getZExtValue() > MaxImm)
2233 return;
2234
2235 SDValue ZtValue;
2236 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2237 return;
2238
2239 SDValue Chain = Node->getOperand(0);
2240 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2241 SDLoc DL(Node);
2242 EVT VT = Node->getValueType(0);
2243
2244 SDNode *Instruction =
2245 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2246 SDValue SuperReg = SDValue(Instruction, 0);
2247
2248 for (unsigned I = 0; I < NumOutVecs; ++I)
2249 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2250 AArch64::zsub0 + I, DL, VT, SuperReg));
2251
2252 // Copy chain
2253 unsigned ChainIdx = NumOutVecs;
2254 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2255 CurDAG->RemoveDeadNode(Node);
2256}
2257
2258void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2259 unsigned NumOutVecs,
2260 unsigned Opc) {
2261 SDValue ZtValue;
2262 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2263 return;
2264
2265 SDValue Chain = Node->getOperand(0);
2266 SDValue Ops[] = {ZtValue,
2267 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2268 Chain};
2269
2270 SDLoc DL(Node);
2271 EVT VT = Node->getValueType(0);
2272
2273 SDNode *Instruction =
2274 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2275 SDValue SuperReg = SDValue(Instruction, 0);
2276
2277 for (unsigned I = 0; I < NumOutVecs; ++I)
2278 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2279 AArch64::zsub0 + I, DL, VT, SuperReg));
2280
2281 // Copy chain
2282 unsigned ChainIdx = NumOutVecs;
2283 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2284 CurDAG->RemoveDeadNode(Node);
2285}
2286
2287void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2288 unsigned Op) {
2289 SDLoc DL(N);
2290 EVT VT = N->getValueType(0);
2291
2292 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2293 SDValue Zd = createZMulTuple(Regs);
2294 SDValue Zn = N->getOperand(1 + NumVecs);
2295 SDValue Zm = N->getOperand(2 + NumVecs);
2296
2297 SDValue Ops[] = {Zd, Zn, Zm};
2298
2299 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2300 SDValue SuperReg = SDValue(Intrinsic, 0);
2301 for (unsigned i = 0; i < NumVecs; ++i)
2302 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2303 AArch64::zsub0 + i, DL, VT, SuperReg));
2304
2305 CurDAG->RemoveDeadNode(N);
2306}
2307
2308bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2309 switch (BaseReg) {
2310 default:
2311 return false;
2312 case AArch64::ZA:
2313 case AArch64::ZAB0:
2314 if (TileNum == 0)
2315 break;
2316 return false;
2317 case AArch64::ZAH0:
2318 if (TileNum <= 1)
2319 break;
2320 return false;
2321 case AArch64::ZAS0:
2322 if (TileNum <= 3)
2323 break;
2324 return false;
2325 case AArch64::ZAD0:
2326 if (TileNum <= 7)
2327 break;
2328 return false;
2329 }
2330
2331 BaseReg += TileNum;
2332 return true;
2333}
2334
// Select an SME multi-vector read from a ZA tile (or the whole ZA array).
// MaxIdx/Scale bound the slice index accepted by SelectSMETileSlice.
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  // Tile-based forms carry the tile number as operand 2; the whole-ZA form
  // has no tile operand.
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = N->getConstantOperandVal(2);

  // Fold the tile number into BaseReg; bail out if it is out of range.
  if (!SelectSMETile(BaseReg, TileNum))
    return;

  // The slice index follows the tile operand when one is present.
  SDValue SliceBase, Base, Offset;
  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(2);
  else
    SliceBase = N->getOperand(3);

  // Split the slice index into base register + small immediate.
  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;

  SDLoc DL(N);
  SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
  SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  // Extract each result vector from the tuple result.
  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));
  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}
2369
2370void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2371 unsigned Op, unsigned MaxIdx,
2372 unsigned Scale, unsigned BaseReg) {
2373 // Slice can be in different positions
2374 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2375 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2376 SDValue SliceBase = N->getOperand(2);
2377 if (BaseReg != AArch64::ZA)
2378 SliceBase = N->getOperand(3);
2379
2381 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2382 return;
2383 // The correct Za tile number is computed in Machine Instruction
2384 // See EmitZAInstr
2385 // DAG cannot select Za tile as an output register with ZReg
2386 SDLoc DL(N);
2388 if (BaseReg != AArch64::ZA )
2389 Ops.push_back(N->getOperand(2));
2390 Ops.push_back(Base);
2391 Ops.push_back(Offset);
2392 Ops.push_back(N->getOperand(0)); //Chain
2393 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2394
2395 EVT VT = N->getValueType(0);
2396 for (unsigned I = 0; I < NumVecs; ++I)
2397 ReplaceUses(SDValue(N, I),
2398 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2399 SDValue(Mov, 0)));
2400
2401 // Copy chain
2402 unsigned ChainIdx = NumVecs;
2403 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2404 CurDAG->RemoveDeadNode(N);
2405}
2406
2407void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2408 unsigned NumOutVecs,
2409 bool IsTupleInput,
2410 unsigned Opc) {
2411 SDLoc DL(N);
2412 EVT VT = N->getValueType(0);
2413 unsigned NumInVecs = N->getNumOperands() - 1;
2414
2416 if (IsTupleInput) {
2417 assert((NumInVecs == 2 || NumInVecs == 4) &&
2418 "Don't know how to handle multi-register input!");
2419 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2420 Ops.push_back(createZMulTuple(Regs));
2421 } else {
2422 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2423 for (unsigned I = 0; I < NumInVecs; I++)
2424 Ops.push_back(N->getOperand(1 + I));
2425 }
2426
2427 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2428 SDValue SuperReg = SDValue(Res, 0);
2429
2430 for (unsigned I = 0; I < NumOutVecs; I++)
2431 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2432 AArch64::zsub0 + I, DL, VT, SuperReg));
2433 CurDAG->RemoveDeadNode(N);
2434}
2435
2436void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2437 unsigned Opc) {
2438 SDLoc dl(N);
2439 EVT VT = N->getOperand(2)->getValueType(0);
2440
2441 // Form a REG_SEQUENCE to force register allocation.
2442 bool Is128Bit = VT.getSizeInBits() == 128;
2443 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2444 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2445
2446 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2447 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2448
2449 // Transfer memoperands.
2450 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2451 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2452
2453 ReplaceNode(N, St);
2454}
2455
2456void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2457 unsigned Scale, unsigned Opc_rr,
2458 unsigned Opc_ri) {
2459 SDLoc dl(N);
2460
2461 // Form a REG_SEQUENCE to force register allocation.
2462 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2463 SDValue RegSeq = createZTuple(Regs);
2464
2465 // Optimize addressing mode.
2466 unsigned Opc;
2468 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2469 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2470 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2471
2472 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2473 Base, // address
2474 Offset, // offset
2475 N->getOperand(0)}; // chain
2476 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2477
2478 ReplaceNode(N, St);
2479}
2480
2481bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2482 SDValue &OffImm) {
2483 SDLoc dl(N);
2484 const DataLayout &DL = CurDAG->getDataLayout();
2485 const TargetLowering *TLI = getTargetLowering();
2486
2487 // Try to match it for the frame address
2488 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2489 int FI = FINode->getIndex();
2490 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2491 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2492 return true;
2493 }
2494
2495 return false;
2496}
2497
2498void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2499 unsigned Opc) {
2500 SDLoc dl(N);
2501 EVT VT = N->getOperand(2)->getValueType(0);
2502 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2503 MVT::Other}; // Type for the Chain
2504
2505 // Form a REG_SEQUENCE to force register allocation.
2506 bool Is128Bit = VT.getSizeInBits() == 128;
2507 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2508 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2509
2510 SDValue Ops[] = {RegSeq,
2511 N->getOperand(NumVecs + 1), // base register
2512 N->getOperand(NumVecs + 2), // Incremental
2513 N->getOperand(0)}; // Chain
2514 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2515
2516 ReplaceNode(N, St);
2517}
2518
2519namespace {
2520/// WidenVector - Given a value in the V64 register class, produce the
2521/// equivalent value in the V128 register class.
2522class WidenVector {
2523 SelectionDAG &DAG;
2524
2525public:
2526 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2527
2528 SDValue operator()(SDValue V64Reg) {
2529 EVT VT = V64Reg.getValueType();
2530 unsigned NarrowSize = VT.getVectorNumElements();
2531 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2532 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2533 SDLoc DL(V64Reg);
2534
2535 SDValue Undef =
2536 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2537 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2538 }
2539};
2540} // namespace
2541
2542/// NarrowVector - Given a value in the V128 register class, produce the
2543/// equivalent value in the V64 register class.
2545 EVT VT = V128Reg.getValueType();
2546 unsigned WideSize = VT.getVectorNumElements();
2547 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2548 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2549
2550 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2551 V128Reg);
2552}
2553
/// Select a structured load-to-lane (LD1/LD2/LD3/LD4 lane form). Lane
/// instructions always operate on 128-bit registers, so 64-bit inputs are
/// widened first and the results narrowed back afterwards.
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));

  // Widen 64-bit inputs to their 128-bit containers.
  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  // Tuple, lane number, address, chain.
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  // Extract each (128-bit) result, narrowing back to 64 bits if needed.
  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  // Rewire the chain result.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}
2591
/// Select a post-incremented load-to-lane. As in SelectLoadLane, 64-bit
/// vectors are widened to 128 bits around the lane instruction; the machine
/// node additionally produces the updated base pointer as result 0.
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));

  // Widen 64-bit inputs to their 128-bit containers.
  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),          // Base register
                   N->getOperand(NumVecs + 3),          // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    // Single vector: the result comes back directly (narrowed if needed).
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    // Multiple vectors: extract each Q subregister, narrowing if needed.
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
2645
2646void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2647 unsigned Opc) {
2648 SDLoc dl(N);
2649 EVT VT = N->getOperand(2)->getValueType(0);
2650 bool Narrow = VT.getSizeInBits() == 64;
2651
2652 // Form a REG_SEQUENCE to force register allocation.
2653 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2654
2655 if (Narrow)
2656 transform(Regs, Regs.begin(),
2657 WidenVector(*CurDAG));
2658
2659 SDValue RegSeq = createQTuple(Regs);
2660
2661 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2662
2663 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2664 N->getOperand(NumVecs + 3), N->getOperand(0)};
2665 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2666
2667 // Transfer memoperands.
2668 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2669 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2670
2671 ReplaceNode(N, St);
2672}
2673
/// Select a post-incremented store-from-lane. Like SelectStoreLane, 64-bit
/// sources are widened to 128 bits; the machine node also produces the
/// updated base pointer (writeback) as a result.
void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));

  // Widen 64-bit sources to their 128-bit containers.
  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}
2706
// NOTE(review): this capture dropped the first line of this definition; per
// the call site in isBitfieldExtractOp it is presumably
//   static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
// -- restore it from the original file. On success this fills Opc
// (UBFMWri/UBFMXri), Opd0 (the value extracted from) and the LSB/MSB
// immediates of a single UBFM implementing "and(srl(val, N), mask)".
2708 unsigned &Opc, SDValue &Opd0,
2709 unsigned &LSB, unsigned &MSB,
2710 unsigned NumberOfIgnoredLowBits,
2711 bool BiggerPattern) {
2712 assert(N->getOpcode() == ISD::AND &&
2713 "N must be a AND operation to call this function");
2714
2715 EVT VT = N->getValueType(0);
2716
2717 // Here we can test the type of VT and return false when the type does not
2718 // match, but since it is done prior to that call in the current context
2719 // we turned that into an assert to avoid redundant code.
2720 assert((VT == MVT::i32 || VT == MVT::i64) &&
2721 "Type checking must have been done before calling this function");
2722
2723 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2724 // changed the AND node to a 32-bit mask operation. We'll have to
2725 // undo that as part of the transform here if we want to catch all
2726 // the opportunities.
2727 // Currently the NumberOfIgnoredLowBits argument helps to recover
2728 // from these situations when matching bigger pattern (bitfield insert).
2729
2730 // For unsigned extracts, check for a shift right and mask
2731 uint64_t AndImm = 0;
2732 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2733 return false;
2734
2735 const SDNode *Op0 = N->getOperand(0).getNode();
2736
2737 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2738 // simplified. Try to undo that
2739 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2740
2741 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2742 if (AndImm & (AndImm + 1))
2743 return false;
2744
2745 bool ClampMSB = false;
2746 uint64_t SrlImm = 0;
2747 // Handle the SRL + ANY_EXTEND case.
2748 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2749 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2750 // Extend the incoming operand of the SRL to 64-bit.
2751 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2752 // Make sure to clamp the MSB so that we preserve the semantics of the
2753 // original operations.
2754 ClampMSB = true;
2755 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
// NOTE(review): a line is missing here in this capture -- the condition
// appears to continue with an isOpcWithIntImmediate(..., ISD::SRL, ...) call
// whose closing argument is on the next line. Restore from the original file.
2757 SrlImm)) {
2758 // If the shift result was truncated, we can still combine them.
2759 Opd0 = Op0->getOperand(0).getOperand(0);
2760
2761 // Use the type of SRL node.
2762 VT = Opd0->getValueType(0);
2763 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2764 Opd0 = Op0->getOperand(0);
2765 ClampMSB = (VT == MVT::i32);
2766 } else if (BiggerPattern) {
2767 // Let's pretend a 0 shift right has been performed.
2768 // The resulting code will be at least as good as the original one
2769 // plus it may expose more opportunities for bitfield insert pattern.
2770 // FIXME: Currently we limit this to the bigger pattern, because
2771 // some optimizations expect AND and not UBFM.
2772 Opd0 = N->getOperand(0);
2773 } else
2774 return false;
2775
2776 // Bail out on large immediates. This happens when no proper
2777 // combining/constant folding was performed.
2778 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2779 LLVM_DEBUG(
2780 (dbgs() << N
2781 << ": Found large shift immediate, this should not happen\n"));
2782 return false;
2783 }
2784
2785 LSB = SrlImm;
2786 MSB = SrlImm +
2787 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2788 : llvm::countr_one<uint64_t>(AndImm)) -
2789 1;
2790 if (ClampMSB)
2791 // Since we're moving the extend before the right shift operation, we need
2792 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2793 // the zeros which would get shifted in with the original right shift
2794 // operation.
2795 MSB = MSB > 31 ? 31 : MSB;
2796
2797 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2798 return true;
2799}
2800
// NOTE(review): the first line of this definition was dropped by this
// capture; per the SIGN_EXTEND_INREG case in isBitfieldExtractOp it is
// presumably
//   static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
// Matches sign_extend_inreg of a constant right shift (possibly through a
// truncate) and maps it onto an SBFM, producing the Immr/Imms immediates.
2802 SDValue &Opd0, unsigned &Immr,
2803 unsigned &Imms) {
2804 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2805
2806 EVT VT = N->getValueType(0);
2807 unsigned BitWidth = VT.getSizeInBits();
2808 assert((VT == MVT::i32 || VT == MVT::i64) &&
2809 "Type checking must have been done before calling this function");
2810
2811 SDValue Op = N->getOperand(0);
// Look through a truncate: the shift may have happened in the wider type.
2812 if (Op->getOpcode() == ISD::TRUNCATE) {
2813 Op = Op->getOperand(0);
2814 VT = Op->getValueType(0);
2815 BitWidth = VT.getSizeInBits();
2816 }
2817
2818 uint64_t ShiftImm;
2819 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2820 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2821 return false;
2822
// The extracted field must lie entirely within the (possibly widened) type.
2823 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2824 if (ShiftImm + Width > BitWidth)
2825 return false;
2826
2827 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2828 Opd0 = Op.getOperand(0);
2829 Immr = ShiftImm;
2830 Imms = ShiftImm + Width - 1;
2831 return true;
2832}
2833
// NOTE(review): the first line of this definition was dropped by this
// capture; per the call in isBitfieldExtractOpFromShr it is presumably
//   static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2835 SDValue &Opd0, unsigned &LSB,
2836 unsigned &MSB) {
2837 // We are looking for the following pattern which basically extracts several
2838 // continuous bits from the source value and places it from the LSB of the
2839 // destination value, all other bits of the destination value are set to zero:
2840 //
2841 // Value2 = AND Value, MaskImm
2842 // SRL Value2, ShiftImm
2843 //
2844 // with MaskImm >> ShiftImm to search for the bit width.
2845 //
2846 // This gets selected into a single UBFM:
2847 //
2848 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2849 //
2850
2851 if (N->getOpcode() != ISD::SRL)
2852 return false;
2853
2854 uint64_t AndMask = 0;
2855 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2856 return false;
2857
2858 Opd0 = N->getOperand(0).getOperand(0);
2859
2860 uint64_t SrlImm = 0;
2861 if (!isIntImmediate(N->getOperand(1), SrlImm))
2862 return false;
2863
2864 // Check whether we really have several bits extract here.
2865 if (!isMask_64(AndMask >> SrlImm))
2866 return false;
2867
2868 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2869 LSB = SrlImm;
2870 MSB = llvm::Log2_64(AndMask);
2871 return true;
2872}
2873
2874static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2875 unsigned &Immr, unsigned &Imms,
2876 bool BiggerPattern) {
2877 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2878 "N must be a SHR/SRA operation to call this function");
2879
2880 EVT VT = N->getValueType(0);
2881
2882 // Here we can test the type of VT and return false when the type does not
2883 // match, but since it is done prior to that call in the current context
2884 // we turned that into an assert to avoid redundant code.
2885 assert((VT == MVT::i32 || VT == MVT::i64) &&
2886 "Type checking must have been done before calling this function");
2887
2888 // Check for AND + SRL doing several bits extract.
2889 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2890 return true;
2891
2892 // We're looking for a shift of a shift.
2893 uint64_t ShlImm = 0;
2894 uint64_t TruncBits = 0;
2895 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2896 Opd0 = N->getOperand(0).getOperand(0);
2897 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2898 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2899 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2900 // be considered as setting high 32 bits as zero. Our strategy here is to
2901 // always generate 64bit UBFM. This consistency will help the CSE pass
2902 // later find more redundancy.
2903 Opd0 = N->getOperand(0).getOperand(0);
2904 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2905 VT = Opd0.getValueType();
2906 assert(VT == MVT::i64 && "the promoted type should be i64");
2907 } else if (BiggerPattern) {
2908 // Let's pretend a 0 shift left has been performed.
2909 // FIXME: Currently we limit this to the bigger pattern case,
2910 // because some optimizations expect AND and not UBFM
2911 Opd0 = N->getOperand(0);
2912 } else
2913 return false;
2914
2915 // Missing combines/constant folding may have left us with strange
2916 // constants.
2917 if (ShlImm >= VT.getSizeInBits()) {
2918 LLVM_DEBUG(
2919 (dbgs() << N
2920 << ": Found large shift immediate, this should not happen\n"));
2921 return false;
2922 }
2923
2924 uint64_t SrlImm = 0;
2925 if (!isIntImmediate(N->getOperand(1), SrlImm))
2926 return false;
2927
2928 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2929 "bad amount in shift node!");
2930 int immr = SrlImm - ShlImm;
2931 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2932 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2933 // SRA requires a signed extraction
2934 if (VT == MVT::i32)
2935 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2936 else
2937 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2938 return true;
2939}
2940
2941bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2942 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2943
2944 EVT VT = N->getValueType(0);
2945 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2946 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2947 return false;
2948
2949 uint64_t ShiftImm;
2950 SDValue Op = N->getOperand(0);
2951 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2952 return false;
2953
2954 SDLoc dl(N);
2955 // Extend the incoming operand of the shift to 64-bits.
2956 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2957 unsigned Immr = ShiftImm;
2958 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2959 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2960 CurDAG->getTargetConstant(Imms, dl, VT)};
2961 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2962 return true;
2963}
2964
// Dispatch helper: recognize any node implementable as a single SBFM/UBFM
// bitfield extract and return its opcode, source operand and immediates.
2965static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2966 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2967 unsigned NumberOfIgnoredLowBits = 0,
2968 bool BiggerPattern = false) {
2969 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2970 return false;
2971
2972 switch (N->getOpcode()) {
2973 default:
// Already-selected machine nodes are handled by the second switch below.
2974 if (!N->isMachineOpcode())
2975 return false;
2976 break;
2977 case ISD::AND:
2978 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2979 NumberOfIgnoredLowBits, BiggerPattern);
2980 case ISD::SRL:
2981 case ISD::SRA:
2982 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2983
// NOTE(review): this capture dropped the case label that belongs immediately
// before the next line -- presumably "case ISD::SIGN_EXTEND_INREG:" given the
// callee's assert. Restore from the original file.
2985 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2986 }
2987
// An existing SBFM/UBFM machine node is itself a bitfield extract: just read
// its operands back out.
2988 unsigned NOpc = N->getMachineOpcode();
2989 switch (NOpc) {
2990 default:
2991 return false;
2992 case AArch64::SBFMWri:
2993 case AArch64::UBFMWri:
2994 case AArch64::SBFMXri:
2995 case AArch64::UBFMXri:
2996 Opc = NOpc;
2997 Opd0 = N->getOperand(0);
2998 Immr = N->getConstantOperandVal(1);
2999 Imms = N->getConstantOperandVal(2);
3000 return true;
3001 }
3002 // Unreachable
3003 return false;
3004}
3005
3006bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
3007 unsigned Opc, Immr, Imms;
3008 SDValue Opd0;
3009 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
3010 return false;
3011
3012 EVT VT = N->getValueType(0);
3013 SDLoc dl(N);
3014
3015 // If the bit extract operation is 64bit but the original type is 32bit, we
3016 // need to add one EXTRACT_SUBREG.
3017 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3018 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3019 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3020
3021 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3022 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3023 MVT::i32, SDValue(BFM, 0));
3024 ReplaceNode(N, Inner.getNode());
3025 return true;
3026 }
3027
3028 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3029 CurDAG->getTargetConstant(Imms, dl, VT)};
3030 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3031 return true;
3032}
3033
3034/// Does DstMask form a complementary pair with the mask provided by
3035/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3036/// this asks whether DstMask zeroes precisely those bits that will be set by
3037/// the other half.
3038static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3039 unsigned NumberOfIgnoredHighBits, EVT VT) {
3040 assert((VT == MVT::i32 || VT == MVT::i64) &&
3041 "i32 or i64 mask type expected!");
3042 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3043
3044 // Enable implicitTrunc as we're intentionally ignoring high bits.
3045 APInt SignificantDstMask =
3046 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3047 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3048
3049 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3050 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3051}
3052
3053// Look for bits that will be useful for later uses.
3054// A bit is considered useless as soon as it is dropped and never used
3055// before it has been dropped.
3056// E.g., looking for useful bit of x
3057// 1. y = x & 0x7
3058// 2. z = y >> 2
3059// After #1, x useful bits are 0x7, then the useful bits of x, live through
3060// y.
3061// After #2, the useful bits of x are 0x4.
3062// However, if x is used on an unpredictable instruction, then all its bits
3063// are useful.
3064// E.g.
3065// 1. y = x & 0x7
3066// 2. z = y >> 2
3067// 3. str x, [@x]
3068static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3069
// NOTE(review): the first line of this definition was dropped by this
// capture; per the AND(S) cases in getUsefulBitsForUse it is presumably
//   static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
// Narrows UsefulBits by the decoded logical-immediate mask of an AND(S)
// machine node, then recurses through the node's own users.
3071 unsigned Depth) {
3072 uint64_t Imm =
3073 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
// The AND immediate is stored encoded; decode it to a plain bit mask first.
3074 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3075 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3076 getUsefulBits(Op, UsefulBits, Depth + 1);
3077}
3078
// NOTE(review): the first line of this definition was dropped by this
// capture; per the call in getUsefulBitsFromUBFM it is presumably
//   static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
// Imm/MSB are the immr/imms immediates of a bitfield move; UsefulBits is
// narrowed to the operand bits the move actually reads.
3080 uint64_t Imm, uint64_t MSB,
3081 unsigned Depth) {
3082 // inherit the bitwidth value
3083 APInt OpUsefulBits(UsefulBits);
3084 OpUsefulBits = 1;
3085
3086 if (MSB >= Imm) {
// BFXIL-style field: bits [Imm, MSB] of the operand land in the low bits of
// the result.
3087 OpUsefulBits <<= MSB - Imm + 1;
3088 --OpUsefulBits;
3089 // The interesting part will be in the lower part of the result
3090 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3091 // The interesting part was starting at Imm in the argument
3092 OpUsefulBits <<= Imm;
3093 } else {
// BFI-style field: the operand's low bits are rotated up into position.
3094 OpUsefulBits <<= MSB + 1;
3095 --OpUsefulBits;
3096 // The interesting part will be shifted in the result
3097 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3098 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3099 // The interesting part was at zero in the argument
3100 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3101 }
3102
3103 UsefulBits &= OpUsefulBits;
3104}
3105
3106static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3107 unsigned Depth) {
3108 uint64_t Imm =
3109 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3110 uint64_t MSB =
3111 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3112
3113 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3114}
3115
// NOTE(review): the first line of this definition was dropped by this
// capture; per the ORRWrs/ORRXrs cases in getUsefulBitsForUse it is presumably
//   static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
// Propagates useful bits through the shifted (second) operand of an
// ORR-with-shifted-register by un-shifting the demand mask.
3117 unsigned Depth) {
3118 uint64_t ShiftTypeAndValue =
3119 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
// Start from an all-ones mask of the right width.
3120 APInt Mask(UsefulBits);
3121 Mask.clearAllBits();
3122 Mask.flipAllBits();
3123
3124 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3125 // Shift Left
3126 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3127 Mask <<= ShiftAmt;
3128 getUsefulBits(Op, Mask, Depth + 1);
3129 Mask.lshrInPlace(ShiftAmt);
3130 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3131 // Shift Right
3132 // We do not handle AArch64_AM::ASR, because the sign will change the
3133 // number of useful bits
3134 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3135 Mask.lshrInPlace(ShiftAmt);
3136 getUsefulBits(Op, Mask, Depth + 1);
3137 Mask <<= ShiftAmt;
3138 } else
3139 return;
3140
3141 UsefulBits &= Mask;
3142}
3143
3144static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3145 unsigned Depth) {
3146 uint64_t Imm =
3147 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3148 uint64_t MSB =
3149 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3150
3151 APInt OpUsefulBits(UsefulBits);
3152 OpUsefulBits = 1;
3153
3154 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3155 ResultUsefulBits.flipAllBits();
3156 APInt Mask(UsefulBits.getBitWidth(), 0);
3157
3158 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3159
3160 if (MSB >= Imm) {
3161 // The instruction is a BFXIL.
3162 uint64_t Width = MSB - Imm + 1;
3163 uint64_t LSB = Imm;
3164
3165 OpUsefulBits <<= Width;
3166 --OpUsefulBits;
3167
3168 if (Op.getOperand(1) == Orig) {
3169 // Copy the low bits from the result to bits starting from LSB.
3170 Mask = ResultUsefulBits & OpUsefulBits;
3171 Mask <<= LSB;
3172 }
3173
3174 if (Op.getOperand(0) == Orig)
3175 // Bits starting from LSB in the input contribute to the result.
3176 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3177 } else {
3178 // The instruction is a BFI.
3179 uint64_t Width = MSB + 1;
3180 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3181
3182 OpUsefulBits <<= Width;
3183 --OpUsefulBits;
3184 OpUsefulBits <<= LSB;
3185
3186 if (Op.getOperand(1) == Orig) {
3187 // Copy the bits from the result to the zero bits.
3188 Mask = ResultUsefulBits & OpUsefulBits;
3189 Mask.lshrInPlace(LSB);
3190 }
3191
3192 if (Op.getOperand(0) == Orig)
3193 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3194 }
3195
3196 UsefulBits &= Mask;
3197}
3198
3199static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3200 SDValue Orig, unsigned Depth) {
3201
3202 // Users of this node should have already been instruction selected
3203 // FIXME: Can we turn that into an assert?
3204 if (!UserNode->isMachineOpcode())
3205 return;
3206
3207 switch (UserNode->getMachineOpcode()) {
3208 default:
3209 return;
3210 case AArch64::ANDSWri:
3211 case AArch64::ANDSXri:
3212 case AArch64::ANDWri:
3213 case AArch64::ANDXri:
3214 // We increment Depth only when we call the getUsefulBits
3215 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3216 Depth);
3217 case AArch64::UBFMWri:
3218 case AArch64::UBFMXri:
3219 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3220
3221 case AArch64::ORRWrs:
3222 case AArch64::ORRXrs:
3223 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3224 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3225 Depth);
3226 return;
3227 case AArch64::BFMWri:
3228 case AArch64::BFMXri:
3229 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3230
3231 case AArch64::STRBBui:
3232 case AArch64::STURBBi:
3233 if (UserNode->getOperand(0) != Orig)
3234 return;
3235 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3236 return;
3237
3238 case AArch64::STRHHui:
3239 case AArch64::STURHHi:
3240 if (UserNode->getOperand(0) != Orig)
3241 return;
3242 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3243 return;
3244 }
3245}
3246
// Walk the users of Op and compute which of its produced bits are actually
// consumed, narrowing UsefulBits to their union.
3247static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
// NOTE(review): this capture dropped the recursion guard that belongs here
// (presumably "if (Depth >= SelectionDAG::MaxRecursionDepth)" -- the bare
// "return;" below is its body). Restore from the original file.
3249 return;
3250 // Initialize UsefulBits
3251 if (!Depth) {
3252 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3253 // At the beginning, assume every produced bits is useful
3254 UsefulBits = APInt(Bitwidth, 0);
3255 UsefulBits.flipAllBits();
3256 }
3257 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3258
3259 for (SDNode *Node : Op.getNode()->users()) {
3260 // A use cannot produce useful bits
3261 APInt UsefulBitsForUse = APInt(UsefulBits);
3262 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3263 UsersUsefulBits |= UsefulBitsForUse;
3264 }
3265 // UsefulBits contains the produced bits that are meaningful for the
3266 // current definition, thus a user cannot make a bit meaningful at
3267 // this point
3268 UsefulBits &= UsersUsefulBits;
3269}
3270
3271/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3272/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3273/// 0, return Op unchanged.
3274static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3275 if (ShlAmount == 0)
3276 return Op;
3277
3278 EVT VT = Op.getValueType();
3279 SDLoc dl(Op);
3280 unsigned BitWidth = VT.getSizeInBits();
3281 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3282
3283 SDNode *ShiftNode;
3284 if (ShlAmount > 0) {
3285 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3286 ShiftNode = CurDAG->getMachineNode(
3287 UBFMOpc, dl, VT, Op,
3288 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3289 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3290 } else {
3291 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3292 assert(ShlAmount < 0 && "expected right shift");
3293 int ShrAmount = -ShlAmount;
3294 ShiftNode = CurDAG->getMachineNode(
3295 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3296 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3297 }
3298
3299 return SDValue(ShiftNode, 0);
3300}
3301
3302// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3303static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3304 bool BiggerPattern,
3305 const uint64_t NonZeroBits,
3306 SDValue &Src, int &DstLSB,
3307 int &Width);
3308
3309// For bit-field-positioning pattern "shl VAL, N)".
3310static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3311 bool BiggerPattern,
3312 const uint64_t NonZeroBits,
3313 SDValue &Src, int &DstLSB,
3314 int &Width);
3315
3316/// Does this tree qualify as an attempt to move a bitfield into position,
3317/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
// NOTE(review): the first line of this definition was dropped by this
// capture; per the forward declarations above it is presumably
//   static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
// On success Src/DstLSB/Width describe the positioned field.
3319 bool BiggerPattern, SDValue &Src,
3320 int &DstLSB, int &Width) {
3321 EVT VT = Op.getValueType();
3322 unsigned BitWidth = VT.getSizeInBits();
3323 (void)BitWidth;
3324 assert(BitWidth == 32 || BitWidth == 64);
3325
3326 KnownBits Known = CurDAG->computeKnownBits(Op);
3327
3328 // Non-zero in the sense that they're not provably zero, which is the key
3329 // point if we want to use this value
3330 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
// The possibly-set bits must form one contiguous run for this to be a
// single positioned bitfield.
3331 if (!isShiftedMask_64(NonZeroBits))
3332 return false;
3333
3334 switch (Op.getOpcode()) {
3335 default:
3336 break;
3337 case ISD::AND:
3338 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3339 NonZeroBits, Src, DstLSB, Width);
3340 case ISD::SHL:
3341 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3342 NonZeroBits, Src, DstLSB, Width);
3343 }
3344
3345 return false;
3346}
3347
// NOTE(review): the first line of this definition was dropped by this
// capture; per the forward declaration above it is presumably
//   static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3349 bool BiggerPattern,
3350 const uint64_t NonZeroBits,
3351 SDValue &Src, int &DstLSB,
3352 int &Width) {
3353 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3354
3355 EVT VT = Op.getValueType();
3356 assert((VT == MVT::i32 || VT == MVT::i64) &&
3357 "Caller guarantees VT is one of i32 or i64");
3358 (void)VT;
3359
3360 uint64_t AndImm;
3361 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3362 return false;
3363
3364 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3365 // 1) (AndImm & (1 << POS) == 0)
3366 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3367 //
3368 // 1) and 2) don't agree so something must be wrong (e.g., in
3369 // 'SelectionDAG::computeKnownBits')
3370 assert((~AndImm & NonZeroBits) == 0 &&
3371 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3372
3373 SDValue AndOp0 = Op.getOperand(0);
3374
3375 uint64_t ShlImm;
3376 SDValue ShlOp0;
3377 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3378 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3379 ShlOp0 = AndOp0.getOperand(0);
3380 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
// NOTE(review): a line is missing here in this capture -- the condition
// appears to continue with an isOpcWithIntImmediate(..., ISD::SHL, ...) call
// whose final argument is on the next line. Restore from the original file.
3382 ShlImm)) {
3383 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3384
3385 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3386 SDValue ShlVal = AndOp0.getOperand(0);
3387
3388 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3389 // expect VT to be MVT::i32.
3390 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3391
3392 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3393 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3394 } else
3395 return false;
3396
3397 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3398 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3399 // AndOp0+AND.
3400 if (!BiggerPattern && !AndOp0.hasOneUse())
3401 return false;
3402
3403 DstLSB = llvm::countr_zero(NonZeroBits);
3404 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3405
3406 // Bail out on large Width. This happens when no proper combining / constant
3407 // folding was performed.
3408 if (Width >= (int)VT.getSizeInBits()) {
3409 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3410 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3411 // "val".
3412 // If VT is i32, what Width >= 32 means:
3413 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3414 // demands at least 'Width' bits (after dag-combiner). This together with
3415 // `any_extend` Op (undefined higher bits) indicates missed combination
3416 // when lowering the 'and' IR instruction to an machine IR instruction.
3417 LLVM_DEBUG(
3418 dbgs()
3419 << "Found large Width in bit-field-positioning -- this indicates no "
3420 "proper combining / constant folding was performed\n");
3421 return false;
3422 }
3423
3424 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3425 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3426 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3427 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3428 // which case it is not profitable to insert an extra shift.
3429 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3430 return false;
3431
3432 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3433 return true;
3434}
3435
3436// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3437// UBFIZ.
// NOTE(review): the first line of this definition was dropped by this
// capture; per the call in isBitfieldPositioningOpFromShl it is presumably
//   static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3439 SDValue &Src, int &DstLSB,
3440 int &Width) {
3441 // Caller should have verified that N is a left shift with constant shift
3442 // amount; asserts that.
3443 assert(Op.getOpcode() == ISD::SHL &&
3444 "Op.getNode() should be a SHL node to call this function");
3445 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3446 "Op.getNode() should shift ShlImm to call this function");
3447
3448 uint64_t AndImm = 0;
3449 SDValue Op0 = Op.getOperand(0);
3450 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3451 return false;
3452
// Drop the mask bits that the left shift discards anyway.
3453 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3454 if (isMask_64(ShiftedAndImm)) {
3455 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3456 // should end with Mask, and could be prefixed with random bits if those
3457 // bits are shifted out.
3458 //
3459 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3460 // the AND result corresponding to those bits are shifted out, so it's fine
3461 // to not extract them.
3462 Width = llvm::countr_one(ShiftedAndImm);
3463 DstLSB = ShlImm;
3464 Src = Op0.getOperand(0);
3465 return true;
3466 }
3467 return false;
3468}
3469
// NOTE(review): the first line of this definition was dropped by this
// capture; per the forward declaration above it is presumably
//   static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3471 bool BiggerPattern,
3472 const uint64_t NonZeroBits,
3473 SDValue &Src, int &DstLSB,
3474 int &Width) {
3475 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3476
3477 EVT VT = Op.getValueType();
3478 assert((VT == MVT::i32 || VT == MVT::i64) &&
3479 "Caller guarantees that type is i32 or i64");
3480 (void)VT;
3481
3482 uint64_t ShlImm;
3483 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3484 return false;
3485
// For the smaller (UBFIZ) pattern the shift must be the value's only use.
3486 if (!BiggerPattern && !Op.hasOneUse())
3487 return false;
3488
3489 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3490 return true;
3491
3492 DstLSB = llvm::countr_zero(NonZeroBits);
3493 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3494
// Inserting a compensating shift is only profitable for the BFI pattern.
3495 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3496 return false;
3497
3498 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3499 return true;
3500}
3501
3502static bool isShiftedMask(uint64_t Mask, EVT VT) {
3503 assert(VT == MVT::i32 || VT == MVT::i64);
3504 if (VT == MVT::i32)
3505 return isShiftedMask_32(Mask);
3506 return isShiftedMask_64(Mask);
3507}
3508
3509// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3510// inserted only sets known zero bits.
// NOTE(review): this capture dropped the first line of this definition (an
// AArch64DAGToDAGISel member taking SDNode *N, judging from the uses of N and
// CurDAG below). Restore it from the original file.
3512 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3513
3514 EVT VT = N->getValueType(0);
3515 if (VT != MVT::i32 && VT != MVT::i64)
3516 return false;
3517
3518 unsigned BitWidth = VT.getSizeInBits();
3519
3520 uint64_t OrImm;
3521 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3522 return false;
3523
3524 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3525 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3526 // performance neutral.
// NOTE(review): a line is missing here in this capture -- the guard
// condition (presumably "if (AArch64_AM::isLogicalImmediate(OrImm,
// BitWidth))") whose body is the "return false;" below.
3528 return false;
3529
3530 uint64_t MaskImm;
3531 SDValue And = N->getOperand(0);
3532 // Must be a single use AND with an immediate operand.
3533 if (!And.hasOneUse() ||
3534 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3535 return false;
3536
3537 // Compute the Known Zero for the AND as this allows us to catch more general
3538 // cases than just looking for AND with imm.
3539 KnownBits Known = CurDAG->computeKnownBits(And);
3540
3541 // Non-zero in the sense that they're not provably zero, which is the key
3542 // point if we want to use this value.
3543 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3544
3545 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3546 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3547 return false;
3548
3549 // The bits being inserted must only set those bits that are known to be zero.
3550 if ((OrImm & NotKnownZero) != 0) {
3551 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3552 // currently handle this case.
3553 return false;
3554 }
3555
3556 // BFI/BFXIL dst, src, #lsb, #width.
3557 int LSB = llvm::countr_one(NotKnownZero);
3558 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3559
3560 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3561 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3562 unsigned ImmS = Width - 1;
3563
3564 // If we're creating a BFI instruction avoid cases where we need more
3565 // instructions to materialize the BFI constant as compared to the original
3566 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3567 // should be no worse in this case.
3568 bool IsBFI = LSB != 0;
3569 uint64_t BFIImm = OrImm >> LSB;
3570 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3571 // We have a BFI instruction and we know the constant can't be materialized
3572 // with a ORR-immediate with the zero register.
3573 unsigned OrChunks = 0, BFIChunks = 0;
// Compare how many 16-bit MOV chunks each constant needs; keep the cheaper.
3574 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3575 if (((OrImm >> Shift) & 0xFFFF) != 0)
3576 ++OrChunks;
3577 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3578 ++BFIChunks;
3579 }
3580 if (BFIChunks > OrChunks)
3581 return false;
3582 }
3583
3584 // Materialize the constant to be inserted.
3585 SDLoc DL(N);
3586 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3587 SDNode *MOVI = CurDAG->getMachineNode(
3588 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3589
3590 // Create the BFI/BFXIL instruction.
3591 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3592 CurDAG->getTargetConstant(ImmR, DL, VT),
3593 CurDAG->getTargetConstant(ImmS, DL, VT)};
3594 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3595 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3596 return true;
3597}
3598
3600 SDValue &ShiftedOperand,
3601 uint64_t &EncodedShiftImm) {
3602 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3603 if (!Dst.hasOneUse())
3604 return false;
3605
3606 EVT VT = Dst.getValueType();
3607 assert((VT == MVT::i32 || VT == MVT::i64) &&
3608 "Caller should guarantee that VT is one of i32 or i64");
3609 const unsigned SizeInBits = VT.getSizeInBits();
3610
3611 SDLoc DL(Dst.getNode());
3612 uint64_t AndImm, ShlImm;
3613 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3614 isShiftedMask_64(AndImm)) {
3615 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3616 SDValue DstOp0 = Dst.getOperand(0);
3617 if (!DstOp0.hasOneUse())
3618 return false;
3619
3620 // An example to illustrate the transformation
3621 // From:
3622 // lsr x8, x1, #1
3623 // and x8, x8, #0x3f80
3624 // bfxil x8, x1, #0, #7
3625 // To:
3626 // and x8, x23, #0x7f
3627 // ubfx x9, x23, #8, #7
3628 // orr x23, x8, x9, lsl #7
3629 //
3630 // The number of instructions remains the same, but ORR is faster than BFXIL
3631 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3632 // the dependency chain is improved after the transformation.
3633 uint64_t SrlImm;
3634 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3635 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3636 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3637 unsigned MaskWidth =
3638 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3639 unsigned UBFMOpc =
3640 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3641 SDNode *UBFMNode = CurDAG->getMachineNode(
3642 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3643 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3644 VT),
3645 CurDAG->getTargetConstant(
3646 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3647 ShiftedOperand = SDValue(UBFMNode, 0);
3648 EncodedShiftImm = AArch64_AM::getShifterImm(
3649 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3650 return true;
3651 }
3652 }
3653 return false;
3654 }
3655
3656 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3657 ShiftedOperand = Dst.getOperand(0);
3658 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3659 return true;
3660 }
3661
3662 uint64_t SrlImm;
3663 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3664 ShiftedOperand = Dst.getOperand(0);
3665 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3666 return true;
3667 }
3668 return false;
3669}
3670
3671// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3672// the operands and select it to AArch64::ORR with shifted registers if
3673// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3674static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3675 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3676 const bool BiggerPattern) {
3677 EVT VT = N->getValueType(0);
3678 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3679 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3680 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3681 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3682 assert((VT == MVT::i32 || VT == MVT::i64) &&
3683 "Expect result type to be i32 or i64 since N is combinable to BFM");
3684 SDLoc DL(N);
3685
3686 // Bail out if BFM simplifies away one node in BFM Dst.
3687 if (OrOpd1 != Dst)
3688 return false;
3689
3690 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3691 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3692 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3693 if (BiggerPattern) {
3694 uint64_t SrcAndImm;
3695 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3696 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3697 // OrOpd0 = AND Src, #Mask
3698 // So BFM simplifies away one AND node from Src and doesn't simplify away
3699 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3700 // one node (from Rd), ORR is better since it has higher throughput and
3701 // smaller latency than BFM on many AArch64 processors (and for the rest
3702 // ORR is at least as good as BFM).
3703 SDValue ShiftedOperand;
3704 uint64_t EncodedShiftImm;
3705 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3706 EncodedShiftImm)) {
3707 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3708 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3709 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3710 return true;
3711 }
3712 }
3713 return false;
3714 }
3715
3716 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3717
3718 uint64_t ShlImm;
3719 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3720 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3721 SDValue Ops[] = {
3722 Dst, Src,
3723 CurDAG->getTargetConstant(
3725 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3726 return true;
3727 }
3728
3729 // Select the following pattern to left-shifted operand rather than BFI.
3730 // %val1 = op ..
3731 // %val2 = shl %val1, #imm
3732 // %res = or %val1, %val2
3733 //
3734 // If N is selected to be BFI, we know that
3735 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3736 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3737 //
3738 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3739 if (OrOpd0.getOperand(0) == OrOpd1) {
3740 SDValue Ops[] = {
3741 OrOpd1, OrOpd1,
3742 CurDAG->getTargetConstant(
3744 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3745 return true;
3746 }
3747 }
3748
3749 uint64_t SrlImm;
3750 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3751 // Select the following pattern to right-shifted operand rather than BFXIL.
3752 // %val1 = op ..
3753 // %val2 = lshr %val1, #imm
3754 // %res = or %val1, %val2
3755 //
3756 // If N is selected to be BFXIL, we know that
3757 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3758 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3759 //
3760 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3761 if (OrOpd0.getOperand(0) == OrOpd1) {
3762 SDValue Ops[] = {
3763 OrOpd1, OrOpd1,
3764 CurDAG->getTargetConstant(
3766 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3767 return true;
3768 }
3769 }
3770
3771 return false;
3772}
3773
// Try to select an ISD::OR node as a bitfield insert (BFM/BFXIL), or hand it
// to tryOrrWithShift when an ORR-with-shifted-register would be cheaper.
// Returns true iff N was selected here.
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given a OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  // isBitfieldExtractOp)
  // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    // I's low bit picks the operand order; I/2 picks BiggerPattern, so the
    // two BiggerPattern=false iterations run first.
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion we may
      // want to widen the pattern if we want to grab general bitfield
      // move case
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      // Translate position/width into BFM's rotate (ImmR) / MSB (ImmS) form.
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    // NOTE: this VT intentionally shadows the function-level VT; both are
    // i32 or i64 here.
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand.
    // This allows to catch more general case than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one
    APInt BitsToBeInserted =
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand
    uint64_t Imm;
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits
      Dst = OrOpd1Val;

    // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
    // with shifted operand is more efficient.
    if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
      return true;

    // both parts match
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(0);
  SDValue And1 = N->getOperand(1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask0Imm, VT)) {
      std::swap(And0, And1);
      std::swap(Mask0Imm, Mask1Imm);
    }

    SDValue Src = And1->getOperand(0);
    SDValue Dst = And0->getOperand(0);
    unsigned LSB = llvm::countr_zero(Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    // Fold an existing SRL of Src into the UBFM shift amount when possible.
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));

    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  return false;
}
3947
3948bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3949 if (N->getOpcode() != ISD::OR)
3950 return false;
3951
3952 APInt NUsefulBits;
3953 getUsefulBits(SDValue(N, 0), NUsefulBits);
3954
3955 // If all bits are not useful, just return UNDEF.
3956 if (!NUsefulBits) {
3957 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3958 return true;
3959 }
3960
3961 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3962 return true;
3963
3964 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3965}
3966
3967/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3968/// equivalent of a left shift by a constant amount followed by an and masking
3969/// out a contiguous set of bits.
3970bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3971 if (N->getOpcode() != ISD::AND)
3972 return false;
3973
3974 EVT VT = N->getValueType(0);
3975 if (VT != MVT::i32 && VT != MVT::i64)
3976 return false;
3977
3978 SDValue Op0;
3979 int DstLSB, Width;
3980 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3981 Op0, DstLSB, Width))
3982 return false;
3983
3984 // ImmR is the rotate right amount.
3985 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3986 // ImmS is the most significant bit of the source to be moved.
3987 unsigned ImmS = Width - 1;
3988
3989 SDLoc DL(N);
3990 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3991 CurDAG->getTargetConstant(ImmS, DL, VT)};
3992 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3993 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3994 return true;
3995}
3996
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  // Map the generic shift/rotate to its register-variable AArch64 opcode.
  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  // Size: register width. Bits: number of low shift-amount bits the
  // instruction consumes (log2(Size)); used below to validate AND masks.
  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
      // to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
      // to generate a NOT instead of a SUB from a constant.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      // ORN with the zero register computes the bitwise NOT of Add1.
      MachineSDNode *Not =
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers the
    // bits that are implicitly ANDed off by the above opcodes and if so, skip
    // the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
      return false;

    if ((unsigned)llvm::countr_one(MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    // Widen via SUBREG_TO_REG; only the low bits matter to the instruction.
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
                                                NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
4121
4123 SDValue &FixedPos,
4124 unsigned RegWidth,
4125 bool isReciprocal) {
4126 APFloat FVal(0.0);
4128 FVal = CN->getValueAPF();
4129 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4130 // Some otherwise illegal constants are allowed in this case.
4131 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4132 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4133 return false;
4134
4135 ConstantPoolSDNode *CN =
4136 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4137 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4138 } else
4139 return false;
4140
4141 if (unsigned FBits =
4142 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4143 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4144 return true;
4145 }
4146
4147 return false;
4148}
4149
4150bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4151 unsigned RegWidth) {
4152 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4153 /*isReciprocal*/ false);
4154}
4155
// Match a vector fixed-point conversion scale operand: peel off a
// size-preserving NVCAST/BITCAST, reconstruct the splatted FP constant from
// its materialized form (MOVIshift/FMOV/DUP), and emit the fractional-bit
// count as a target constant.
bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
                                                 unsigned RegWidth) {
  // Look through a cast that does not change the element size.
  if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
      N.getValueType().getScalarSizeInBits() ==
          N.getOperand(0).getValueType().getScalarSizeInBits())
    N = N.getOperand(0);

  // Reinterpret a RegWidth-bit integer pattern as an IEEE float of that width.
  auto ImmToFloat = [RegWidth](APInt Imm) {
    switch (RegWidth) {
    case 16:
      return APFloat(APFloat::IEEEhalf(), Imm);
    case 32:
      return APFloat(APFloat::IEEEsingle(), Imm);
    case 64:
      return APFloat(APFloat::IEEEdouble(), Imm);
    default:
      llvm_unreachable("Unexpected RegWidth!");
    };
  };

  APFloat FVal(0.0);
  switch (N->getOpcode()) {
  case AArch64ISD::MOVIshift:
    // Immediate and shift amount together form the element bit pattern.
    FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
                                          << N.getConstantOperandVal(1)));
    break;
  case AArch64ISD::FMOV:
    FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
    break;
  case AArch64ISD::DUP:
    // Only a constant splat is usable; truncate to the element width.
    if (isa<ConstantSDNode>(N.getOperand(0)))
      FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
    else
      return false;
    break;
  default:
    return false;
  }

  if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
                                                      /*isReciprocal*/ false)) {
    FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}
4203
4204bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4205 SDValue &FixedPos,
4206 unsigned RegWidth) {
4207 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4208 /*isReciprocal*/ true);
4209}
4210
4211// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4212// of the string and obtains the integer values from them and combines these
4213// into a single value to be used in the MRS/MSR instruction.
4216 RegString.split(Fields, ':');
4217
4218 if (Fields.size() == 1)
4219 return -1;
4220
4221 assert(Fields.size() == 5
4222 && "Invalid number of fields in read register string");
4223
4225 bool AllIntFields = true;
4226
4227 for (StringRef Field : Fields) {
4228 unsigned IntField;
4229 AllIntFields &= !Field.getAsInteger(10, IntField);
4230 Ops.push_back(IntField);
4231 }
4232
4233 assert(AllIntFields &&
4234 "Unexpected non-integer value in special register string.");
4235 (void)AllIntFields;
4236
4237 // Need to combine the integer fields of the string into a single value
4238 // based on the bit encoding of MRS/MSR instruction.
4239 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4240 (Ops[3] << 3) | (Ops[4]);
4241}
4242
// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandsFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  // Operand 1 carries the register name as metadata.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  // MRRS reads a 128-bit system register pair; MRS reads a single 64-bit one.
  bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;

  unsigned Opcode64Bit = AArch64::MRS;
  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // No match, Use the sysreg mapper to map the remaining possible strings to
    // the value for the register to be used for the instruction operand.
    const auto *TheReg =
        AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Readable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1) {
      // Still no match, see if this is "pc" or give up.
      if (!ReadIs128Bit && RegString->getString() == "pc") {
        // "pc" is read with an ADR of offset 0 rather than an MRS.
        Opcode64Bit = AArch64::ADR;
        Imm = 0;
      } else {
        return false;
      }
    }
  }

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
                         {SysRegImm, InChain});
  } else {
    SDNode *MRRS = CurDAG->getMachineNode(
        AArch64::MRRS, DL,
        {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
        {SysRegImm, InChain});

    // Sysregs are not endian. The even register always contains the low half
    // of the register.
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue OutChain = SDValue(MRRS, 1);

    // Rewire N's three results (lo, hi, chain) to the MRRS pieces.
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  };
  return true;
}
4303
// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandsFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  // Operand 1 carries the register name as metadata.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  // MSRR writes a 128-bit system register pair; MSR writes a 64-bit one.
  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument, we know that this is
    // the case as it has been ensured by semantic checking.
    auto trySelectPState = [&](auto PMapper, unsigned State) {
      if (PMapper) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = N->getConstantOperandVal(2);
        CurDAG->SelectNodeTo(
            N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
            CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
        return true;
      }
      return false;
    };

    // Try the 4-bit then the 1-bit pstate immediate forms.
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
            AArch64::MSRpstateImm4))
      return true;
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
            AArch64::MSRpstateImm1))
      return true;
  }

  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // Use the sysreg mapper to attempt to map the remaining possible strings
    // to the value for the register to be used for the MSR (register)
    // instruction operand.
    auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Writeable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1)
      return false;
  }

  SDValue InChain = N->getOperand(0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         N->getOperand(2), InChain);
  } else {
    // No endian swap. The lower half always goes into the even subreg, and the
    // higher half always into the odd supreg.
    SDNode *Pair = CurDAG->getMachineNode(
        TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
        {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
                                   MVT::i32),
         N->getOperand(2),
         CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
         N->getOperand(3),
         CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});

    CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         SDValue(Pair, 0), InChain);
  }

  return true;
}
4385
/// We've got special pseudo-instructions for these
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();

  // Leave IR for LSE if subtarget supports it.
  if (Subtarget->hasLSE()) return false;

  // Pick the CMP_SWAP pseudo matching the access width.
  if (MemTy == MVT::i8)
    Opcode = AArch64::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = AArch64::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = AArch64::CMP_SWAP_32;
  else if (MemTy == MVT::i64)
    Opcode = AArch64::CMP_SWAP_64;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Sub-64-bit accesses still produce their result in a 32-bit register.
  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
  // Operands: address, expected value, new value, chain.
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);

  // Carry over the memory operand so alias analysis still sees the access.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  // Result 0 is the loaded value; result 2 is the chain (result 1 of the
  // pseudo is a scratch status and is dropped).
  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);

  return true;
}
4421
4422bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4423 SDValue &Shift, bool Negate) {
4424 if (!isa<ConstantSDNode>(N))
4425 return false;
4426
4427 APInt Val =
4428 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4429
4430 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4431}
4432
// Produce the (imm, shift) operand pair for an SVE ADD/SUB immediate if the
// (possibly negated) value is encodable: any 8-bit value, or an 8-bit value
// shifted left by 8. Returns false when no encoding exists.
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
                                             SDValue &Imm, SDValue &Shift,
                                             bool Negate) {
  // Negate turns a SUB-style request into the equivalent ADD immediate.
  if (Negate)
    Val = -Val;

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8bit unsigned immediates.
    if ((Val & ~0xff) == 0) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
      return true;
    }
    // Support 16bit unsigned immediates that are a multiple of 256.
    if ((Val & ~0xff00) == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}
4467
4468bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4469 SDValue &Imm, SDValue &Shift,
4470 bool Negate) {
4471 if (!isa<ConstantSDNode>(N))
4472 return false;
4473
4474 SDLoc DL(N);
4475 int64_t Val = cast<ConstantSDNode>(N)
4476 ->getAPIntValue()
4478 .getSExtValue();
4479
4480 if (Negate)
4481 Val = -Val;
4482
4483 // Signed saturating instructions treat their immediate operand as unsigned,
4484 // whereas the related intrinsics define their operands to be signed. This
4485 // means we can only use the immediate form when the operand is non-negative.
4486 if (Val < 0)
4487 return false;
4488
4489 switch (VT.SimpleTy) {
4490 case MVT::i8:
4491 // All positive immediates are supported.
4492 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4493 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4494 return true;
4495 case MVT::i16:
4496 case MVT::i32:
4497 case MVT::i64:
4498 // Support 8bit positive immediates.
4499 if (Val <= 255) {
4500 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4501 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4502 return true;
4503 }
4504 // Support 16bit positive immediates that are a multiple of 256.
4505 if (Val <= 65280 && Val % 256 == 0) {
4506 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4507 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4508 return true;
4509 }
4510 break;
4511 default:
4512 break;
4513 }
4514
4515 return false;
4516}
4517
4518bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4519 SDValue &Shift) {
4520 if (!isa<ConstantSDNode>(N))
4521 return false;
4522
4523 SDLoc DL(N);
4524 int64_t Val = cast<ConstantSDNode>(N)
4525 ->getAPIntValue()
4526 .trunc(VT.getFixedSizeInBits())
4527 .getSExtValue();
4528 int32_t ImmVal, ShiftVal;
4529 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4530 ShiftVal))
4531 return false;
4532
4533 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4534 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4535 return true;
4536}
4537
4538bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4539 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4540 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4541 return false;
4542}
4543
4544bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4545 SDValue &Imm) {
4546 int64_t ImmVal = Val.getSExtValue();
4547 if (ImmVal >= -128 && ImmVal < 128) {
4548 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4549 return true;
4550 }
4551 return false;
4552}
4553
4554bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4555 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4556 uint64_t ImmVal = CNode->getZExtValue();
4557
4558 switch (VT.SimpleTy) {
4559 case MVT::i8:
4560 ImmVal &= 0xFF;
4561 break;
4562 case MVT::i16:
4563 ImmVal &= 0xFFFF;
4564 break;
4565 case MVT::i32:
4566 ImmVal &= 0xFFFFFFFF;
4567 break;
4568 case MVT::i64:
4569 break;
4570 default:
4571 llvm_unreachable("Unexpected type");
4572 }
4573
4574 if (ImmVal < 256) {
4575 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4576 return true;
4577 }
4578 }
4579 return false;
4580}
4581
4582bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4583 bool Invert) {
4584 uint64_t ImmVal;
4585 if (auto CI = dyn_cast<ConstantSDNode>(N))
4586 ImmVal = CI->getZExtValue();
4587 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4588 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4589 else
4590 return false;
4591
4592 if (Invert)
4593 ImmVal = ~ImmVal;
4594
4595 uint64_t encoding;
4596 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4597 return false;
4598
4599 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4600 return true;
4601}
4602
4603// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4604// Rather than attempt to normalise everything we can sometimes saturate the
4605// shift amount during selection. This function also allows for consistent
4606// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4607// required by the instructions.
4608bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4609 uint64_t High, bool AllowSaturation,
4610 SDValue &Imm) {
4611 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4612 uint64_t ImmVal = CN->getZExtValue();
4613
4614 // Reject shift amounts that are too small.
4615 if (ImmVal < Low)
4616 return false;
4617
4618 // Reject or saturate shift amounts that are too big.
4619 if (ImmVal > High) {
4620 if (!AllowSaturation)
4621 return false;
4622 ImmVal = High;
4623 }
4624
4625 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4626 return true;
4627 }
4628
4629 return false;
4630}
4631
4632bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4633 // tagp(FrameIndex, IRGstack, tag_offset):
4634 // since the offset between FrameIndex and IRGstack is a compile-time
4635 // constant, this can be lowered to a single ADDG instruction.
4636 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4637 return false;
4638 }
4639
4640 SDValue IRG_SP = N->getOperand(2);
4641 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4642 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4643 return false;
4644 }
4645
4646 const TargetLowering *TLI = getTargetLowering();
4647 SDLoc DL(N);
4648 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4649 SDValue FiOp = CurDAG->getTargetFrameIndex(
4650 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4651 int TagOffset = N->getConstantOperandVal(3);
4652
4653 SDNode *Out = CurDAG->getMachineNode(
4654 AArch64::TAGPstack, DL, MVT::i64,
4655 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4656 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4657 ReplaceNode(N, Out);
4658 return true;
4659}
4660
// Select the llvm.aarch64.tagp intrinsic. The stack-slot special case is
// tried first; otherwise a three-instruction SUBP/ADDXrr/ADDG sequence is
// emitted for arbitrary pointer operands.
void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: above applies in any case when offset between Op1 and Op2 is a
  // compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = N->getConstantOperandVal(3);
  // N1 = SUBP(Op1, Op2)
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
                                      {N->getOperand(1), N->getOperand(2)});
  // N2 = N1 + Op2
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
                                      {SDValue(N1, 0), N->getOperand(2)});
  // N3 = ADDG(N2, #0, #TagOffset) — applies the constant tag offset.
  SDNode *N3 = CurDAG->getMachineNode(
      AArch64::ADDG, DL, MVT::i64,
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, N3);
}
4682
4683bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4684 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4685
4686 // Bail when not a "cast" like insert_subvector.
4687 if (N->getConstantOperandVal(2) != 0)
4688 return false;
4689 if (!N->getOperand(0).isUndef())
4690 return false;
4691
4692 // Bail when normal isel should do the job.
4693 EVT VT = N->getValueType(0);
4694 EVT InVT = N->getOperand(1).getValueType();
4695 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4696 return false;
4697 if (InVT.getSizeInBits() <= 128)
4698 return false;
4699
4700 // NOTE: We can only get here when doing fixed length SVE code generation.
4701 // We do manual selection because the types involved are not linked to real
4702 // registers (despite being legal) and must be coerced into SVE registers.
4703
4705 "Expected to insert into a packed scalable vector!");
4706
4707 SDLoc DL(N);
4708 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4709 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4710 N->getOperand(1), RC));
4711 return true;
4712}
4713
4714bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4715 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4716
4717 // Bail when not a "cast" like extract_subvector.
4718 if (N->getConstantOperandVal(1) != 0)
4719 return false;
4720
4721 // Bail when normal isel can do the job.
4722 EVT VT = N->getValueType(0);
4723 EVT InVT = N->getOperand(0).getValueType();
4724 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4725 return false;
4726 if (VT.getSizeInBits() <= 128)
4727 return false;
4728
4729 // NOTE: We can only get here when doing fixed length SVE code generation.
4730 // We do manual selection because the types involved are not linked to real
4731 // registers (despite being legal) and must be coerced into SVE registers.
4732
4734 "Expected to extract from a packed scalable vector!");
4735
4736 SDLoc DL(N);
4737 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4738 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4739 N->getOperand(0), RC));
4740 return true;
4741}
4742
// Try to select an OR of shifted values as a XAR (exclusive-OR and rotate)
// instruction. Two forms are handled: the SVE2/streaming-SME predicated
// form (SHL_PRED/SRL_PRED operands) and the Neon/fixed-vector form
// (VSHL/VLSHR operands), the latter widened through SVE registers when the
// native Neon XAR (SHA3) is unavailable.
bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
  // Rotate by a constant is a funnel shift in IR which is expanded to
  // an OR with shifted operands.
  // We do the following transform:
  // OR N0, N1 -> xar (x, y, imm)
  // Where:
  // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
  // N0 = SHL_PRED true, V, splat(bits-imm)
  // V = (xor x, y)
  if (VT.isScalableVector() &&
      (Subtarget->hasSVE2() ||
       (Subtarget->hasSME() && Subtarget->isStreaming()))) {
    // Canonicalise so N0 is the SHL and N1 the SRL.
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(N0, N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    // Both shifts must be governed by an all-active predicate.
    auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
    if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
        !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
      return false;

    // Both shifts must act on the same value V.
    if (N0.getOperand(1) != N1.getOperand(1))
      return false;

    SDValue R1, R2;
    bool IsXOROperand = true;
    if (N0.getOperand(1).getOpcode() != ISD::XOR) {
      IsXOROperand = false;
    } else {
      R1 = N0.getOperand(1).getOperand(0);
      R2 = N1.getOperand(1).getOperand(1);
    }

    APInt ShlAmt, ShrAmt;
    // NOTE(review): the second half of this condition (the matching
    // isConstantSplatVector check on N1's shift amount) appears to have been
    // lost during extraction — compare against the upstream source.
    if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
      return false;

    // The two shift amounts must form a full rotate of the element width.
    if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    // Without an XOR operand, synthesise a zero second source so the XAR
    // still computes the plain rotate.
    if (!IsXOROperand) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
      SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
      SDValue MOVIV = SDValue(MOV, 0);

      SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
      SDNode *SubRegToReg =
          CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);

      R1 = N1->getOperand(1);
      R2 = SDValue(SubRegToReg, 0);
    }

    SDValue Imm =
        CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);

    SDValue Ops[] = {R1, R2, Imm};
    // NOTE(review): the opening `if (auto Opc = SelectOpcodeFromVT<...>(`
    // line appears to have been lost during extraction here.
        VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
             AArch64::XAR_ZZZI_D})) {
      CurDAG->SelectNodeTo(N, Opc, VT, Ops);
      return true;
    }
    return false;
  }

  // We have Neon SHA3 XAR operation for v2i64 but for types
  // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
  // is available.
  EVT SVT;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::v4i32:
  case MVT::v2i32:
    SVT = MVT::nxv4i32;
    break;
  case MVT::v8i16:
  case MVT::v4i16:
    SVT = MVT::nxv8i16;
    break;
  case MVT::v16i8:
  case MVT::v8i8:
    SVT = MVT::nxv16i8;
    break;
  case MVT::v2i64:
  case MVT::v1i64:
    SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
    break;
  default:
    return false;
  }

  // Require either the native Neon XAR (SHA3) or SVE2 for the widened form.
  if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
      (SVT.isScalableVector() && !Subtarget->hasSVE2()))
    return false;

  if (N0->getOpcode() != AArch64ISD::VSHL ||
      N1->getOpcode() != AArch64ISD::VLSHR)
    return false;

  // Both shifts must act on the same value.
  if (N0->getOperand(0) != N1->getOperand(0))
    return false;

  SDValue R1, R2;
  bool IsXOROperand = true;
  if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
    IsXOROperand = false;
  } else {
    SDValue XOR = N0.getOperand(0);
    R1 = XOR.getOperand(0);
    R2 = XOR.getOperand(1);
  }

  unsigned HsAmt = N0.getConstantOperandVal(1);
  unsigned ShAmt = N1.getConstantOperandVal(1);

  SDValue Imm = CurDAG->getTargetConstant(
      ShAmt, DL, N0.getOperand(1).getValueType(), false);

  // Shift amounts must form a full rotate of the element width.
  unsigned VTSizeInBits = VT.getScalarSizeInBits();
  if (ShAmt + HsAmt != VTSizeInBits)
    return false;

  // No XOR operand: use a zero second source so XAR degenerates to a rotate.
  if (!IsXOROperand) {
    SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
    SDNode *MOV =
        CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
    SDValue MOVIV = SDValue(MOV, 0);

    R1 = N1->getOperand(0);
    R2 = MOVIV;
  }

  // Widen the operands into the register type the selected XAR needs
  // (Q register for SVE, via INSERT_SUBREG chains).
  if (SVT != VT) {
    SDValue Undef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);

    if (SVT.isScalableVector() && VT.is64BitVector()) {
      EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());

      SDValue UndefQ = SDValue(
          CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
      SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);

      R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
                                          UndefQ, R1, DSub),
                   0);
      if (R2.getValueType() == VT)
        R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
                                            UndefQ, R2, DSub),
                     0);
    }

    SDValue SubReg = CurDAG->getTargetConstant(
        (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);

    R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
                                        R1, SubReg),
                 0);

    if (SVT.isScalableVector() || R2.getValueType() != SVT)
      R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
                                          Undef, R2, SubReg),
                   0);
  }

  SDValue Ops[] = {R1, R2, Imm};
  SDNode *XAR = nullptr;

  if (SVT.isScalableVector()) {
    // NOTE(review): the opening `if (auto Opc = SelectOpcodeFromVT<...>(`
    // line appears to have been lost during extraction here.
        SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
             AArch64::XAR_ZZZI_D}))
      XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
  } else {
    XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
  }

  assert(XAR && "Unexpected NULL value for XAR instruction in DAG");

  // Narrow the result back to the original type via EXTRACT_SUBREG.
  if (SVT != VT) {
    if (VT.is64BitVector() && SVT.isScalableVector()) {
      EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());

      SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
      SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
                                         SDValue(XAR, 0), ZSub);

      SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
      XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
                                   SDValue(Q, 0), DSub);
    } else {
      SDValue SubReg = CurDAG->getTargetConstant(
          (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
          MVT::i32);
      XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
                                   SDValue(XAR, 0), SubReg);
    }
  }
  ReplaceNode(N, XAR);
  return true;
}
4957
4958void AArch64DAGToDAGISel::Select(SDNode *Node) {
4959 // If we have a custom node, we already have selected!
4960 if (Node->isMachineOpcode()) {
4961 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4962 Node->setNodeId(-1);
4963 return;
4964 }
4965
4966 // Few custom selection stuff.
4967 EVT VT = Node->getValueType(0);
4968
4969 switch (Node->getOpcode()) {
4970 default:
4971 break;
4972
4974 if (SelectCMP_SWAP(Node))
4975 return;
4976 break;
4977
4978 case ISD::READ_REGISTER:
4979 case AArch64ISD::MRRS:
4980 if (tryReadRegister(Node))
4981 return;
4982 break;
4983
4985 case AArch64ISD::MSRR:
4986 if (tryWriteRegister(Node))
4987 return;
4988 break;
4989
4990 case ISD::LOAD: {
4991 // Try to select as an indexed load. Fall through to normal processing
4992 // if we can't.
4993 if (tryIndexedLoad(Node))
4994 return;
4995 break;
4996 }
4997
4998 case ISD::SRL:
4999 case ISD::AND:
5000 case ISD::SRA:
5002 if (tryBitfieldExtractOp(Node))
5003 return;
5004 if (tryBitfieldInsertInZeroOp(Node))
5005 return;
5006 [[fallthrough]];
5007 case ISD::ROTR:
5008 case ISD::SHL:
5009 if (tryShiftAmountMod(Node))
5010 return;
5011 break;
5012
5013 case ISD::SIGN_EXTEND:
5014 if (tryBitfieldExtractOpFromSExt(Node))
5015 return;
5016 break;
5017
5018 case ISD::OR:
5019 if (tryBitfieldInsertOp(Node))
5020 return;
5021 if (trySelectXAR(Node))
5022 return;
5023 break;
5024
5026 if (trySelectCastScalableToFixedLengthVector(Node))
5027 return;
5028 break;
5029 }
5030
5031 case ISD::INSERT_SUBVECTOR: {
5032 if (trySelectCastFixedLengthToScalableVector(Node))
5033 return;
5034 break;
5035 }
5036
5037 case ISD::Constant: {
5038 // Materialize zero constants as copies from WZR/XZR. This allows
5039 // the coalescer to propagate these into other instructions.
5040 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5041 if (ConstNode->isZero()) {
5042 if (VT == MVT::i32) {
5043 SDValue New = CurDAG->getCopyFromReg(
5044 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
5045 ReplaceNode(Node, New.getNode());
5046 return;
5047 } else if (VT == MVT::i64) {
5048 SDValue New = CurDAG->getCopyFromReg(
5049 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
5050 ReplaceNode(Node, New.getNode());
5051 return;
5052 }
5053 }
5054 break;
5055 }
5056
5057 case ISD::FrameIndex: {
5058 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5059 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5060 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5061 const TargetLowering *TLI = getTargetLowering();
5062 SDValue TFI = CurDAG->getTargetFrameIndex(
5063 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5064 SDLoc DL(Node);
5065 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5066 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5067 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5068 return;
5069 }
5071 unsigned IntNo = Node->getConstantOperandVal(1);
5072 switch (IntNo) {
5073 default:
5074 break;
5075 case Intrinsic::aarch64_gcsss: {
5076 SDLoc DL(Node);
5077 SDValue Chain = Node->getOperand(0);
5078 SDValue Val = Node->getOperand(2);
5079 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
5080 SDNode *SS1 =
5081 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5082 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5083 MVT::Other, Zero, SDValue(SS1, 0));
5084 ReplaceNode(Node, SS2);
5085 return;
5086 }
5087 case Intrinsic::aarch64_ldaxp:
5088 case Intrinsic::aarch64_ldxp: {
5089 unsigned Op =
5090 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5091 SDValue MemAddr = Node->getOperand(2);
5092 SDLoc DL(Node);
5093 SDValue Chain = Node->getOperand(0);
5094
5095 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5096 MVT::Other, MemAddr, Chain);
5097
5098 // Transfer memoperands.
5099 MachineMemOperand *MemOp =
5100 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5101 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5102 ReplaceNode(Node, Ld);
5103 return;
5104 }
5105 case Intrinsic::aarch64_stlxp:
5106 case Intrinsic::aarch64_stxp: {
5107 unsigned Op =
5108 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5109 SDLoc DL(Node);
5110 SDValue Chain = Node->getOperand(0);
5111 SDValue ValLo = Node->getOperand(2);
5112 SDValue ValHi = Node->getOperand(3);
5113 SDValue MemAddr = Node->getOperand(4);
5114
5115 // Place arguments in the right order.
5116 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5117
5118 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5119 // Transfer memoperands.
5120 MachineMemOperand *MemOp =
5121 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5122 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5123
5124 ReplaceNode(Node, St);
5125 return;
5126 }
5127 case Intrinsic::aarch64_neon_ld1x2:
5128 if (VT == MVT::v8i8) {
5129 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5130 return;
5131 } else if (VT == MVT::v16i8) {
5132 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5133 return;
5134 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5135 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5136 return;
5137 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5138 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5139 return;
5140 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5141 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5142 return;
5143 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5144 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5145 return;
5146 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5147 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5148 return;
5149 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5150 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5151 return;
5152 }
5153 break;
5154 case Intrinsic::aarch64_neon_ld1x3:
5155 if (VT == MVT::v8i8) {
5156 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5157 return;
5158 } else if (VT == MVT::v16i8) {
5159 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5160 return;
5161 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5162 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5163 return;
5164 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5165 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5166 return;
5167 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5168 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5169 return;
5170 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5171 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5172 return;
5173 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5174 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5175 return;
5176 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5177 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5178 return;
5179 }
5180 break;
5181 case Intrinsic::aarch64_neon_ld1x4:
5182 if (VT == MVT::v8i8) {
5183 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5184 return;
5185 } else if (VT == MVT::v16i8) {
5186 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5187 return;
5188 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5189 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5190 return;
5191 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5192 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5193 return;
5194 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5195 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5196 return;
5197 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5198 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5199 return;
5200 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5201 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5202 return;
5203 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5204 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5205 return;
5206 }
5207 break;
5208 case Intrinsic::aarch64_neon_ld2:
5209 if (VT == MVT::v8i8) {
5210 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5211 return;
5212 } else if (VT == MVT::v16i8) {
5213 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5214 return;
5215 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5216 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5217 return;
5218 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5219 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5220 return;
5221 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5222 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5223 return;
5224 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5225 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5226 return;
5227 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5228 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5229 return;
5230 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5231 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5232 return;
5233 }
5234 break;
5235 case Intrinsic::aarch64_neon_ld3:
5236 if (VT == MVT::v8i8) {
5237 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5238 return;
5239 } else if (VT == MVT::v16i8) {
5240 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5241 return;
5242 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5243 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5244 return;
5245 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5246 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5247 return;
5248 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5249 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5250 return;
5251 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5252 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5253 return;
5254 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5255 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5256 return;
5257 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5258 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5259 return;
5260 }
5261 break;
5262 case Intrinsic::aarch64_neon_ld4:
5263 if (VT == MVT::v8i8) {
5264 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5265 return;
5266 } else if (VT == MVT::v16i8) {
5267 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5268 return;
5269 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5270 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5271 return;
5272 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5273 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5274 return;
5275 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5276 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5277 return;
5278 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5279 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5280 return;
5281 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5282 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5283 return;
5284 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5285 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5286 return;
5287 }
5288 break;
5289 case Intrinsic::aarch64_neon_ld2r:
5290 if (VT == MVT::v8i8) {
5291 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5292 return;
5293 } else if (VT == MVT::v16i8) {
5294 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5295 return;
5296 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5297 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5298 return;
5299 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5300 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5301 return;
5302 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5303 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5304 return;
5305 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5306 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5307 return;
5308 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5309 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5310 return;
5311 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5312 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5313 return;
5314 }
5315 break;
5316 case Intrinsic::aarch64_neon_ld3r:
5317 if (VT == MVT::v8i8) {
5318 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5319 return;
5320 } else if (VT == MVT::v16i8) {
5321 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5322 return;
5323 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5324 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5325 return;
5326 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5327 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5328 return;
5329 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5330 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5331 return;
5332 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5333 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5334 return;
5335 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5336 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5337 return;
5338 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5339 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5340 return;
5341 }
5342 break;
5343 case Intrinsic::aarch64_neon_ld4r:
5344 if (VT == MVT::v8i8) {
5345 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5346 return;
5347 } else if (VT == MVT::v16i8) {
5348 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5349 return;
5350 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5351 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5352 return;
5353 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5354 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5355 return;
5356 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5357 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5358 return;
5359 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5360 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5361 return;
5362 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5363 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5364 return;
5365 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5366 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5367 return;
5368 }
5369 break;
5370 case Intrinsic::aarch64_neon_ld2lane:
5371 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5372 SelectLoadLane(Node, 2, AArch64::LD2i8);
5373 return;
5374 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5375 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5376 SelectLoadLane(Node, 2, AArch64::LD2i16);
5377 return;
5378 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5379 VT == MVT::v2f32) {
5380 SelectLoadLane(Node, 2, AArch64::LD2i32);
5381 return;
5382 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5383 VT == MVT::v1f64) {
5384 SelectLoadLane(Node, 2, AArch64::LD2i64);
5385 return;
5386 }
5387 break;
5388 case Intrinsic::aarch64_neon_ld3lane:
5389 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5390 SelectLoadLane(Node, 3, AArch64::LD3i8);
5391 return;
5392 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5393 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5394 SelectLoadLane(Node, 3, AArch64::LD3i16);
5395 return;
5396 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5397 VT == MVT::v2f32) {
5398 SelectLoadLane(Node, 3, AArch64::LD3i32);
5399 return;
5400 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5401 VT == MVT::v1f64) {
5402 SelectLoadLane(Node, 3, AArch64::LD3i64);
5403 return;
5404 }
5405 break;
5406 case Intrinsic::aarch64_neon_ld4lane:
5407 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5408 SelectLoadLane(Node, 4, AArch64::LD4i8);
5409 return;
5410 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5411 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5412 SelectLoadLane(Node, 4, AArch64::LD4i16);
5413 return;
5414 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5415 VT == MVT::v2f32) {
5416 SelectLoadLane(Node, 4, AArch64::LD4i32);
5417 return;
5418 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5419 VT == MVT::v1f64) {
5420 SelectLoadLane(Node, 4, AArch64::LD4i64);
5421 return;
5422 }
5423 break;
5424 case Intrinsic::aarch64_ld64b:
5425 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5426 return;
5427 case Intrinsic::aarch64_sve_ld2q_sret: {
5428 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5429 return;
5430 }
5431 case Intrinsic::aarch64_sve_ld3q_sret: {
5432 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5433 return;
5434 }
5435 case Intrinsic::aarch64_sve_ld4q_sret: {
5436 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5437 return;
5438 }
5439 case Intrinsic::aarch64_sve_ld2_sret: {
5440 if (VT == MVT::nxv16i8) {
5441 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5442 true);
5443 return;
5444 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5445 VT == MVT::nxv8bf16) {
5446 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5447 true);
5448 return;
5449 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5450 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5451 true);
5452 return;
5453 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5454 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5455 true);
5456 return;
5457 }
5458 break;
5459 }
5460 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5461 if (VT == MVT::nxv16i8) {
5462 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5463 SelectContiguousMultiVectorLoad(
5464 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5465 else if (Subtarget->hasSVE2p1())
5466 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5467 AArch64::LD1B_2Z);
5468 else
5469 break;
5470 return;
5471 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5472 VT == MVT::nxv8bf16) {
5473 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5474 SelectContiguousMultiVectorLoad(
5475 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5476 else if (Subtarget->hasSVE2p1())
5477 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5478 AArch64::LD1H_2Z);
5479 else
5480 break;
5481 return;
5482 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5483 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5484 SelectContiguousMultiVectorLoad(
5485 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5486 else if (Subtarget->hasSVE2p1())
5487 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5488 AArch64::LD1W_2Z);
5489 else
5490 break;
5491 return;
5492 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5493 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5494 SelectContiguousMultiVectorLoad(
5495 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5496 else if (Subtarget->hasSVE2p1())
5497 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5498 AArch64::LD1D_2Z);
5499 else
5500 break;
5501 return;
5502 }
5503 break;
5504 }
5505 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5506 if (VT == MVT::nxv16i8) {
5507 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5508 SelectContiguousMultiVectorLoad(
5509 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5510 else if (Subtarget->hasSVE2p1())
5511 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5512 AArch64::LD1B_4Z);
5513 else
5514 break;
5515 return;
5516 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5517 VT == MVT::nxv8bf16) {
5518 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5519 SelectContiguousMultiVectorLoad(
5520 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5521 else if (Subtarget->hasSVE2p1())
5522 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5523 AArch64::LD1H_4Z);
5524 else
5525 break;
5526 return;
5527 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5528 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5529 SelectContiguousMultiVectorLoad(
5530 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5531 else if (Subtarget->hasSVE2p1())
5532 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5533 AArch64::LD1W_4Z);
5534 else
5535 break;
5536 return;
5537 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5538 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5539 SelectContiguousMultiVectorLoad(
5540 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5541 else if (Subtarget->hasSVE2p1())
5542 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5543 AArch64::LD1D_4Z);
5544 else
5545 break;
5546 return;
5547 }
5548 break;
5549 }
5550 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5551 if (VT == MVT::nxv16i8) {
5552 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5553 SelectContiguousMultiVectorLoad(Node, 2, 0,
5554 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5555 AArch64::LDNT1B_2Z_PSEUDO);
5556 else if (Subtarget->hasSVE2p1())
5557 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5558 AArch64::LDNT1B_2Z);
5559 else
5560 break;
5561 return;
5562 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5563 VT == MVT::nxv8bf16) {
5564 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5565 SelectContiguousMultiVectorLoad(Node, 2, 1,
5566 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5567 AArch64::LDNT1H_2Z_PSEUDO);
5568 else if (Subtarget->hasSVE2p1())
5569 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5570 AArch64::LDNT1H_2Z);
5571 else
5572 break;
5573 return;
5574 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5575 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5576 SelectContiguousMultiVectorLoad(Node, 2, 2,
5577 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5578 AArch64::LDNT1W_2Z_PSEUDO);
5579 else if (Subtarget->hasSVE2p1())
5580 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5581 AArch64::LDNT1W_2Z);
5582 else
5583 break;
5584 return;
5585 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5586 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5587 SelectContiguousMultiVectorLoad(Node, 2, 3,
5588 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5589 AArch64::LDNT1D_2Z_PSEUDO);
5590 else if (Subtarget->hasSVE2p1())
5591 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5592 AArch64::LDNT1D_2Z);
5593 else
5594 break;
5595 return;
5596 }
5597 break;
5598 }
5599 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5600 if (VT == MVT::nxv16i8) {
5601 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5602 SelectContiguousMultiVectorLoad(Node, 4, 0,
5603 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5604 AArch64::LDNT1B_4Z_PSEUDO);
5605 else if (Subtarget->hasSVE2p1())
5606 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5607 AArch64::LDNT1B_4Z);
5608 else
5609 break;
5610 return;
5611 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5612 VT == MVT::nxv8bf16) {
5613 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5614 SelectContiguousMultiVectorLoad(Node, 4, 1,
5615 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5616 AArch64::LDNT1H_4Z_PSEUDO);
5617 else if (Subtarget->hasSVE2p1())
5618 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5619 AArch64::LDNT1H_4Z);
5620 else
5621 break;
5622 return;
5623 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5624 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5625 SelectContiguousMultiVectorLoad(Node, 4, 2,
5626 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5627 AArch64::LDNT1W_4Z_PSEUDO);
5628 else if (Subtarget->hasSVE2p1())
5629 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5630 AArch64::LDNT1W_4Z);
5631 else
5632 break;
5633 return;
5634 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5635 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5636 SelectContiguousMultiVectorLoad(Node, 4, 3,
5637 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5638 AArch64::LDNT1D_4Z_PSEUDO);
5639 else if (Subtarget->hasSVE2p1())
5640 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5641 AArch64::LDNT1D_4Z);
5642 else
5643 break;
5644 return;
5645 }
5646 break;
5647 }
5648 case Intrinsic::aarch64_sve_ld3_sret: {
5649 if (VT == MVT::nxv16i8) {
5650 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5651 true);
5652 return;
5653 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5654 VT == MVT::nxv8bf16) {
5655 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5656 true);
5657 return;
5658 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5659 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5660 true);
5661 return;
5662 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5663 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5664 true);
5665 return;
5666 }
5667 break;
5668 }
5669 case Intrinsic::aarch64_sve_ld4_sret: {
5670 if (VT == MVT::nxv16i8) {
5671 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5672 true);
5673 return;
5674 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5675 VT == MVT::nxv8bf16) {
5676 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5677 true);
5678 return;
5679 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5680 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5681 true);
5682 return;
5683 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5684 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5685 true);
5686 return;
5687 }
5688 break;
5689 }
5690 case Intrinsic::aarch64_sme_read_hor_vg2: {
5691 if (VT == MVT::nxv16i8) {
5692 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5693 AArch64::MOVA_2ZMXI_H_B);
5694 return;
5695 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5696 VT == MVT::nxv8bf16) {
5697 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5698 AArch64::MOVA_2ZMXI_H_H);
5699 return;
5700 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5701 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5702 AArch64::MOVA_2ZMXI_H_S);
5703 return;
5704 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5705 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5706 AArch64::MOVA_2ZMXI_H_D);
5707 return;
5708 }
5709 break;
5710 }
5711 case Intrinsic::aarch64_sme_read_ver_vg2: {
5712 if (VT == MVT::nxv16i8) {
5713 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5714 AArch64::MOVA_2ZMXI_V_B);
5715 return;
5716 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5717 VT == MVT::nxv8bf16) {
5718 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5719 AArch64::MOVA_2ZMXI_V_H);
5720 return;
5721 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5722 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5723 AArch64::MOVA_2ZMXI_V_S);
5724 return;
5725 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5726 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5727 AArch64::MOVA_2ZMXI_V_D);
5728 return;
5729 }
5730 break;
5731 }
5732 case Intrinsic::aarch64_sme_read_hor_vg4: {
5733 if (VT == MVT::nxv16i8) {
5734 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5735 AArch64::MOVA_4ZMXI_H_B);
5736 return;
5737 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5738 VT == MVT::nxv8bf16) {
5739 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5740 AArch64::MOVA_4ZMXI_H_H);
5741 return;
5742 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5743 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5744 AArch64::MOVA_4ZMXI_H_S);
5745 return;
5746 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5747 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5748 AArch64::MOVA_4ZMXI_H_D);
5749 return;
5750 }
5751 break;
5752 }
5753 case Intrinsic::aarch64_sme_read_ver_vg4: {
5754 if (VT == MVT::nxv16i8) {
5755 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5756 AArch64::MOVA_4ZMXI_V_B);
5757 return;
5758 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5759 VT == MVT::nxv8bf16) {
5760 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5761 AArch64::MOVA_4ZMXI_V_H);
5762 return;
5763 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5764 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5765 AArch64::MOVA_4ZMXI_V_S);
5766 return;
5767 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5768 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5769 AArch64::MOVA_4ZMXI_V_D);
5770 return;
5771 }
5772 break;
5773 }
5774 case Intrinsic::aarch64_sme_read_vg1x2: {
5775 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5776 AArch64::MOVA_VG2_2ZMXI);
5777 return;
5778 }
5779 case Intrinsic::aarch64_sme_read_vg1x4: {
5780 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5781 AArch64::MOVA_VG4_4ZMXI);
5782 return;
5783 }
5784 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5785 if (VT == MVT::nxv16i8) {
5786 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5787 return;
5788 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5789 VT == MVT::nxv8bf16) {
5790 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5791 return;
5792 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5793 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5794 return;
5795 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5796 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5797 return;
5798 }
5799 break;
5800 }
5801 case Intrinsic::aarch64_sme_readz_vert_x2: {
5802 if (VT == MVT::nxv16i8) {
5803 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5804 return;
5805 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5806 VT == MVT::nxv8bf16) {
5807 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5808 return;
5809 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5810 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5811 return;
5812 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5813 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5814 return;
5815 }
5816 break;
5817 }
5818 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5819 if (VT == MVT::nxv16i8) {
5820 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5821 return;
5822 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5823 VT == MVT::nxv8bf16) {
5824 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5825 return;
5826 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5827 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5828 return;
5829 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5830 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5831 return;
5832 }
5833 break;
5834 }
5835 case Intrinsic::aarch64_sme_readz_vert_x4: {
5836 if (VT == MVT::nxv16i8) {
5837 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5838 return;
5839 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5840 VT == MVT::nxv8bf16) {
5841 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5842 return;
5843 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5844 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5845 return;
5846 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5847 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5848 return;
5849 }
5850 break;
5851 }
5852 case Intrinsic::aarch64_sme_readz_x2: {
5853 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5854 AArch64::ZA);
5855 return;
5856 }
5857 case Intrinsic::aarch64_sme_readz_x4: {
5858 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5859 AArch64::ZA);
5860 return;
5861 }
5862 case Intrinsic::swift_async_context_addr: {
5863 SDLoc DL(Node);
5864 SDValue Chain = Node->getOperand(0);
5865 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5866 SDValue Res = SDValue(
5867 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5868 CurDAG->getTargetConstant(8, DL, MVT::i32),
5869 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5870 0);
5871 ReplaceUses(SDValue(Node, 0), Res);
5872 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5873 CurDAG->RemoveDeadNode(Node);
5874
5875 auto &MF = CurDAG->getMachineFunction();
5876 MF.getFrameInfo().setFrameAddressIsTaken(true);
5877 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5878 return;
5879 }
5880 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5882 Node->getValueType(0),
5883 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5884 AArch64::LUTI2_4ZTZI_S}))
5885 // Second Immediate must be <= 3:
5886 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5887 return;
5888 }
5889 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5891 Node->getValueType(0),
5892 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5893 // Second Immediate must be <= 1:
5894 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5895 return;
5896 }
5897 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5899 Node->getValueType(0),
5900 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5901 AArch64::LUTI2_2ZTZI_S}))
5902 // Second Immediate must be <= 7:
5903 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5904 return;
5905 }
5906 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5908 Node->getValueType(0),
5909 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5910 AArch64::LUTI4_2ZTZI_S}))
5911 // Second Immediate must be <= 3:
5912 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5913 return;
5914 }
5915 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5916 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5917 return;
5918 }
5919 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5921 Node->getValueType(0),
5922 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5923 SelectCVTIntrinsicFP8(Node, 2, Opc);
5924 return;
5925 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5927 Node->getValueType(0),
5928 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5929 SelectCVTIntrinsicFP8(Node, 2, Opc);
5930 return;
5931 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5933 Node->getValueType(0),
5934 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5935 SelectCVTIntrinsicFP8(Node, 2, Opc);
5936 return;
5937 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5939 Node->getValueType(0),
5940 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5941 SelectCVTIntrinsicFP8(Node, 2, Opc);
5942 return;
5943 case Intrinsic::ptrauth_resign_load_relative:
5944 SelectPtrauthResign(Node);
5945 return;
5946 }
5947 } break;
5949 unsigned IntNo = Node->getConstantOperandVal(0);
5950 switch (IntNo) {
5951 default:
5952 break;
5953 case Intrinsic::aarch64_tagp:
5954 SelectTagP(Node);
5955 return;
5956
5957 case Intrinsic::ptrauth_auth:
5958 SelectPtrauthAuth(Node);
5959 return;
5960
5961 case Intrinsic::ptrauth_resign:
5962 SelectPtrauthResign(Node);
5963 return;
5964
5965 case Intrinsic::aarch64_neon_tbl2:
5966 SelectTable(Node, 2,
5967 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5968 false);
5969 return;
5970 case Intrinsic::aarch64_neon_tbl3:
5971 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5972 : AArch64::TBLv16i8Three,
5973 false);
5974 return;
5975 case Intrinsic::aarch64_neon_tbl4:
5976 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5977 : AArch64::TBLv16i8Four,
5978 false);
5979 return;
5980 case Intrinsic::aarch64_neon_tbx2:
5981 SelectTable(Node, 2,
5982 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5983 true);
5984 return;
5985 case Intrinsic::aarch64_neon_tbx3:
5986 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5987 : AArch64::TBXv16i8Three,
5988 true);
5989 return;
5990 case Intrinsic::aarch64_neon_tbx4:
5991 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5992 : AArch64::TBXv16i8Four,
5993 true);
5994 return;
5995 case Intrinsic::aarch64_sve_srshl_single_x2:
5997 Node->getValueType(0),
5998 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5999 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6000 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6001 return;
6002 case Intrinsic::aarch64_sve_srshl_single_x4:
6004 Node->getValueType(0),
6005 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6006 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6007 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6008 return;
6009 case Intrinsic::aarch64_sve_urshl_single_x2:
6011 Node->getValueType(0),
6012 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6013 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6014 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6015 return;
6016 case Intrinsic::aarch64_sve_urshl_single_x4:
6018 Node->getValueType(0),
6019 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6020 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6021 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6022 return;
6023 case Intrinsic::aarch64_sve_srshl_x2:
6025 Node->getValueType(0),
6026 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6027 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6028 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6029 return;
6030 case Intrinsic::aarch64_sve_srshl_x4:
6032 Node->getValueType(0),
6033 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6034 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6035 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6036 return;
6037 case Intrinsic::aarch64_sve_urshl_x2:
6039 Node->getValueType(0),
6040 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6041 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6042 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6043 return;
6044 case Intrinsic::aarch64_sve_urshl_x4:
6046 Node->getValueType(0),
6047 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6048 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6049 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6050 return;
6051 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6053 Node->getValueType(0),
6054 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6055 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6056 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6057 return;
6058 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6060 Node->getValueType(0),
6061 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6062 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6063 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6064 return;
6065 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6067 Node->getValueType(0),
6068 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6069 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6070 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6071 return;
6072 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6074 Node->getValueType(0),
6075 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6076 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6077 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6078 return;
6079 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6081 Node->getValueType(0),
6082 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6083 AArch64::FSCALE_2ZZ_D}))
6084 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6085 return;
6086 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6088 Node->getValueType(0),
6089 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6090 AArch64::FSCALE_4ZZ_D}))
6091 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6092 return;
6093 case Intrinsic::aarch64_sme_fp8_scale_x2:
6095 Node->getValueType(0),
6096 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6097 AArch64::FSCALE_2Z2Z_D}))
6098 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6099 return;
6100 case Intrinsic::aarch64_sme_fp8_scale_x4:
6102 Node->getValueType(0),
6103 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6104 AArch64::FSCALE_4Z4Z_D}))
6105 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6106 return;
6107 case Intrinsic::aarch64_sve_whilege_x2:
6109 Node->getValueType(0),
6110 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6111 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6112 SelectWhilePair(Node, Op);
6113 return;
6114 case Intrinsic::aarch64_sve_whilegt_x2:
6116 Node->getValueType(0),
6117 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6118 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6119 SelectWhilePair(Node, Op);
6120 return;
6121 case Intrinsic::aarch64_sve_whilehi_x2:
6123 Node->getValueType(0),
6124 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6125 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6126 SelectWhilePair(Node, Op);
6127 return;
6128 case Intrinsic::aarch64_sve_whilehs_x2:
6130 Node->getValueType(0),
6131 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6132 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6133 SelectWhilePair(Node, Op);
6134 return;
6135 case Intrinsic::aarch64_sve_whilele_x2:
6137 Node->getValueType(0),
6138 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6139 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6140 SelectWhilePair(Node, Op);
6141 return;
6142 case Intrinsic::aarch64_sve_whilelo_x2:
6144 Node->getValueType(0),
6145 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6146 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6147 SelectWhilePair(Node, Op);
6148 return;
6149 case Intrinsic::aarch64_sve_whilels_x2:
6151 Node->getValueType(0),
6152 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6153 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6154 SelectWhilePair(Node, Op);
6155 return;
6156 case Intrinsic::aarch64_sve_whilelt_x2:
6158 Node->getValueType(0),
6159 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6160 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6161 SelectWhilePair(Node, Op);
6162 return;
6163 case Intrinsic::aarch64_sve_smax_single_x2:
6165 Node->getValueType(0),
6166 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6167 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6168 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6169 return;
6170 case Intrinsic::aarch64_sve_umax_single_x2:
6172 Node->getValueType(0),
6173 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6174 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6175 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6176 return;
6177 case Intrinsic::aarch64_sve_fmax_single_x2:
6179 Node->getValueType(0),
6180 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6181 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6182 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6183 return;
6184 case Intrinsic::aarch64_sve_smax_single_x4:
6186 Node->getValueType(0),
6187 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6188 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6189 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6190 return;
6191 case Intrinsic::aarch64_sve_umax_single_x4:
6193 Node->getValueType(0),
6194 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6195 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6196 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6197 return;
6198 case Intrinsic::aarch64_sve_fmax_single_x4:
6200 Node->getValueType(0),
6201 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6202 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6203 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6204 return;
6205 case Intrinsic::aarch64_sve_smin_single_x2:
6207 Node->getValueType(0),
6208 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6209 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6210 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6211 return;
6212 case Intrinsic::aarch64_sve_umin_single_x2:
6214 Node->getValueType(0),
6215 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6216 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6217 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6218 return;
6219 case Intrinsic::aarch64_sve_fmin_single_x2:
6221 Node->getValueType(0),
6222 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6223 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6224 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6225 return;
6226 case Intrinsic::aarch64_sve_smin_single_x4:
6228 Node->getValueType(0),
6229 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6230 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6231 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6232 return;
6233 case Intrinsic::aarch64_sve_umin_single_x4:
6235 Node->getValueType(0),
6236 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6237 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6238 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6239 return;
6240 case Intrinsic::aarch64_sve_fmin_single_x4:
6242 Node->getValueType(0),
6243 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6244 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6245 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6246 return;
6247 case Intrinsic::aarch64_sve_smax_x2:
6249 Node->getValueType(0),
6250 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6251 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6252 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6253 return;
6254 case Intrinsic::aarch64_sve_umax_x2:
6256 Node->getValueType(0),
6257 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6258 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6259 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6260 return;
6261 case Intrinsic::aarch64_sve_fmax_x2:
6263 Node->getValueType(0),
6264 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6265 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6266 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6267 return;
6268 case Intrinsic::aarch64_sve_smax_x4:
6270 Node->getValueType(0),
6271 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6272 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6273 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6274 return;
6275 case Intrinsic::aarch64_sve_umax_x4:
6277 Node->getValueType(0),
6278 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6279 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6280 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6281 return;
6282 case Intrinsic::aarch64_sve_fmax_x4:
6284 Node->getValueType(0),
6285 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6286 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6287 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6288 return;
6289 case Intrinsic::aarch64_sme_famax_x2:
6291 Node->getValueType(0),
6292 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6293 AArch64::FAMAX_2Z2Z_D}))
6294 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6295 return;
6296 case Intrinsic::aarch64_sme_famax_x4:
6298 Node->getValueType(0),
6299 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6300 AArch64::FAMAX_4Z4Z_D}))
6301 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6302 return;
6303 case Intrinsic::aarch64_sme_famin_x2:
6305 Node->getValueType(0),
6306 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6307 AArch64::FAMIN_2Z2Z_D}))
6308 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6309 return;
6310 case Intrinsic::aarch64_sme_famin_x4:
6312 Node->getValueType(0),
6313 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6314 AArch64::FAMIN_4Z4Z_D}))
6315 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6316 return;
6317 case Intrinsic::aarch64_sve_smin_x2:
6319 Node->getValueType(0),
6320 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6321 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6322 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6323 return;
6324 case Intrinsic::aarch64_sve_umin_x2:
6326 Node->getValueType(0),
6327 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6328 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6329 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6330 return;
6331 case Intrinsic::aarch64_sve_fmin_x2:
6333 Node->getValueType(0),
6334 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6335 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6336 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6337 return;
6338 case Intrinsic::aarch64_sve_smin_x4:
6340 Node->getValueType(0),
6341 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6342 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6343 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6344 return;
6345 case Intrinsic::aarch64_sve_umin_x4:
6347 Node->getValueType(0),
6348 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6349 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6350 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6351 return;
6352 case Intrinsic::aarch64_sve_fmin_x4:
6354 Node->getValueType(0),
6355 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6356 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6357 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6358 return;
6359 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6361 Node->getValueType(0),
6362 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6363 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6364 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6365 return;
6366 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6368 Node->getValueType(0),
6369 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6370 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6371 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6372 return;
6373 case Intrinsic::aarch64_sve_fminnm_single_x2:
6375 Node->getValueType(0),
6376 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6377 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6378 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6379 return;
6380 case Intrinsic::aarch64_sve_fminnm_single_x4:
6382 Node->getValueType(0),
6383 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6384 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6385 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6386 return;
6387 case Intrinsic::aarch64_sve_fscale_single_x4:
6388 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6389 return;
6390 case Intrinsic::aarch64_sve_fscale_single_x2:
6391 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6392 return;
6393 case Intrinsic::aarch64_sve_fmul_single_x4:
6395 Node->getValueType(0),
6396 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6397 AArch64::FMUL_4ZZ_D}))
6398 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6399 return;
6400 case Intrinsic::aarch64_sve_fmul_single_x2:
6402 Node->getValueType(0),
6403 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6404 AArch64::FMUL_2ZZ_D}))
6405 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6406 return;
6407 case Intrinsic::aarch64_sve_fmaxnm_x2:
6409 Node->getValueType(0),
6410 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6411 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6412 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6413 return;
6414 case Intrinsic::aarch64_sve_fmaxnm_x4:
6416 Node->getValueType(0),
6417 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6418 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6419 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6420 return;
6421 case Intrinsic::aarch64_sve_fminnm_x2:
6423 Node->getValueType(0),
6424 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6425 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6426 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6427 return;
6428 case Intrinsic::aarch64_sve_fminnm_x4:
6430 Node->getValueType(0),
6431 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6432 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6433 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6434 return;
6435 case Intrinsic::aarch64_sve_aese_lane_x2:
6436 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6437 return;
6438 case Intrinsic::aarch64_sve_aesd_lane_x2:
6439 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6440 return;
6441 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6442 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6443 return;
6444 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6445 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6446 return;
6447 case Intrinsic::aarch64_sve_aese_lane_x4:
6448 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6449 return;
6450 case Intrinsic::aarch64_sve_aesd_lane_x4:
6451 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6452 return;
6453 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6454 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6455 return;
6456 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6457 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6458 return;
6459 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6460 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6461 return;
6462 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6463 SDLoc DL(Node);
6464 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6465 SDNode *Res =
6466 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6467 SDValue SuperReg = SDValue(Res, 0);
6468 for (unsigned I = 0; I < 2; I++)
6469 ReplaceUses(SDValue(Node, I),
6470 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6471 SuperReg));
6472 CurDAG->RemoveDeadNode(Node);
6473 return;
6474 }
6475 case Intrinsic::aarch64_sve_fscale_x4:
6476 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6477 return;
6478 case Intrinsic::aarch64_sve_fscale_x2:
6479 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6480 return;
6481 case Intrinsic::aarch64_sve_fmul_x4:
6483 Node->getValueType(0),
6484 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6485 AArch64::FMUL_4Z4Z_D}))
6486 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6487 return;
6488 case Intrinsic::aarch64_sve_fmul_x2:
6490 Node->getValueType(0),
6491 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6492 AArch64::FMUL_2Z2Z_D}))
6493 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6494 return;
6495 case Intrinsic::aarch64_sve_fcvtzs_x2:
6496 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6497 return;
6498 case Intrinsic::aarch64_sve_scvtf_x2:
6499 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6500 return;
6501 case Intrinsic::aarch64_sve_fcvtzu_x2:
6502 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6503 return;
6504 case Intrinsic::aarch64_sve_ucvtf_x2:
6505 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6506 return;
6507 case Intrinsic::aarch64_sve_fcvtzs_x4:
6508 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6509 return;
6510 case Intrinsic::aarch64_sve_scvtf_x4:
6511 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6512 return;
6513 case Intrinsic::aarch64_sve_fcvtzu_x4:
6514 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6515 return;
6516 case Intrinsic::aarch64_sve_ucvtf_x4:
6517 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6518 return;
6519 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6520 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6521 return;
6522 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6523 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6524 return;
6525 case Intrinsic::aarch64_sve_sclamp_single_x2:
6527 Node->getValueType(0),
6528 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6529 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6530 SelectClamp(Node, 2, Op);
6531 return;
6532 case Intrinsic::aarch64_sve_uclamp_single_x2:
6534 Node->getValueType(0),
6535 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6536 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6537 SelectClamp(Node, 2, Op);
6538 return;
6539 case Intrinsic::aarch64_sve_fclamp_single_x2:
6541 Node->getValueType(0),
6542 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6543 AArch64::FCLAMP_VG2_2Z2Z_D}))
6544 SelectClamp(Node, 2, Op);
6545 return;
6546 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6547 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6548 return;
6549 case Intrinsic::aarch64_sve_sclamp_single_x4:
6551 Node->getValueType(0),
6552 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6553 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6554 SelectClamp(Node, 4, Op);
6555 return;
6556 case Intrinsic::aarch64_sve_uclamp_single_x4:
6558 Node->getValueType(0),
6559 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6560 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6561 SelectClamp(Node, 4, Op);
6562 return;
6563 case Intrinsic::aarch64_sve_fclamp_single_x4:
6565 Node->getValueType(0),
6566 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6567 AArch64::FCLAMP_VG4_4Z4Z_D}))
6568 SelectClamp(Node, 4, Op);
6569 return;
6570 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6571 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6572 return;
6573 case Intrinsic::aarch64_sve_add_single_x2:
6575 Node->getValueType(0),
6576 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6577 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6578 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6579 return;
6580 case Intrinsic::aarch64_sve_add_single_x4:
6582 Node->getValueType(0),
6583 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6584 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6585 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6586 return;
6587 case Intrinsic::aarch64_sve_zip_x2:
6589 Node->getValueType(0),
6590 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6591 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6592 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6593 return;
6594 case Intrinsic::aarch64_sve_zipq_x2:
6595 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6596 AArch64::ZIP_VG2_2ZZZ_Q);
6597 return;
6598 case Intrinsic::aarch64_sve_zip_x4:
6600 Node->getValueType(0),
6601 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6602 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6603 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6604 return;
6605 case Intrinsic::aarch64_sve_zipq_x4:
6606 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6607 AArch64::ZIP_VG4_4Z4Z_Q);
6608 return;
6609 case Intrinsic::aarch64_sve_uzp_x2:
6611 Node->getValueType(0),
6612 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6613 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6614 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6615 return;
6616 case Intrinsic::aarch64_sve_uzpq_x2:
6617 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6618 AArch64::UZP_VG2_2ZZZ_Q);
6619 return;
6620 case Intrinsic::aarch64_sve_uzp_x4:
6622 Node->getValueType(0),
6623 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6624 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6625 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6626 return;
6627 case Intrinsic::aarch64_sve_uzpq_x4:
6628 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6629 AArch64::UZP_VG4_4Z4Z_Q);
6630 return;
6631 case Intrinsic::aarch64_sve_sel_x2:
6633 Node->getValueType(0),
6634 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6635 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6636 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6637 return;
6638 case Intrinsic::aarch64_sve_sel_x4:
6640 Node->getValueType(0),
6641 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6642 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6643 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6644 return;
6645 case Intrinsic::aarch64_sve_frinta_x2:
6646 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6647 return;
6648 case Intrinsic::aarch64_sve_frinta_x4:
6649 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6650 return;
6651 case Intrinsic::aarch64_sve_frintm_x2:
6652 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6653 return;
6654 case Intrinsic::aarch64_sve_frintm_x4:
6655 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6656 return;
6657 case Intrinsic::aarch64_sve_frintn_x2:
6658 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6659 return;
6660 case Intrinsic::aarch64_sve_frintn_x4:
6661 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6662 return;
6663 case Intrinsic::aarch64_sve_frintp_x2:
6664 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6665 return;
6666 case Intrinsic::aarch64_sve_frintp_x4:
6667 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6668 return;
6669 case Intrinsic::aarch64_sve_sunpk_x2:
6671 Node->getValueType(0),
6672 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6673 AArch64::SUNPK_VG2_2ZZ_D}))
6674 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6675 return;
6676 case Intrinsic::aarch64_sve_uunpk_x2:
6678 Node->getValueType(0),
6679 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6680 AArch64::UUNPK_VG2_2ZZ_D}))
6681 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6682 return;
6683 case Intrinsic::aarch64_sve_sunpk_x4:
6685 Node->getValueType(0),
6686 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6687 AArch64::SUNPK_VG4_4Z2Z_D}))
6688 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6689 return;
6690 case Intrinsic::aarch64_sve_uunpk_x4:
6692 Node->getValueType(0),
6693 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6694 AArch64::UUNPK_VG4_4Z2Z_D}))
6695 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6696 return;
6697 case Intrinsic::aarch64_sve_pext_x2: {
6699 Node->getValueType(0),
6700 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6701 AArch64::PEXT_2PCI_D}))
6702 SelectPExtPair(Node, Op);
6703 return;
6704 }
6705 }
6706 break;
6707 }
6708 case ISD::INTRINSIC_VOID: {
6709 unsigned IntNo = Node->getConstantOperandVal(1);
6710 if (Node->getNumOperands() >= 3)
6711 VT = Node->getOperand(2)->getValueType(0);
6712 switch (IntNo) {
6713 default:
6714 break;
6715 case Intrinsic::aarch64_neon_st1x2: {
6716 if (VT == MVT::v8i8) {
6717 SelectStore(Node, 2, AArch64::ST1Twov8b);
6718 return;
6719 } else if (VT == MVT::v16i8) {
6720 SelectStore(Node, 2, AArch64::ST1Twov16b);
6721 return;
6722 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6723 VT == MVT::v4bf16) {
6724 SelectStore(Node, 2, AArch64::ST1Twov4h);
6725 return;
6726 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6727 VT == MVT::v8bf16) {
6728 SelectStore(Node, 2, AArch64::ST1Twov8h);
6729 return;
6730 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6731 SelectStore(Node, 2, AArch64::ST1Twov2s);
6732 return;
6733 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6734 SelectStore(Node, 2, AArch64::ST1Twov4s);
6735 return;
6736 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6737 SelectStore(Node, 2, AArch64::ST1Twov2d);
6738 return;
6739 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6740 SelectStore(Node, 2, AArch64::ST1Twov1d);
6741 return;
6742 }
6743 break;
6744 }
6745 case Intrinsic::aarch64_neon_st1x3: {
6746 if (VT == MVT::v8i8) {
6747 SelectStore(Node, 3, AArch64::ST1Threev8b);
6748 return;
6749 } else if (VT == MVT::v16i8) {
6750 SelectStore(Node, 3, AArch64::ST1Threev16b);
6751 return;
6752 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6753 VT == MVT::v4bf16) {
6754 SelectStore(Node, 3, AArch64::ST1Threev4h);
6755 return;
6756 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6757 VT == MVT::v8bf16) {
6758 SelectStore(Node, 3, AArch64::ST1Threev8h);
6759 return;
6760 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6761 SelectStore(Node, 3, AArch64::ST1Threev2s);
6762 return;
6763 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6764 SelectStore(Node, 3, AArch64::ST1Threev4s);
6765 return;
6766 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6767 SelectStore(Node, 3, AArch64::ST1Threev2d);
6768 return;
6769 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6770 SelectStore(Node, 3, AArch64::ST1Threev1d);
6771 return;
6772 }
6773 break;
6774 }
6775 case Intrinsic::aarch64_neon_st1x4: {
6776 if (VT == MVT::v8i8) {
6777 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6778 return;
6779 } else if (VT == MVT::v16i8) {
6780 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6781 return;
6782 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6783 VT == MVT::v4bf16) {
6784 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6785 return;
6786 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6787 VT == MVT::v8bf16) {
6788 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6789 return;
6790 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6791 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6792 return;
6793 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6794 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6795 return;
6796 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6797 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6798 return;
6799 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6800 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6801 return;
6802 }
6803 break;
6804 }
6805 case Intrinsic::aarch64_neon_st2: {
6806 if (VT == MVT::v8i8) {
6807 SelectStore(Node, 2, AArch64::ST2Twov8b);
6808 return;
6809 } else if (VT == MVT::v16i8) {
6810 SelectStore(Node, 2, AArch64::ST2Twov16b);
6811 return;
6812 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6813 VT == MVT::v4bf16) {
6814 SelectStore(Node, 2, AArch64::ST2Twov4h);
6815 return;
6816 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6817 VT == MVT::v8bf16) {
6818 SelectStore(Node, 2, AArch64::ST2Twov8h);
6819 return;
6820 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6821 SelectStore(Node, 2, AArch64::ST2Twov2s);
6822 return;
6823 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6824 SelectStore(Node, 2, AArch64::ST2Twov4s);
6825 return;
6826 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6827 SelectStore(Node, 2, AArch64::ST2Twov2d);
6828 return;
6829 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6830 SelectStore(Node, 2, AArch64::ST1Twov1d);
6831 return;
6832 }
6833 break;
6834 }
6835 case Intrinsic::aarch64_neon_st3: {
6836 if (VT == MVT::v8i8) {
6837 SelectStore(Node, 3, AArch64::ST3Threev8b);
6838 return;
6839 } else if (VT == MVT::v16i8) {
6840 SelectStore(Node, 3, AArch64::ST3Threev16b);
6841 return;
6842 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6843 VT == MVT::v4bf16) {
6844 SelectStore(Node, 3, AArch64::ST3Threev4h);
6845 return;
6846 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6847 VT == MVT::v8bf16) {
6848 SelectStore(Node, 3, AArch64::ST3Threev8h);
6849 return;
6850 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6851 SelectStore(Node, 3, AArch64::ST3Threev2s);
6852 return;
6853 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6854 SelectStore(Node, 3, AArch64::ST3Threev4s);
6855 return;
6856 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6857 SelectStore(Node, 3, AArch64::ST3Threev2d);
6858 return;
6859 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6860 SelectStore(Node, 3, AArch64::ST1Threev1d);
6861 return;
6862 }
6863 break;
6864 }
6865 case Intrinsic::aarch64_neon_st4: {
6866 if (VT == MVT::v8i8) {
6867 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6868 return;
6869 } else if (VT == MVT::v16i8) {
6870 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6871 return;
6872 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6873 VT == MVT::v4bf16) {
6874 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6875 return;
6876 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6877 VT == MVT::v8bf16) {
6878 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6879 return;
6880 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6881 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6882 return;
6883 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6884 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6885 return;
6886 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6887 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6888 return;
6889 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6890 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6891 return;
6892 }
6893 break;
6894 }
6895 case Intrinsic::aarch64_neon_st2lane: {
6896 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6897 SelectStoreLane(Node, 2, AArch64::ST2i8);
6898 return;
6899 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6900 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6901 SelectStoreLane(Node, 2, AArch64::ST2i16);
6902 return;
6903 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6904 VT == MVT::v2f32) {
6905 SelectStoreLane(Node, 2, AArch64::ST2i32);
6906 return;
6907 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6908 VT == MVT::v1f64) {
6909 SelectStoreLane(Node, 2, AArch64::ST2i64);
6910 return;
6911 }
6912 break;
6913 }
6914 case Intrinsic::aarch64_neon_st3lane: {
6915 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6916 SelectStoreLane(Node, 3, AArch64::ST3i8);
6917 return;
6918 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6919 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6920 SelectStoreLane(Node, 3, AArch64::ST3i16);
6921 return;
6922 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6923 VT == MVT::v2f32) {
6924 SelectStoreLane(Node, 3, AArch64::ST3i32);
6925 return;
6926 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6927 VT == MVT::v1f64) {
6928 SelectStoreLane(Node, 3, AArch64::ST3i64);
6929 return;
6930 }
6931 break;
6932 }
6933 case Intrinsic::aarch64_neon_st4lane: {
6934 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6935 SelectStoreLane(Node, 4, AArch64::ST4i8);
6936 return;
6937 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6938 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6939 SelectStoreLane(Node, 4, AArch64::ST4i16);
6940 return;
6941 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6942 VT == MVT::v2f32) {
6943 SelectStoreLane(Node, 4, AArch64::ST4i32);
6944 return;
6945 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6946 VT == MVT::v1f64) {
6947 SelectStoreLane(Node, 4, AArch64::ST4i64);
6948 return;
6949 }
6950 break;
6951 }
6952 case Intrinsic::aarch64_sve_st2q: {
6953 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6954 return;
6955 }
6956 case Intrinsic::aarch64_sve_st3q: {
6957 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6958 return;
6959 }
6960 case Intrinsic::aarch64_sve_st4q: {
6961 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6962 return;
6963 }
6964 case Intrinsic::aarch64_sve_st2: {
6965 if (VT == MVT::nxv16i8) {
6966 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6967 return;
6968 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6969 VT == MVT::nxv8bf16) {
6970 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6971 return;
6972 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6973 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6974 return;
6975 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6976 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6977 return;
6978 }
6979 break;
6980 }
6981 case Intrinsic::aarch64_sve_st3: {
6982 if (VT == MVT::nxv16i8) {
6983 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6984 return;
6985 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6986 VT == MVT::nxv8bf16) {
6987 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6988 return;
6989 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6990 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6991 return;
6992 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6993 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6994 return;
6995 }
6996 break;
6997 }
6998 case Intrinsic::aarch64_sve_st4: {
6999 if (VT == MVT::nxv16i8) {
7000 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
7001 return;
7002 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7003 VT == MVT::nxv8bf16) {
7004 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
7005 return;
7006 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7007 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
7008 return;
7009 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7010 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
7011 return;
7012 }
7013 break;
7014 }
7015 }
7016 break;
7017 }
7018 case AArch64ISD::LD2post: {
7019 if (VT == MVT::v8i8) {
7020 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7021 return;
7022 } else if (VT == MVT::v16i8) {
7023 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7024 return;
7025 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7026 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7027 return;
7028 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7029 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7030 return;
7031 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7032 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7033 return;
7034 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7035 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7036 return;
7037 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7038 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7039 return;
7040 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7041 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7042 return;
7043 }
7044 break;
7045 }
7046 case AArch64ISD::LD3post: {
7047 if (VT == MVT::v8i8) {
7048 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7049 return;
7050 } else if (VT == MVT::v16i8) {
7051 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7052 return;
7053 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7054 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7055 return;
7056 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7057 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7058 return;
7059 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7060 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7061 return;
7062 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7063 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7064 return;
7065 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7066 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7067 return;
7068 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7069 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7070 return;
7071 }
7072 break;
7073 }
7074 case AArch64ISD::LD4post: {
7075 if (VT == MVT::v8i8) {
7076 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7077 return;
7078 } else if (VT == MVT::v16i8) {
7079 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7080 return;
7081 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7082 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7083 return;
7084 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7085 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7086 return;
7087 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7088 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7089 return;
7090 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7091 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7092 return;
7093 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7094 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7095 return;
7096 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7097 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7098 return;
7099 }
7100 break;
7101 }
7102 case AArch64ISD::LD1x2post: {
7103 if (VT == MVT::v8i8) {
7104 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7105 return;
7106 } else if (VT == MVT::v16i8) {
7107 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7108 return;
7109 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7110 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7111 return;
7112 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7113 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7114 return;
7115 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7116 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7117 return;
7118 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7119 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7120 return;
7121 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7122 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7123 return;
7124 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7125 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7126 return;
7127 }
7128 break;
7129 }
7130 case AArch64ISD::LD1x3post: {
7131 if (VT == MVT::v8i8) {
7132 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7133 return;
7134 } else if (VT == MVT::v16i8) {
7135 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7136 return;
7137 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7138 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7139 return;
7140 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7141 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7142 return;
7143 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7144 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7145 return;
7146 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7147 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7148 return;
7149 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7150 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7151 return;
7152 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7153 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7154 return;
7155 }
7156 break;
7157 }
7158 case AArch64ISD::LD1x4post: {
7159 if (VT == MVT::v8i8) {
7160 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7161 return;
7162 } else if (VT == MVT::v16i8) {
7163 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7164 return;
7165 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7166 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7167 return;
7168 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7169 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7170 return;
7171 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7172 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7173 return;
7174 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7175 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7176 return;
7177 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7178 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7179 return;
7180 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7181 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7182 return;
7183 }
7184 break;
7185 }
7186 case AArch64ISD::LD1DUPpost: {
7187 if (VT == MVT::v8i8) {
7188 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7189 return;
7190 } else if (VT == MVT::v16i8) {
7191 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7192 return;
7193 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7194 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7195 return;
7196 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7197 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7198 return;
7199 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7200 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7201 return;
7202 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7203 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7204 return;
7205 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7206 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7207 return;
7208 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7209 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7210 return;
7211 }
7212 break;
7213 }
7214 case AArch64ISD::LD2DUPpost: {
7215 if (VT == MVT::v8i8) {
7216 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7217 return;
7218 } else if (VT == MVT::v16i8) {
7219 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7220 return;
7221 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7222 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7223 return;
7224 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7225 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7226 return;
7227 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7228 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7229 return;
7230 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7231 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7232 return;
7233 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7234 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7235 return;
7236 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7237 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7238 return;
7239 }
7240 break;
7241 }
7242 case AArch64ISD::LD3DUPpost: {
7243 if (VT == MVT::v8i8) {
7244 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7245 return;
7246 } else if (VT == MVT::v16i8) {
7247 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7248 return;
7249 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7250 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7251 return;
7252 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7253 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7254 return;
7255 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7256 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7257 return;
7258 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7259 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7260 return;
7261 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7262 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7263 return;
7264 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7265 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7266 return;
7267 }
7268 break;
7269 }
7270 case AArch64ISD::LD4DUPpost: {
7271 if (VT == MVT::v8i8) {
7272 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7273 return;
7274 } else if (VT == MVT::v16i8) {
7275 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7276 return;
7277 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7278 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7279 return;
7280 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7281 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7282 return;
7283 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7284 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7285 return;
7286 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7287 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7288 return;
7289 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7290 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7291 return;
7292 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7293 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7294 return;
7295 }
7296 break;
7297 }
7298 case AArch64ISD::LD1LANEpost: {
7299 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7300 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7301 return;
7302 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7303 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7304 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7305 return;
7306 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7307 VT == MVT::v2f32) {
7308 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7309 return;
7310 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7311 VT == MVT::v1f64) {
7312 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7313 return;
7314 }
7315 break;
7316 }
7317 case AArch64ISD::LD2LANEpost: {
7318 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7319 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7320 return;
7321 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7322 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7323 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7324 return;
7325 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7326 VT == MVT::v2f32) {
7327 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7328 return;
7329 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7330 VT == MVT::v1f64) {
7331 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7332 return;
7333 }
7334 break;
7335 }
7336 case AArch64ISD::LD3LANEpost: {
7337 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7338 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7339 return;
7340 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7341 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7342 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7343 return;
7344 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7345 VT == MVT::v2f32) {
7346 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7347 return;
7348 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7349 VT == MVT::v1f64) {
7350 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7351 return;
7352 }
7353 break;
7354 }
7355 case AArch64ISD::LD4LANEpost: {
7356 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7357 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7358 return;
7359 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7360 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7361 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7362 return;
7363 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7364 VT == MVT::v2f32) {
7365 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7366 return;
7367 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7368 VT == MVT::v1f64) {
7369 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7370 return;
7371 }
7372 break;
7373 }
7374 case AArch64ISD::ST2post: {
7375 VT = Node->getOperand(1).getValueType();
7376 if (VT == MVT::v8i8) {
7377 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7378 return;
7379 } else if (VT == MVT::v16i8) {
7380 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7381 return;
7382 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7383 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7384 return;
7385 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7386 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7387 return;
7388 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7389 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7390 return;
7391 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7392 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7393 return;
7394 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7395 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7396 return;
7397 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7398 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7399 return;
7400 }
7401 break;
7402 }
7403 case AArch64ISD::ST3post: {
7404 VT = Node->getOperand(1).getValueType();
7405 if (VT == MVT::v8i8) {
7406 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7407 return;
7408 } else if (VT == MVT::v16i8) {
7409 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7410 return;
7411 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7412 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7413 return;
7414 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7415 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7416 return;
7417 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7418 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7419 return;
7420 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7421 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7422 return;
7423 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7424 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7425 return;
7426 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7427 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7428 return;
7429 }
7430 break;
7431 }
7432 case AArch64ISD::ST4post: {
7433 VT = Node->getOperand(1).getValueType();
7434 if (VT == MVT::v8i8) {
7435 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7436 return;
7437 } else if (VT == MVT::v16i8) {
7438 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7439 return;
7440 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7441 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7442 return;
7443 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7444 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7445 return;
7446 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7447 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7448 return;
7449 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7450 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7451 return;
7452 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7453 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7454 return;
7455 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7456 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7457 return;
7458 }
7459 break;
7460 }
7461 case AArch64ISD::ST1x2post: {
7462 VT = Node->getOperand(1).getValueType();
7463 if (VT == MVT::v8i8) {
7464 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7465 return;
7466 } else if (VT == MVT::v16i8) {
7467 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7468 return;
7469 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7470 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7471 return;
7472 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7473 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7474 return;
7475 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7476 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7477 return;
7478 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7479 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7480 return;
7481 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7482 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7483 return;
7484 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7485 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7486 return;
7487 }
7488 break;
7489 }
7490 case AArch64ISD::ST1x3post: {
7491 VT = Node->getOperand(1).getValueType();
7492 if (VT == MVT::v8i8) {
7493 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7494 return;
7495 } else if (VT == MVT::v16i8) {
7496 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7497 return;
7498 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7499 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7500 return;
7501 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7502 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7503 return;
7504 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7505 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7506 return;
7507 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7508 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7509 return;
7510 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7511 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7512 return;
7513 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7514 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7515 return;
7516 }
7517 break;
7518 }
7519 case AArch64ISD::ST1x4post: {
7520 VT = Node->getOperand(1).getValueType();
7521 if (VT == MVT::v8i8) {
7522 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7523 return;
7524 } else if (VT == MVT::v16i8) {
7525 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7526 return;
7527 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7528 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7529 return;
7530 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7531 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7532 return;
7533 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7534 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7535 return;
7536 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7537 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7538 return;
7539 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7540 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7541 return;
7542 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7543 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7544 return;
7545 }
7546 break;
7547 }
7548 case AArch64ISD::ST2LANEpost: {
7549 VT = Node->getOperand(1).getValueType();
7550 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7551 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7552 return;
7553 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7554 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7555 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7556 return;
7557 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7558 VT == MVT::v2f32) {
7559 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7560 return;
7561 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7562 VT == MVT::v1f64) {
7563 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7564 return;
7565 }
7566 break;
7567 }
7568 case AArch64ISD::ST3LANEpost: {
7569 VT = Node->getOperand(1).getValueType();
7570 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7571 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7572 return;
7573 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7574 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7575 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7576 return;
7577 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7578 VT == MVT::v2f32) {
7579 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7580 return;
7581 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7582 VT == MVT::v1f64) {
7583 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7584 return;
7585 }
7586 break;
7587 }
7588 case AArch64ISD::ST4LANEpost: {
7589 VT = Node->getOperand(1).getValueType();
7590 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7591 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7592 return;
7593 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7594 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7595 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7596 return;
7597 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7598 VT == MVT::v2f32) {
7599 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7600 return;
7601 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7602 VT == MVT::v1f64) {
7603 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7604 return;
7605 }
7606 break;
7607 }
7608 }
7609
7610 // Select the default instruction
7611 SelectCode(Node);
7612}
7613
7614/// createAArch64ISelDag - This pass converts a legalized DAG into a
7615/// AArch64-specific DAG, ready for instruction scheduling.
7617 CodeGenOptLevel OptLevel) {
7618 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7619}
7620
7621/// When \p PredVT is a scalable vector predicate in the form
7622/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7623/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7624/// structured vectors (NumVec >1), the output data type is
7625/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7626/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7627/// EVT.
7629 unsigned NumVec) {
7630 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7631 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7632 return EVT();
7633
7634 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7635 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7636 return EVT();
7637
7638 ElementCount EC = PredVT.getVectorElementCount();
7639 EVT ScalarVT =
7640 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7641 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7642
7643 return MemVT;
7644}
7645
7646/// Return the EVT of the data associated to a memory operation in \p
7647/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7649 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7650 return MemIntr->getMemoryVT();
7651
7652 if (isa<MemSDNode>(Root)) {
7653 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7654
7655 EVT DataVT;
7656 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7657 DataVT = Load->getValueType(0);
7658 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7659 DataVT = Load->getValueType(0);
7660 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7661 DataVT = Store->getValue().getValueType();
7662 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7663 DataVT = Store->getValue().getValueType();
7664 else
7665 llvm_unreachable("Unexpected MemSDNode!");
7666
7667 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7668 }
7669
7670 const unsigned Opcode = Root->getOpcode();
7671 // For custom ISD nodes, we have to look at them individually to extract the
7672 // type of the data moved to/from memory.
7673 switch (Opcode) {
7674 case AArch64ISD::LD1_MERGE_ZERO:
7675 case AArch64ISD::LD1S_MERGE_ZERO:
7676 case AArch64ISD::LDNF1_MERGE_ZERO:
7677 case AArch64ISD::LDNF1S_MERGE_ZERO:
7678 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7679 case AArch64ISD::ST1_PRED:
7680 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7681 default:
7682 break;
7683 }
7684
7685 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7686 return EVT();
7687
7688 switch (Root->getConstantOperandVal(1)) {
7689 default:
7690 return EVT();
7691 case Intrinsic::aarch64_sme_ldr:
7692 case Intrinsic::aarch64_sme_str:
7693 return MVT::nxv16i8;
7694 case Intrinsic::aarch64_sve_prf:
7695 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7696 // width of the predicate.
7698 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7699 case Intrinsic::aarch64_sve_ld2_sret:
7700 case Intrinsic::aarch64_sve_ld2q_sret:
7702 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7703 case Intrinsic::aarch64_sve_st2q:
7705 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7706 case Intrinsic::aarch64_sve_ld3_sret:
7707 case Intrinsic::aarch64_sve_ld3q_sret:
7709 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7710 case Intrinsic::aarch64_sve_st3q:
7712 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7713 case Intrinsic::aarch64_sve_ld4_sret:
7714 case Intrinsic::aarch64_sve_ld4q_sret:
7716 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7717 case Intrinsic::aarch64_sve_st4q:
7719 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7720 case Intrinsic::aarch64_sve_ld1udq:
7721 case Intrinsic::aarch64_sve_st1dq:
7722 return EVT(MVT::nxv1i64);
7723 case Intrinsic::aarch64_sve_ld1uwq:
7724 case Intrinsic::aarch64_sve_st1wq:
7725 return EVT(MVT::nxv1i32);
7726 }
7727}
7728
7729/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7730/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
7731/// where Root is the memory access using N for its address.
7732template <int64_t Min, int64_t Max>
7733bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7734 SDValue &Base,
7735 SDValue &OffImm) {
7736 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7737 const DataLayout &DL = CurDAG->getDataLayout();
7738 const MachineFrameInfo &MFI = MF->getFrameInfo();
7739
7740 if (N.getOpcode() == ISD::FrameIndex) {
7741 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7742 // We can only encode VL scaled offsets, so only fold in frame indexes
7743 // referencing SVE objects.
7744 if (MFI.hasScalableStackID(FI)) {
7745 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7746 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7747 return true;
7748 }
7749
7750 return false;
7751 }
7752
7753 if (MemVT == EVT())
7754 return false;
7755
7756 if (N.getOpcode() != ISD::ADD)
7757 return false;
7758
7759 SDValue VScale = N.getOperand(1);
7760 int64_t MulImm = std::numeric_limits<int64_t>::max();
7761 if (VScale.getOpcode() == ISD::VSCALE) {
7762 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7763 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7764 int64_t ByteOffset = C->getSExtValue();
7765 const auto KnownVScale =
7767
7768 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7769 return false;
7770
7771 MulImm = ByteOffset / KnownVScale;
7772 } else
7773 return false;
7774
7775 TypeSize TS = MemVT.getSizeInBits();
7776 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7777
7778 if ((MulImm % MemWidthBytes) != 0)
7779 return false;
7780
7781 int64_t Offset = MulImm / MemWidthBytes;
7783 return false;
7784
7785 Base = N.getOperand(0);
7786 if (Base.getOpcode() == ISD::FrameIndex) {
7787 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7788 // We can only encode VL scaled offsets, so only fold in frame indexes
7789 // referencing SVE objects.
7790 if (MFI.hasScalableStackID(FI))
7791 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7792 }
7793
7794 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7795 return true;
7796}
7797
7798/// Select register plus register addressing mode for SVE, with scaled
7799/// offset.
7800bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7801 SDValue &Base,
7802 SDValue &Offset) {
7803 if (N.getOpcode() != ISD::ADD)
7804 return false;
7805
7806 // Process an ADD node.
7807 const SDValue LHS = N.getOperand(0);
7808 const SDValue RHS = N.getOperand(1);
7809
7810 // 8 bit data does not come with the SHL node, so it is treated
7811 // separately.
7812 if (Scale == 0) {
7813 Base = LHS;
7814 Offset = RHS;
7815 return true;
7816 }
7817
7818 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7819 int64_t ImmOff = C->getSExtValue();
7820 unsigned Size = 1 << Scale;
7821
7822 // To use the reg+reg addressing mode, the immediate must be a multiple of
7823 // the vector element's byte size.
7824 if (ImmOff % Size)
7825 return false;
7826
7827 SDLoc DL(N);
7828 Base = LHS;
7829 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7830 SDValue Ops[] = {Offset};
7831 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7832 Offset = SDValue(MI, 0);
7833 return true;
7834 }
7835
7836 // Check if the RHS is a shift node with a constant.
7837 if (RHS.getOpcode() != ISD::SHL)
7838 return false;
7839
7840 const SDValue ShiftRHS = RHS.getOperand(1);
7841 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7842 if (C->getZExtValue() == Scale) {
7843 Base = LHS;
7844 Offset = RHS.getOperand(0);
7845 return true;
7846 }
7847
7848 return false;
7849}
7850
7851bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7852 const AArch64TargetLowering *TLI =
7853 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7854
7855 return TLI->isAllActivePredicate(*CurDAG, N);
7856}
7857
7858bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7859 EVT VT = N.getValueType();
7860 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7861}
7862
7863bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7865 unsigned Scale) {
7866 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7867 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7868 int64_t ImmOff = C->getSExtValue();
7869 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7870 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7871 }
7872 return SDValue();
7873 };
7874
7875 if (SDValue C = MatchConstantOffset(N)) {
7876 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7877 Offset = C;
7878 return true;
7879 }
7880
7881 // Try to untangle an ADD node into a 'reg + offset'
7882 if (CurDAG->isBaseWithConstantOffset(N)) {
7883 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7884 Base = N.getOperand(0);
7885 Offset = C;
7886 return true;
7887 }
7888 }
7889
7890 // By default, just match reg + 0.
7891 Base = N;
7892 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7893 return true;
7894}
7895
7896bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7897 SDValue &Imm) {
7899 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7900 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7901 // Check conservatively if the immediate fits the valid range [0, 64).
7902 // Immediate variants for GE and HS definitely need to be decremented
7903 // when lowering the pseudos later, so an immediate of 1 would become 0.
7904 // For the inverse conditions LT and LO we don't know for sure if they
7905 // will need a decrement but should the decision be made to reverse the
7906 // branch condition, we again end up with the need to decrement.
7907 // The same argument holds for LE, LS, GT and HI and possibly
7908 // incremented immediates. This can lead to slightly less optimal
7909 // codegen, e.g. we never codegen the legal case
7910 // cblt w0, #63, A
7911 // because we could end up with the illegal case
7912 // cbge w0, #64, B
7913 // should the decision to reverse the branch direction be made. For the
7914 // lower bound cases this is no problem since we can express comparisons
7915 // against 0 with either tbz/tnbz or using wzr/xzr.
7916 uint64_t LowerBound = 0, UpperBound = 64;
7917 switch (CC) {
7918 case AArch64CC::GE:
7919 case AArch64CC::HS:
7920 case AArch64CC::LT:
7921 case AArch64CC::LO:
7922 LowerBound = 1;
7923 break;
7924 case AArch64CC::LE:
7925 case AArch64CC::LS:
7926 case AArch64CC::GT:
7927 case AArch64CC::HI:
7928 UpperBound = 63;
7929 break;
7930 default:
7931 break;
7932 }
7933
7934 if (CN->getAPIntValue().uge(LowerBound) &&
7935 CN->getAPIntValue().ult(UpperBound)) {
7936 SDLoc DL(N);
7937 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7938 return true;
7939 }
7940 }
7941
7942 return false;
7943}
7944
7945template <bool MatchCBB>
7946bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7947 SDValue &ExtType) {
7948
7949 // Use an invalid shift-extend value to indicate we don't need to extend later
7950 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7951 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7952 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7953 return false;
7954 Reg = N.getOperand(0);
7955 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7956 SDLoc(N), MVT::i32);
7957 return true;
7958 }
7959
7961
7962 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7963 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7964 Reg = N.getOperand(0);
7965 ExtType =
7966 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7967 return true;
7968 }
7969
7970 return false;
7971}
7972
7973void AArch64DAGToDAGISel::PreprocessISelDAG() {
7974 bool MadeChange = false;
7975 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
7976 if (N.use_empty())
7977 continue;
7978
7980 switch (N.getOpcode()) {
7981 case ISD::SCALAR_TO_VECTOR: {
7982 EVT ScalarTy = N.getValueType(0).getVectorElementType();
7983 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
7984 ScalarTy == N.getOperand(0).getValueType())
7985 Result = addBitcastHints(*CurDAG, N);
7986
7987 break;
7988 }
7989 default:
7990 break;
7991 }
7992
7993 if (Result) {
7994 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
7995 LLVM_DEBUG(N.dump(CurDAG));
7996 LLVM_DEBUG(dbgs() << "\nNew: ");
7997 LLVM_DEBUG(Result.dump(CurDAG));
7998 LLVM_DEBUG(dbgs() << "\n");
7999
8000 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
8001 MadeChange = true;
8002 }
8003 }
8004
8005 if (MadeChange)
8006 CurDAG->RemoveDeadNodes();
8007
8009}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static std::optional< APInt > GetNEONSplatValue(SDValue N)
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static std::optional< APInt > DecodeNEONSplat(SDValue N)
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1685
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1613
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1467
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:210
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.