//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"

// https://github.com/llvm/llvm-project/issues/114425
#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
#pragma inline_depth(0)
#endif

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;
  void PreprocessISelDAG() override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  template <signed Low, signed High>
  bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }
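
  // For instance, SelectAddrModeIndexedUImm<8, 7> accepts (add x, #48): the
  // offset scales by the 8-byte access size to OffImm = 6 <= Max. An offset
  // of 64 would scale to 8 > Max, so the whole address is instead kept in a
  // register with OffImm = 0.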

  template <int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template <int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }
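
  // E.g. (extract_subvector (v8i16 x), (i64 4)) is the high v4i16 half of x,
  // so Res is set to the whole 128-bit source; an extract of the low half
  // (index 0) deliberately does not match here.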

  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }
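
  // This recognizes the rounding-shift idiom (x + (1 << (s - 1))) >> s, which
  // maps onto rounding shifts such as URSHR/RSHRN: e.g. a v8i16 VLSHR by 4
  // whose input adds a splat of 8 yields Res1 = x and Res2 = 4.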

  bool SelectDupZeroOrUndef(SDValue N) {
    switch (N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectAny(SDValue) { return true; }

  bool SelectDupZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template <signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }
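
  // E.g. with Min = 1, Max = 16 and Scale = 2, a constant 8 matches with
  // Imm = 4: vscale * 8 is four times the 64-bit element count (a CNTD-style
  // multiplier). A constant 7 is rejected as it is not a multiple of Scale.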

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  // Similar to above, except the register must start at a multiple of the
  // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
  SDValue createZMulTuple(ArrayRef<SDValue> Regs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  void SelectPtrauthAuth(SDNode *N);
  void SelectPtrauthResign(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
                                       unsigned Opc_rr);
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
                             unsigned Op);
  void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op,
                              unsigned MaxIdx, unsigned Scale,
                              unsigned BaseReg = 0);
  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
                                 unsigned Opc, uint32_t MaxImm);

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);

  bool trySelectXAR(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
  }
  bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  template <unsigned FloatWidth>
  bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
  }

  bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
                                        unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                          bool Negate);
  bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
                          SDValue &Shift, bool Negate);
  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                              bool Negate);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
  bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
  bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
  bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);

  bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);

  bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);

  template <bool MatchCBB>
  bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
};

class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
};
} // end anonymous namespace

char AArch64DAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// addBitcastHints - This method adds bitcast hints to the operands of a node
/// to help the instruction selector determine which operands are in Neon
/// registers.
static SDValue addBitcastHints(SDNode &N, SelectionDAG &DAG) {
  SDLoc DL(&N);
  auto getFloatVT = [&](EVT VT) {
    EVT ScalarVT = VT.getScalarType();
    assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
    return VT.changeElementType(*(DAG.getContext()),
                                ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
  };
  SmallVector<SDValue, 8> NewOps;
  NewOps.reserve(N.getNumOperands());

  for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
    auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
                                    N.getOperand(I));
    NewOps.push_back(bitcasted);
  }
  EVT OrigVT = N.getValueType(0);
  SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
  return DAG.getBitcast(OrigVT, OpNode);
}

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 64-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if N is a constant operand.
// If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand. If so Imm will
// receive the value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif

static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
  assert(RegWidth == 32 || RegWidth == 64);
  if (RegWidth == 32)
    return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType11(Imm));
  return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
}
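
// The 8-bit FMOV immediate encodes a float with a 4-bit fraction and a 3-bit
// biased exponent: e.g. imm8 = 0x70 denotes 1.0, which decodes to the bit
// pattern 0x3f800000 for a 32-bit register.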

// Decodes the raw integer splat value from a NEON splat operation.
static std::optional<APInt> DecodeNEONSplat(SDValue N) {
  assert(N.getValueType().isInteger() && "Only integers are supported");
  if (N->getOpcode() == AArch64ISD::NVCAST)
    N = N->getOperand(0);
  unsigned SplatWidth = N.getScalarValueSizeInBits();
  if (N.getOpcode() == AArch64ISD::FMOV)
    return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
  if (N->getOpcode() == AArch64ISD::MOVI)
    return APInt(SplatWidth, N.getConstantOperandVal(0));
  if (N->getOpcode() == AArch64ISD::MOVIshift)
    return APInt(SplatWidth,
                 N.getConstantOperandVal(0) << N.getConstantOperandVal(1));
  if (N->getOpcode() == AArch64ISD::MVNIshift)
    return ~APInt(SplatWidth,
                  N.getConstantOperandVal(0) << N.getConstantOperandVal(1));
  if (N->getOpcode() == AArch64ISD::MOVIedit)
    return APInt(SplatWidth, AArch64_AM::decodeAdvSIMDModImmType10(
                                 N.getConstantOperandVal(0)));
  if (N->getOpcode() == AArch64ISD::DUP)
    if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
      return Const->getAPIntValue().trunc(SplatWidth);
  // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
  // in AArch64ISelLowering.
  return std::nullopt;
}

// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
// matching the element size of N.
static std::optional<APInt> GetNEONSplatValue(SDValue N) {
  unsigned SplatWidth = N.getScalarValueSizeInBits();
  if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
    if (SplatVal->getBitWidth() <= SplatWidth)
      return APInt::getSplat(SplatWidth, *SplatVal);
    if (SplatVal->isSplat(SplatWidth))
      return SplatVal->trunc(SplatWidth);
  }
  return std::nullopt;
}
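
// For instance, (v8i16 (MOVIshift 0x55, 8)) decodes to 0x5500 and is returned
// as-is, while a v2i64 MOVIedit of 0xff00ff00ff00ff00 seen through an NVCAST
// to v8i16 is still a 16-bit splat and is truncated to 0xff00.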

bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
                                                         SDValue &Imm) {
  std::optional<APInt> ImmVal = GetNEONSplatValue(N);
  if (!ImmVal)
    return false;
  uint64_t Encoding;
  if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
                                   ImmVal->getZExtValue(), Encoding))
    return false;

  Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
  return true;
}

bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
                                                        SDValue &Shift) {
  if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
    return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
                              N.getValueType().getScalarType().getSimpleVT(),
                              Imm, Shift,
                              /*Negate=*/false);
  return false;
}

bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
                                                        SDValue &Imm) {
  if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
    return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
  return false;
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass();
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
                                       Op.getValueType(), Op, RC),
                0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = N.getNode()->getAsZExtVal();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
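
// E.g. 4095 (0xfff) selects as Val = 0xfff with LSL #0 and 0xabc000 as
// Val = 0xabc with LSL #12, while 0x1001 (bits in both halves) and 0x1001000
// (wider than 24 bits) are rejected.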

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = N.getNode()->getAsZExtVal();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}
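
// This is what lets a compare against -16 select as "cmn x0, #16", or a
// subtract of a negative constant select as an ADD: the two's complement of
// the constant must itself fit the 12-bit (optionally LSL #12) encoding.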

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

static bool isMemOpOrPrefetch(SDNode *N) {
  return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->users())
    if (!isMemOpOrPrefetch(UI))
      for (SDNode *UII : UI->users())
        if (!isMemOpOrPrefetch(UII))
          return false;
  return true;
}

/// Determine whether it is worth folding V into an extended register
/// addressing mode.
bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a slow shift, folding a shift into multiple loads
  // costs additional micro-ops.
  if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
    return false;

  // Check whether we're going to emit the address arithmetic anyway because
  // it's used by a non-address operation.
  if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
    return true;
  if (V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// Fold "and (shl/srl/sra x, c), mask" into "shl (srl/sra x, c1), c2" so we
/// can select more shifted-register operands.
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp, and
    // BitWidth != LowZBits + MaskLen doesn't match the pattern.
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp.
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA needs all high bits.
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1.
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}
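
// Worked example (i32): (and (shl x, 2), 0xfffffff0) has LowZBits = 4 and
// MaskLen = 28. It is rewritten to UBFMWri x, 2, 31 (an LSR by 2) consumed as
// a shifted-register operand with LSL #4, letting the AND fold away.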

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

/// Determine whether it is worth folding V into an extended register of an
/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
/// instruction, and the shift should be treated as worth folding even if it
/// has multiple uses.
bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the add/sub and save a cycle.
  if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&
      getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
    return true;

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
    return true;

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFoldingALU(N, true);
  }

  return false;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
}
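
// E.g. when folding (and x, 0xff) of an i64 x into "add w0, w1, w2, uxtb",
// the extend operand must be a W register, so a sub_32 EXTRACT_SUBREG of x is
// emitted here.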

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template <signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

// Returns a suitable RDSVL multiplier from a left shift.
template <signed Low, signed High>
bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
  if (MulImm >= Low && MulImm <= High) {
    Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    // Don't match sext of vector extracts. These can use SMOV, but if we match
    // this as an extended register, we'll always fold the extend into an ALU op
    // user of the extend (which results in a UMOV).
    if (Ext == AArch64_AM::SXTB || Ext == AArch64_AM::SXTH) {
      SDValue Op = N.getOperand(0);
      if (Op->getOpcode() == ISD::ANY_EXTEND)
        Op = Op->getOperand(0);
      if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          Op.getOperand(0).getValueType().isFixedLengthVector())
        return false;
    }

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead. Use
    // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This operand is
/// used by instructions that take an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *User : N->users()) {
    if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
        User->getOpcode() != ISD::ATOMIC_LOAD &&
        User->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
      return false;
  }

  return true;
}
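
// ADDlow is the ":lo12:" half of a small-code-model global address; folding
// it gives "adrp x0, sym" followed by "ldr w1, [x0, :lo12:sym]", which is
// only profitable when every user is a plain load or store.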

/// Check if the immediate offset is valid as a scaled immediate.
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
                                     unsigned Size) {
  if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
      Offset < (Range << Log2_32(Size)))
    return true;
  return false;
}
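
// E.g. with Range = 0x1000 and Size = 8 (an LDR Xt), multiples of 8 from 0 to
// 32760 qualify (imm12 = offset / 8); 32761 is misaligned and 32768 is out of
// range.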

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed
/// BW-bit immediate" address. The "Size" argument is the size in bytes of the
/// memory reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N,
                                                        bool IsSignedImm,
                                                        unsigned BW,
                                                        unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // Unsigned immediate.
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base,
                                                SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}
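
// This is the LDUR/STUR form: an 8-byte access at offset -8, or at the
// unaligned offset 17, cannot use the scaled imm12 encoding but fits the
// signed 9-bit range [-256, 255] and selects here instead.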

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
                                       N);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFoldingAddr(N, Size);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->users()) {
    if (!isMemOpOrPrefetch(UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(LHS, Size))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(RHS, Size))
      return true;
  }

  return false;
}
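
// WRO is the "W register offset" form, e.g. "ldr x0, [x1, w2, sxtw #3]":
// Base, Offset, SignExtend and DoShift together describe that operand bundle.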

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD of LSL #12", ignore such
    // constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
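
// E.g. 0x555000 is preferred: it fits "ADD ..., LSL #12" yet spans bits
// 12-22, so no single MOVZ can produce it. 0x20000 is not preferred, since
// "MOVZ w0, #2, LSL #16" materializes it in one instruction.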

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->users()) {
    if (!isMemOpOrPrefetch(UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it can not be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
  // either. In that case [BaseReg + 0] would be used and code like this
  // generated:
  //   MOV  X0, WideImmediate
  //   ADD  X1, BaseReg, X0
  //   LDR  X2, [X1, 0]
  // For such a situation, using [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //   MOV  X0, WideImmediate
  //   LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip if the immediate can be selected by a load/store addressing mode,
    // or if it can be encoded by a single ADD (SUB is also checked by using
    // -ImmOff).
    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
  assert(Regs.size() == 2 || Regs.size() == 4);

  // The createTuple interface requires 3 RegClassIDs for each possible
  // tuple type even though we only have them for ZPR2 and ZPR4.
  static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
                                         AArch64::ZPR4Mul4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};
  return createTuple(Regs, RegClassIDs, SubRegs);
}
1598
1599SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1600 const unsigned RegClassIDs[],
1601 const unsigned SubRegs[]) {
1602 // There's no special register-class for a vector-list of 1 element: it's just
1603 // a vector.
1604 if (Regs.size() == 1)
1605 return Regs[0];
1606
1607 assert(Regs.size() >= 2 && Regs.size() <= 4);
1608
1609 SDLoc DL(Regs[0]);
1610
1611 SmallVector<SDValue, 4> Ops;
1612
1613 // First operand of REG_SEQUENCE is the desired RegClass.
1614 Ops.push_back(
1615 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1616
1617 // Then we get pairs of source & subregister-position for the components.
1618 for (unsigned i = 0; i < Regs.size(); ++i) {
1619 Ops.push_back(Regs[i]);
1620 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1621 }
1622
1623 SDNode *N =
1624 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1625 return SDValue(N, 0);
1626}
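// For illustration (a sketch, not part of the upstream source):
// createQTuple({a, b}) builds, conceptually,
//   REG_SEQUENCE QQRegClassID, a, qsub0, b, qsub1
// producing an untyped value constrained to a consecutive Q-register pair,
// so the "registers must be consecutive" requirement of the multi-vector
// loads/stores is enforced by the register allocator, not by the selector.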
1627
1628void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1629 bool isExt) {
1630 SDLoc dl(N);
1631 EVT VT = N->getValueType(0);
1632
1633 unsigned ExtOff = isExt;
1634
1635 // Form a REG_SEQUENCE to force register allocation.
1636 unsigned Vec0Off = ExtOff + 1;
1637 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1638 SDValue RegSeq = createQTuple(Regs);
1639
1641 if (isExt)
1642 Ops.push_back(N->getOperand(1));
1643 Ops.push_back(RegSeq);
1644 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1645 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1646}
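// For illustration: the table-lookup instructions selected here consume a
// consecutive Q-register list plus an index vector, e.g. for two vectors:
//   tbl v0.16b, { v1.16b, v2.16b }, v3.16b
// The TBX ("isExt") variants additionally read the destination as the
// fallback for out-of-range indices, which is why operand 1 is pushed first
// when isExt is set.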
1647
1648static std::tuple<SDValue, SDValue>
1649extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1650 SDLoc DL(Disc);
1651 SDValue AddrDisc;
1652 SDValue ConstDisc;
1653
1654 // If this is a blend, remember the constant and address discriminators.
1655 // Otherwise, it's either a constant discriminator, or a non-blended
1656 // address discriminator.
1657 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1658 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1659 AddrDisc = Disc->getOperand(1);
1660 ConstDisc = Disc->getOperand(2);
1661 } else {
1662 ConstDisc = Disc;
1663 }
1664
1665 // If the constant discriminator (either the blend RHS, or the entire
1666 // discriminator value) isn't a 16-bit constant, bail out, and let the
1667 // discriminator be computed separately.
1668 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1669 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1670 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1671
1672 // If there's no address discriminator, use XZR directly.
1673 if (!AddrDisc)
1674 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1675
1676 return std::make_tuple(
1677 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1678 AddrDisc);
1679}
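// A usage sketch (hypothetical values, not part of the upstream source):
// for a discriminator built as
//   %disc = call i64 @llvm.ptrauth.blend(i64 %addr.disc, i64 1234)
// this helper returns (1234, %addr.disc); a plain constant 1234 yields
// (1234, XZR); anything else, including a constant wider than 16 bits,
// falls back to (0, Disc) so the discriminator is computed separately.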
1680
1681void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1682 SDLoc DL(N);
1683 // IntrinsicID is operand #0
1684 SDValue Val = N->getOperand(1);
1685 SDValue AUTKey = N->getOperand(2);
1686 SDValue AUTDisc = N->getOperand(3);
1687
1688 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1689 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1690
1691 SDValue AUTAddrDisc, AUTConstDisc;
1692 std::tie(AUTConstDisc, AUTAddrDisc) =
1693 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1694
1695 if (!Subtarget->isX16X17Safer()) {
1696 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1697 // Copy deactivation symbol if present.
1698 if (N->getNumOperands() > 4)
1699 Ops.push_back(N->getOperand(4));
1700
1701 SDNode *AUT =
1702 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1703 ReplaceNode(N, AUT);
1704 } else {
1705 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1706 AArch64::X16, Val, SDValue());
1707 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1708
1709 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1710 ReplaceNode(N, AUT);
1711 }
1712}
1713
1714void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1715 SDLoc DL(N);
1716 // IntrinsicID is operand #0; if W_CHAIN, it is #1.
1717 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1718 SDValue Val = N->getOperand(OffsetBase + 1);
1719 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1720 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1721 SDValue PACKey = N->getOperand(OffsetBase + 4);
1722 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1723 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1724 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1725
1726 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1727 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1728
1729 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1730 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1731
1732 SDValue AUTAddrDisc, AUTConstDisc;
1733 std::tie(AUTConstDisc, AUTAddrDisc) =
1734 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1735
1736 SDValue PACAddrDisc, PACConstDisc;
1737 std::tie(PACConstDisc, PACAddrDisc) =
1738 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1739
1740 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1741 AArch64::X16, Val, SDValue());
1742
1743 if (HasLoad) {
1744 SDValue Addend = N->getOperand(OffsetBase + 6);
1745 SDValue IncomingChain = N->getOperand(0);
1746 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1747 PACKey, PACConstDisc, PACAddrDisc,
1748 Addend, IncomingChain, X16Copy.getValue(1)};
1749
1750 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1751 MVT::i64, MVT::Other, Ops);
1752 ReplaceNode(N, AUTRELLOADPAC);
1753 } else {
1754 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1755 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1756
1757 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1758 ReplaceNode(N, AUTPAC);
1759 }
1760}
1761
1762bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1763 LoadSDNode *LD = cast<LoadSDNode>(N);
1764 if (LD->isUnindexed())
1765 return false;
1766 EVT VT = LD->getMemoryVT();
1767 EVT DstVT = N->getValueType(0);
1768 ISD::MemIndexedMode AM = LD->getAddressingMode();
1769 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1770 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1771 int OffsetVal = (int)OffsetOp->getZExtValue();
1772
1773 // We're not doing validity checking here. That was done when checking
1774 // if we should mark the load as indexed or not. We're just selecting
1775 // the right instruction.
1776 unsigned Opcode = 0;
1777
1778 ISD::LoadExtType ExtType = LD->getExtensionType();
1779 bool InsertTo64 = false;
1780 if (VT == MVT::i64)
1781 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1782 else if (VT == MVT::i32) {
1783 if (ExtType == ISD::NON_EXTLOAD)
1784 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1785 else if (ExtType == ISD::SEXTLOAD)
1786 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1787 else {
1788 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1789 InsertTo64 = true;
1790 // The result of the load is only i32. It's the subreg_to_reg that makes
1791 // it into an i64.
1792 DstVT = MVT::i32;
1793 }
1794 } else if (VT == MVT::i16) {
1795 if (ExtType == ISD::SEXTLOAD) {
1796 if (DstVT == MVT::i64)
1797 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1798 else
1799 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1800 } else {
1801 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1802 InsertTo64 = DstVT == MVT::i64;
1803 // The result of the load is only i32. It's the subreg_to_reg that makes
1804 // it into an i64.
1805 DstVT = MVT::i32;
1806 }
1807 } else if (VT == MVT::i8) {
1808 if (ExtType == ISD::SEXTLOAD) {
1809 if (DstVT == MVT::i64)
1810 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1811 else
1812 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1813 } else {
1814 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1815 InsertTo64 = DstVT == MVT::i64;
1816 // The result of the load is only i32. It's the subreg_to_reg that makes
1817 // it into an i64.
1818 DstVT = MVT::i32;
1819 }
1820 } else if (VT == MVT::f16) {
1821 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1822 } else if (VT == MVT::bf16) {
1823 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1824 } else if (VT == MVT::f32) {
1825 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1826 } else if (VT == MVT::f64 ||
1827 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1828 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1829 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1830 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1831 } else if (VT.is64BitVector()) {
1832 if (IsPre || OffsetVal != 8)
1833 return false;
1834 switch (VT.getScalarSizeInBits()) {
1835 case 8:
1836 Opcode = AArch64::LD1Onev8b_POST;
1837 break;
1838 case 16:
1839 Opcode = AArch64::LD1Onev4h_POST;
1840 break;
1841 case 32:
1842 Opcode = AArch64::LD1Onev2s_POST;
1843 break;
1844 case 64:
1845 Opcode = AArch64::LD1Onev1d_POST;
1846 break;
1847 default:
1848 llvm_unreachable("Expected vector element to be a power of 2");
1849 }
1850 } else if (VT.is128BitVector()) {
1851 if (IsPre || OffsetVal != 16)
1852 return false;
1853 switch (VT.getScalarSizeInBits()) {
1854 case 8:
1855 Opcode = AArch64::LD1Onev16b_POST;
1856 break;
1857 case 16:
1858 Opcode = AArch64::LD1Onev8h_POST;
1859 break;
1860 case 32:
1861 Opcode = AArch64::LD1Onev4s_POST;
1862 break;
1863 case 64:
1864 Opcode = AArch64::LD1Onev2d_POST;
1865 break;
1866 default:
1867 llvm_unreachable("Expected vector element to be a power of 2");
1868 }
1869 } else
1870 return false;
1871 SDValue Chain = LD->getChain();
1872 SDValue Base = LD->getBasePtr();
1873 SDLoc dl(N);
1874 // LD1 encodes an immediate offset by using XZR as the offset register.
1875 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1876 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1877 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1878 SDValue Ops[] = { Base, Offset, Chain };
1879 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1880 MVT::Other, Ops);
1881
1882 // Transfer memoperands.
1883 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1884 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1885
1886 // Either way, we're replacing the node, so tell the caller that.
1887 SDValue LoadedVal = SDValue(Res, 1);
1888 if (InsertTo64) {
1889 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1890 LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
1891 MVT::i64, LoadedVal, SubReg),
1892 0);
1893 }
1894
1895 ReplaceUses(SDValue(N, 0), LoadedVal);
1896 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1897 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1898 CurDAG->RemoveDeadNode(N);
1899 return true;
1900}
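// Illustrative mapping (not exhaustive) of the forms selected above:
//   pre-index:  ldr x0, [x1, #8]!   ; base updated before the access
//   post-index: ldr x0, [x1], #8    ; base updated after the access
// The updated base is produced as the machine node's first (i64) result and
// wired to the indexed load's write-back use.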
1901
1902void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1903 unsigned SubRegIdx) {
1904 SDLoc dl(N);
1905 EVT VT = N->getValueType(0);
1906 SDValue Chain = N->getOperand(0);
1907
1908 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1909 Chain};
1910
1911 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1912
1913 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1914 SDValue SuperReg = SDValue(Ld, 0);
1915 for (unsigned i = 0; i < NumVecs; ++i)
1916 ReplaceUses(SDValue(N, i),
1917 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1918
1919 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1920
1921 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1922 // because it's too simple to have needed special treatment during lowering.
1923 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1924 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1925 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1926 }
1927
1928 CurDAG->RemoveDeadNode(N);
1929}
1930
1931void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1932 unsigned Opc, unsigned SubRegIdx) {
1933 SDLoc dl(N);
1934 EVT VT = N->getValueType(0);
1935 SDValue Chain = N->getOperand(0);
1936
1937 SDValue Ops[] = {N->getOperand(1), // Mem operand
1938 N->getOperand(2), // Incremental
1939 Chain};
1940
1941 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1942 MVT::Untyped, MVT::Other};
1943
1944 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1945
1946 // Update uses of write back register
1947 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1948
1949 // Update uses of vector list
1950 SDValue SuperReg = SDValue(Ld, 1);
1951 if (NumVecs == 1)
1952 ReplaceUses(SDValue(N, 0), SuperReg);
1953 else
1954 for (unsigned i = 0; i < NumVecs; ++i)
1955 ReplaceUses(SDValue(N, i),
1956 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1957
1958 // Update the chain
1959 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1960 CurDAG->RemoveDeadNode(N);
1961}
1962
1963/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1964/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1965/// new Base and an SDValue representing the new offset.
1966std::tuple<unsigned, SDValue, SDValue>
1967AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1968 unsigned Opc_ri,
1969 const SDValue &OldBase,
1970 const SDValue &OldOffset,
1971 unsigned Scale) {
1972 SDValue NewBase = OldBase;
1973 SDValue NewOffset = OldOffset;
1974 // Detect a possible Reg+Imm addressing mode.
1975 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1976 N, OldBase, NewBase, NewOffset);
1977
1978 // Detect a possible reg+reg addressing mode, but only if we haven't already
1979 // detected a Reg+Imm one.
1980 const bool IsRegReg =
1981 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1982
1983 // Select the instruction.
1984 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1985}
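// For illustration, with an SVE ld1w (Scale = 2) the two candidate forms are:
//   reg+imm: ld1w { z0.s }, p0/z, [x0, #1, mul vl]   ; imm in [-8, 7]
//   reg+reg: ld1w { z0.s }, p0/z, [x0, x1, lsl #2]   ; index scaled by 4
// and the reg+imm form is preferred when both would match.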
1986
1987enum class SelectTypeKind {
1988 Int1 = 0,
1989 Int = 1,
1990 FP = 2,
1991 AnyType = 3,
1992};
1993
1994/// This function selects an opcode from a list of opcodes, which is
1995/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1996/// element types, in this order.
1997template <SelectTypeKind Kind>
1998static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1999 // Only match scalable vector VTs
2000 if (!VT.isScalableVector())
2001 return 0;
2002
2003 EVT EltVT = VT.getVectorElementType();
2004 unsigned Key = VT.getVectorMinNumElements();
2005 switch (Kind) {
2006 case SelectTypeKind::AnyType:
2007 break;
2008 case SelectTypeKind::Int:
2009 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2010 EltVT != MVT::i64)
2011 return 0;
2012 break;
2013 case SelectTypeKind::Int1:
2014 if (EltVT != MVT::i1)
2015 return 0;
2016 break;
2017 case SelectTypeKind::FP:
2018 if (EltVT == MVT::bf16)
2019 Key = 16;
2020 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2021 EltVT != MVT::f64)
2022 return 0;
2023 break;
2024 }
2025
2026 unsigned Offset;
2027 switch (Key) {
2028 case 16: // 8-bit or bf16
2029 Offset = 0;
2030 break;
2031 case 8: // 16-bit
2032 Offset = 1;
2033 break;
2034 case 4: // 32-bit
2035 Offset = 2;
2036 break;
2037 case 2: // 64-bit
2038 Offset = 3;
2039 break;
2040 default:
2041 return 0;
2042 }
2043
2044 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2045}
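// Example (a sketch of the mapping above): for VT = nxv4i32 with
// Kind = SelectTypeKind::Int, the minimum element count 4 yields Offset 2,
// i.e. the 32-bit opcode; nxv8bf16 remaps Key to 16 and so takes Opcodes[0],
// the slot otherwise used for 8-bit element types.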
2046
2047// This function is almost identical to SelectWhilePair, but has an
2048// extra check on the range of the immediate operand.
2049// TODO: Merge these two functions together at some point?
2050void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2051 // Immediate can be either 0 or 1.
2052 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2053 if (Imm->getZExtValue() > 1)
2054 return;
2055
2056 SDLoc DL(N);
2057 EVT VT = N->getValueType(0);
2058 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2059 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2060 SDValue SuperReg = SDValue(WhilePair, 0);
2061
2062 for (unsigned I = 0; I < 2; ++I)
2063 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2064 AArch64::psub0 + I, DL, VT, SuperReg));
2065
2066 CurDAG->RemoveDeadNode(N);
2067}
2068
2069void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2070 SDLoc DL(N);
2071 EVT VT = N->getValueType(0);
2072
2073 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2074
2075 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2076 SDValue SuperReg = SDValue(WhilePair, 0);
2077
2078 for (unsigned I = 0; I < 2; ++I)
2079 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2080 AArch64::psub0 + I, DL, VT, SuperReg));
2081
2082 CurDAG->RemoveDeadNode(N);
2083}
2084
2085void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2086 unsigned Opcode) {
2087 EVT VT = N->getValueType(0);
2088 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2089 SDValue Ops = createZTuple(Regs);
2090 SDLoc DL(N);
2091 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2092 SDValue SuperReg = SDValue(Intrinsic, 0);
2093 for (unsigned i = 0; i < NumVecs; ++i)
2094 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2095 AArch64::zsub0 + i, DL, VT, SuperReg));
2096
2097 CurDAG->RemoveDeadNode(N);
2098}
2099
2100void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2101 unsigned Opcode) {
2102 SDLoc DL(N);
2103 EVT VT = N->getValueType(0);
2104 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2105 Ops.push_back(/*Chain*/ N->getOperand(0));
2106
2107 SDNode *Instruction =
2108 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2109 SDValue SuperReg = SDValue(Instruction, 0);
2110
2111 for (unsigned i = 0; i < NumVecs; ++i)
2112 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2113 AArch64::zsub0 + i, DL, VT, SuperReg));
2114
2115 // Copy chain
2116 unsigned ChainIdx = NumVecs;
2117 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2118 CurDAG->RemoveDeadNode(N);
2119}
2120
2121void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2122 unsigned NumVecs,
2123 bool IsZmMulti,
2124 unsigned Opcode,
2125 bool HasPred) {
2126 assert(Opcode != 0 && "Unexpected opcode");
2127
2128 SDLoc DL(N);
2129 EVT VT = N->getValueType(0);
2130 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2131 SmallVector<SDValue, 4> Ops;
2132
2133 auto GetMultiVecOperand = [&]() {
2134 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2135 OpsIter += NumVecs;
2136 return createZMulTuple(Regs);
2137 };
2138
2139 if (HasPred)
2140 Ops.push_back(*OpsIter++);
2141
2142 Ops.push_back(GetMultiVecOperand());
2143 if (IsZmMulti)
2144 Ops.push_back(GetMultiVecOperand());
2145 else
2146 Ops.push_back(*OpsIter++);
2147
2148 // Append any remaining operands.
2149 Ops.append(OpsIter, N->op_end());
2150 SDNode *Intrinsic;
2151 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2152 SDValue SuperReg = SDValue(Intrinsic, 0);
2153 for (unsigned i = 0; i < NumVecs; ++i)
2154 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2155 AArch64::zsub0 + i, DL, VT, SuperReg));
2156
2157 CurDAG->RemoveDeadNode(N);
2158}
2159
2160void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2161 unsigned Scale, unsigned Opc_ri,
2162 unsigned Opc_rr, bool IsIntr) {
2163 assert(Scale < 5 && "Invalid scaling value.");
2164 SDLoc DL(N);
2165 EVT VT = N->getValueType(0);
2166 SDValue Chain = N->getOperand(0);
2167
2168 // Optimize addressing mode.
2169 SDValue Base, Offset;
2170 unsigned Opc;
2171 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2172 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2173 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2174
2175 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2176 Base, // Memory operand
2177 Offset, Chain};
2178
2179 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2180
2181 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2182 SDValue SuperReg = SDValue(Load, 0);
2183 for (unsigned i = 0; i < NumVecs; ++i)
2184 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2185 AArch64::zsub0 + i, DL, VT, SuperReg));
2186
2187 // Copy chain
2188 unsigned ChainIdx = NumVecs;
2189 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2190 CurDAG->RemoveDeadNode(N);
2191}
2192
2193void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2194 unsigned NumVecs,
2195 unsigned Scale,
2196 unsigned Opc_ri,
2197 unsigned Opc_rr) {
2198 assert(Scale < 4 && "Invalid scaling value.");
2199 SDLoc DL(N);
2200 EVT VT = N->getValueType(0);
2201 SDValue Chain = N->getOperand(0);
2202
2203 SDValue PNg = N->getOperand(2);
2204 SDValue Base = N->getOperand(3);
2205 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2206 unsigned Opc;
2207 std::tie(Opc, Base, Offset) =
2208 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2209
2210 SDValue Ops[] = {PNg, // Predicate-as-counter
2211 Base, // Memory operand
2212 Offset, Chain};
2213
2214 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2215
2216 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2217 SDValue SuperReg = SDValue(Load, 0);
2218 for (unsigned i = 0; i < NumVecs; ++i)
2219 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2220 AArch64::zsub0 + i, DL, VT, SuperReg));
2221
2222 // Copy chain
2223 unsigned ChainIdx = NumVecs;
2224 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2225 CurDAG->RemoveDeadNode(N);
2226}
2227
2228void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2229 unsigned Opcode) {
2230 if (N->getValueType(0) != MVT::nxv4f32)
2231 return;
2232 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2233}
2234
2235void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2236 unsigned NumOutVecs,
2237 unsigned Opc,
2238 uint32_t MaxImm) {
2239 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2240 if (Imm->getZExtValue() > MaxImm)
2241 return;
2242
2243 SDValue ZtValue;
2244 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2245 return;
2246
2247 SDValue Chain = Node->getOperand(0);
2248 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2249 SDLoc DL(Node);
2250 EVT VT = Node->getValueType(0);
2251
2252 SDNode *Instruction =
2253 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2254 SDValue SuperReg = SDValue(Instruction, 0);
2255
2256 for (unsigned I = 0; I < NumOutVecs; ++I)
2257 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2258 AArch64::zsub0 + I, DL, VT, SuperReg));
2259
2260 // Copy chain
2261 unsigned ChainIdx = NumOutVecs;
2262 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2263 CurDAG->RemoveDeadNode(Node);
2264}
2265
2266void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2267 unsigned NumOutVecs,
2268 unsigned Opc) {
2269 SDValue ZtValue;
2270 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2271 return;
2272
2273 SDValue Chain = Node->getOperand(0);
2274 SDValue Ops[] = {ZtValue,
2275 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2276 Chain};
2277
2278 SDLoc DL(Node);
2279 EVT VT = Node->getValueType(0);
2280
2281 SDNode *Instruction =
2282 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2283 SDValue SuperReg = SDValue(Instruction, 0);
2284
2285 for (unsigned I = 0; I < NumOutVecs; ++I)
2286 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2287 AArch64::zsub0 + I, DL, VT, SuperReg));
2288
2289 // Copy chain
2290 unsigned ChainIdx = NumOutVecs;
2291 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2292 CurDAG->RemoveDeadNode(Node);
2293}
2294
2295void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2296 unsigned Op) {
2297 SDLoc DL(N);
2298 EVT VT = N->getValueType(0);
2299
2300 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2301 SDValue Zd = createZMulTuple(Regs);
2302 SDValue Zn = N->getOperand(1 + NumVecs);
2303 SDValue Zm = N->getOperand(2 + NumVecs);
2304
2305 SDValue Ops[] = {Zd, Zn, Zm};
2306
2307 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2308 SDValue SuperReg = SDValue(Intrinsic, 0);
2309 for (unsigned i = 0; i < NumVecs; ++i)
2310 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2311 AArch64::zsub0 + i, DL, VT, SuperReg));
2312
2313 CurDAG->RemoveDeadNode(N);
2314}
2315
2316bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2317 switch (BaseReg) {
2318 default:
2319 return false;
2320 case AArch64::ZA:
2321 case AArch64::ZAB0:
2322 if (TileNum == 0)
2323 break;
2324 return false;
2325 case AArch64::ZAH0:
2326 if (TileNum <= 1)
2327 break;
2328 return false;
2329 case AArch64::ZAS0:
2330 if (TileNum <= 3)
2331 break;
2332 return false;
2333 case AArch64::ZAD0:
2334 if (TileNum <= 7)
2335 break;
2336 return false;
2337 }
2338
2339 BaseReg += TileNum;
2340 return true;
2341}
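// For illustration: SelectSMETile(BaseReg = AArch64::ZAS0, TileNum = 2)
// rewrites BaseReg to AArch64::ZAS2 (the tile registers are consecutive in
// the generated register enum) and returns true, while TileNum = 4 is
// rejected since only tiles ZAS0..ZAS3 exist at 32-bit granularity.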
2342
2343template <unsigned MaxIdx, unsigned Scale>
2344void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2345 unsigned BaseReg, unsigned Op) {
2346 unsigned TileNum = 0;
2347 if (BaseReg != AArch64::ZA)
2348 TileNum = N->getConstantOperandVal(2);
2349
2350 if (!SelectSMETile(BaseReg, TileNum))
2351 return;
2352
2353 SDValue SliceBase, Base, Offset;
2354 if (BaseReg == AArch64::ZA)
2355 SliceBase = N->getOperand(2);
2356 else
2357 SliceBase = N->getOperand(3);
2358
2359 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2360 return;
2361
2362 SDLoc DL(N);
2363 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2364 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2365 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2366
2367 EVT VT = N->getValueType(0);
2368 for (unsigned I = 0; I < NumVecs; ++I)
2369 ReplaceUses(SDValue(N, I),
2370 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2371 SDValue(Mov, 0)));
2372 // Copy chain
2373 unsigned ChainIdx = NumVecs;
2374 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2375 CurDAG->RemoveDeadNode(N);
2376}
2377
2378void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2379 unsigned Op, unsigned MaxIdx,
2380 unsigned Scale, unsigned BaseReg) {
2381 // The slice operand can be in different positions:
2382 // Array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2383 // Tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2384 SDValue SliceBase = N->getOperand(2);
2385 if (BaseReg != AArch64::ZA)
2386 SliceBase = N->getOperand(3);
2387
2388 SDValue Base, Offset;
2389 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2390 return;
2391 // The correct ZA tile number is computed during machine-instruction
2392 // lowering (see EmitZAInstr); the DAG cannot select a ZA tile as an
2393 // output register with a ZReg.
2394 SDLoc DL(N);
2395 SmallVector<SDValue, 4> Ops;
2396 if (BaseReg != AArch64::ZA)
2397 Ops.push_back(N->getOperand(2));
2398 Ops.push_back(Base);
2399 Ops.push_back(Offset);
2400 Ops.push_back(N->getOperand(0)); // Chain
2401 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2402
2403 EVT VT = N->getValueType(0);
2404 for (unsigned I = 0; I < NumVecs; ++I)
2405 ReplaceUses(SDValue(N, I),
2406 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2407 SDValue(Mov, 0)));
2408
2409 // Copy chain
2410 unsigned ChainIdx = NumVecs;
2411 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2412 CurDAG->RemoveDeadNode(N);
2413}
2414
2415void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2416 unsigned NumOutVecs,
2417 bool IsTupleInput,
2418 unsigned Opc) {
2419 SDLoc DL(N);
2420 EVT VT = N->getValueType(0);
2421 unsigned NumInVecs = N->getNumOperands() - 1;
2422
2423 SmallVector<SDValue, 4> Ops;
2424 if (IsTupleInput) {
2425 assert((NumInVecs == 2 || NumInVecs == 4) &&
2426 "Don't know how to handle multi-register input!");
2427 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2428 Ops.push_back(createZMulTuple(Regs));
2429 } else {
2430 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2431 for (unsigned I = 0; I < NumInVecs; I++)
2432 Ops.push_back(N->getOperand(1 + I));
2433 }
2434
2435 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2436 SDValue SuperReg = SDValue(Res, 0);
2437
2438 for (unsigned I = 0; I < NumOutVecs; I++)
2439 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2440 AArch64::zsub0 + I, DL, VT, SuperReg));
2441 CurDAG->RemoveDeadNode(N);
2442}
2443
2444void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2445 unsigned Opc) {
2446 SDLoc dl(N);
2447 EVT VT = N->getOperand(2)->getValueType(0);
2448
2449 // Form a REG_SEQUENCE to force register allocation.
2450 bool Is128Bit = VT.getSizeInBits() == 128;
2451 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2452 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2453
2454 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2455 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2456
2457 // Transfer memoperands.
2458 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2459 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2460
2461 ReplaceNode(N, St);
2462}
2463
2464void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2465 unsigned Scale, unsigned Opc_rr,
2466 unsigned Opc_ri) {
2467 SDLoc dl(N);
2468
2469 // Form a REG_SEQUENCE to force register allocation.
2470 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2471 SDValue RegSeq = createZTuple(Regs);
2472
2473 // Optimize addressing mode.
2474 unsigned Opc;
2475 SDValue Base, Offset;
2476 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2477 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2478 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2479
2480 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2481 Base, // address
2482 Offset, // offset
2483 N->getOperand(0)}; // chain
2484 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2485
2486 ReplaceNode(N, St);
2487}
2488
2489bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2490 SDValue &OffImm) {
2491 SDLoc dl(N);
2492 const DataLayout &DL = CurDAG->getDataLayout();
2493 const TargetLowering *TLI = getTargetLowering();
2494
2495 // Try to match it for the frame address
2496 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2497 int FI = FINode->getIndex();
2498 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2499 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2500 return true;
2501 }
2502
2503 return false;
2504}
2505
2506void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2507 unsigned Opc) {
2508 SDLoc dl(N);
2509 EVT VT = N->getOperand(2)->getValueType(0);
2510 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2511 MVT::Other}; // Type for the Chain
2512
2513 // Form a REG_SEQUENCE to force register allocation.
2514 bool Is128Bit = VT.getSizeInBits() == 128;
2515 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2516 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2517
2518 SDValue Ops[] = {RegSeq,
2519 N->getOperand(NumVecs + 1), // base register
2520 N->getOperand(NumVecs + 2), // Incremental
2521 N->getOperand(0)}; // Chain
2522 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2523
2524 ReplaceNode(N, St);
2525}
2526
2527namespace {
2528/// WidenVector - Given a value in the V64 register class, produce the
2529/// equivalent value in the V128 register class.
2530class WidenVector {
2531 SelectionDAG &DAG;
2532
2533public:
2534 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2535
2536 SDValue operator()(SDValue V64Reg) {
2537 EVT VT = V64Reg.getValueType();
2538 unsigned NarrowSize = VT.getVectorNumElements();
2539 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2540 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2541 SDLoc DL(V64Reg);
2542
2543 SDValue Undef =
2544 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2545 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2546 }
2547};
2548} // namespace
2549
2550/// NarrowVector - Given a value in the V128 register class, produce the
2551/// equivalent value in the V64 register class.
2552static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2553 EVT VT = V128Reg.getValueType();
2554 unsigned WideSize = VT.getVectorNumElements();
2555 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2556 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2557
2558 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2559 V128Reg);
2560}
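// For illustration: WidenVector turns a v2f32 value into v4f32 by inserting
// it into the dsub subregister of an IMPLICIT_DEF, and NarrowVector undoes
// this by extracting dsub (e.g. v4f32 -> v2f32); no data movement is
// implied, both are pure subregister operations.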
2561
2562void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2563 unsigned Opc) {
2564 SDLoc dl(N);
2565 EVT VT = N->getValueType(0);
2566 bool Narrow = VT.getSizeInBits() == 64;
2567
2568 // Form a REG_SEQUENCE to force register allocation.
2569 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2570
2571 if (Narrow)
2572 transform(Regs, Regs.begin(),
2573 WidenVector(*CurDAG));
2574
2575 SDValue RegSeq = createQTuple(Regs);
2576
2577 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2578
2579 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2580
2581 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2582 N->getOperand(NumVecs + 3), N->getOperand(0)};
2583 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2584 SDValue SuperReg = SDValue(Ld, 0);
2585
2586 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2587 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2588 AArch64::qsub2, AArch64::qsub3 };
2589 for (unsigned i = 0; i < NumVecs; ++i) {
2590 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2591 if (Narrow)
2592 NV = NarrowVector(NV, *CurDAG);
2593 ReplaceUses(SDValue(N, i), NV);
2594 }
2595
2596 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2597 CurDAG->RemoveDeadNode(N);
2598}
2599
2600void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2601 unsigned Opc) {
2602 SDLoc dl(N);
2603 EVT VT = N->getValueType(0);
2604 bool Narrow = VT.getSizeInBits() == 64;
2605
2606 // Form a REG_SEQUENCE to force register allocation.
2607 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2608
2609 if (Narrow)
2610 transform(Regs, Regs.begin(),
2611 WidenVector(*CurDAG));
2612
2613 SDValue RegSeq = createQTuple(Regs);
2614
2615 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2616 RegSeq->getValueType(0), MVT::Other};
2617
2618 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2619
2620 SDValue Ops[] = {RegSeq,
2621 CurDAG->getTargetConstant(LaneNo, dl,
2622 MVT::i64), // Lane Number
2623 N->getOperand(NumVecs + 2), // Base register
2624 N->getOperand(NumVecs + 3), // Incremental
2625 N->getOperand(0)};
2626 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2627
2628 // Update uses of the write back register
2629 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2630
2631 // Update uses of the vector list
2632 SDValue SuperReg = SDValue(Ld, 1);
2633 if (NumVecs == 1) {
2634 ReplaceUses(SDValue(N, 0),
2635 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2636 } else {
2637 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2638 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2639 AArch64::qsub2, AArch64::qsub3 };
2640 for (unsigned i = 0; i < NumVecs; ++i) {
2641 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2642 SuperReg);
2643 if (Narrow)
2644 NV = NarrowVector(NV, *CurDAG);
2645 ReplaceUses(SDValue(N, i), NV);
2646 }
2647 }
2648
2649 // Update the Chain
2650 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2651 CurDAG->RemoveDeadNode(N);
2652}
2653
2654void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2655 unsigned Opc) {
2656 SDLoc dl(N);
2657 EVT VT = N->getOperand(2)->getValueType(0);
2658 bool Narrow = VT.getSizeInBits() == 64;
2659
2660 // Form a REG_SEQUENCE to force register allocation.
2661 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2662
2663 if (Narrow)
2664 transform(Regs, Regs.begin(),
2665 WidenVector(*CurDAG));
2666
2667 SDValue RegSeq = createQTuple(Regs);
2668
2669 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2670
2671 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2672 N->getOperand(NumVecs + 3), N->getOperand(0)};
2673 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2674
2675 // Transfer memoperands.
2676 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2677 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2678
2679 ReplaceNode(N, St);
2680}
2681
2682void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2683 unsigned Opc) {
2684 SDLoc dl(N);
2685 EVT VT = N->getOperand(2)->getValueType(0);
2686 bool Narrow = VT.getSizeInBits() == 64;
2687
2688 // Form a REG_SEQUENCE to force register allocation.
2689 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2690
2691 if (Narrow)
2692 transform(Regs, Regs.begin(),
2693 WidenVector(*CurDAG));
2694
2695 SDValue RegSeq = createQTuple(Regs);
2696
2697 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2698 MVT::Other};
2699
2700 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2701
2702 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2703 N->getOperand(NumVecs + 2), // Base Register
2704 N->getOperand(NumVecs + 3), // Incremental
2705 N->getOperand(0)};
2706 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2707
2708 // Transfer memoperands.
2709 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2710 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2711
2712 ReplaceNode(N, St);
2713}
2714
2715static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2716 unsigned &Opc, SDValue &Opd0,
2717 unsigned &LSB, unsigned &MSB,
2718 unsigned NumberOfIgnoredLowBits,
2719 bool BiggerPattern) {
2720 assert(N->getOpcode() == ISD::AND &&
2721 "N must be a AND operation to call this function");
2722
2723 EVT VT = N->getValueType(0);
2724
2725 // Here we could test the type of VT and return false when the type does
2726 // not match, but since that check is done prior to this call in the
2727 // current context, we turn it into an assert to avoid redundant code.
2728 assert((VT == MVT::i32 || VT == MVT::i64) &&
2729 "Type checking must have been done before calling this function");
2730
2731 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2732 // changed the AND node to a 32-bit mask operation. We'll have to
2733 // undo that as part of the transform here if we want to catch all
2734 // the opportunities.
2735 // Currently the NumberOfIgnoredLowBits argument helps to recover
2736 // from these situations when matching the bigger pattern (bitfield insert).
2737
2738 // For unsigned extracts, check for a shift right and mask
2739 uint64_t AndImm = 0;
2740 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2741 return false;
2742
2743 const SDNode *Op0 = N->getOperand(0).getNode();
2744
2745 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2746 // simplified. Try to undo that
2747 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2748
2749 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2750 if (AndImm & (AndImm + 1))
2751 return false;
2752
2753 bool ClampMSB = false;
2754 uint64_t SrlImm = 0;
2755 // Handle the SRL + ANY_EXTEND case.
2756 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2757 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2758 // Extend the incoming operand of the SRL to 64-bit.
2759 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2760 // Make sure to clamp the MSB so that we preserve the semantics of the
2761 // original operations.
2762 ClampMSB = true;
2763 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2764 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2765 SrlImm)) {
2766 // If the shift result was truncated, we can still combine them.
2767 Opd0 = Op0->getOperand(0).getOperand(0);
2768
2769 // Use the type of SRL node.
2770 VT = Opd0->getValueType(0);
2771 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2772 Opd0 = Op0->getOperand(0);
2773 ClampMSB = (VT == MVT::i32);
2774 } else if (BiggerPattern) {
2775 // Let's pretend a 0 shift right has been performed.
2776 // The resulting code will be at least as good as the original one
2777 // plus it may expose more opportunities for bitfield insert pattern.
2778 // FIXME: Currently we limit this to the bigger pattern, because
2779 // some optimizations expect AND and not UBFM.
2780 Opd0 = N->getOperand(0);
2781 } else
2782 return false;
2783
2784 // Bail out on large immediates. This happens when no proper
2785 // combining/constant folding was performed.
2786 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2787 LLVM_DEBUG(
2788 (dbgs() << N
2789 << ": Found large shift immediate, this should not happen\n"));
2790 return false;
2791 }
2792
2793 LSB = SrlImm;
2794 MSB = SrlImm +
2795 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2796 : llvm::countr_one<uint64_t>(AndImm)) -
2797 1;
2798 if (ClampMSB)
2799 // Since we're moving the extend before the right shift operation, we need
2800 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2801 // the zeros which would get shifted in with the original right shift
2802 // operation.
2803 MSB = MSB > 31 ? 31 : MSB;
2804
2805 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2806 return true;
2807}
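// A worked example of the AND-based match above (a sketch, not exhaustive):
//   (i32 (and (srl x, 3), 0xff))
// gives AndImm = 0xff and SrlImm = 3, hence LSB = 3 and
// MSB = 3 + countr_one(0xff) - 1 = 10, selecting UBFMWri x, 3, 10,
// which disassembles as ubfx w0, w1, #3, #8.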
2808
2809static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2810 SDValue &Opd0, unsigned &Immr,
2811 unsigned &Imms) {
2812 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2813
2814 EVT VT = N->getValueType(0);
2815 unsigned BitWidth = VT.getSizeInBits();
2816 assert((VT == MVT::i32 || VT == MVT::i64) &&
2817 "Type checking must have been done before calling this function");
2818
2819 SDValue Op = N->getOperand(0);
2820 if (Op->getOpcode() == ISD::TRUNCATE) {
2821 Op = Op->getOperand(0);
2822 VT = Op->getValueType(0);
2823 BitWidth = VT.getSizeInBits();
2824 }
2825
2826 uint64_t ShiftImm;
2827 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2828 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2829 return false;
2830
2831 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2832 if (ShiftImm + Width > BitWidth)
2833 return false;
2834
2835 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2836 Opd0 = Op.getOperand(0);
2837 Immr = ShiftImm;
2838 Imms = ShiftImm + Width - 1;
2839 return true;
2840}
2841
2842static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2843 SDValue &Opd0, unsigned &LSB,
2844 unsigned &MSB) {
2845 // We are looking for the following pattern, which extracts several
2846 // contiguous bits from the source value and places them at the LSB of the
2847 // destination value; all other bits of the destination are set to zero:
2848 //
2849 // Value2 = AND Value, MaskImm
2850 // SRL Value2, ShiftImm
2851 //
2852 // where MaskImm >> ShiftImm determines the width of the extracted field.
2853 //
2854 // This gets selected into a single UBFM:
2855 //
2856 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2857 //
2858
2859 if (N->getOpcode() != ISD::SRL)
2860 return false;
2861
2862 uint64_t AndMask = 0;
2863 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2864 return false;
2865
2866 Opd0 = N->getOperand(0).getOperand(0);
2867
2868 uint64_t SrlImm = 0;
2869 if (!isIntImmediate(N->getOperand(1), SrlImm))
2870 return false;
2871
2872 // Check whether we really have a several-bits extract here.
2873 if (!isMask_64(AndMask >> SrlImm))
2874 return false;
2875
2876 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2877 LSB = SrlImm;
2878 MSB = llvm::Log2_64(AndMask);
2879 return true;
2880}
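// A worked example for the SRL-of-AND form: (srl (and x, 0xff0), 4) has
// AndMask = 0xff0 and SrlImm = 4; AndMask >> SrlImm = 0xff is a mask, so the
// match yields LSB = 4 and MSB = Log2_64(0xff0) = 11, i.e. UBFM x, 4, 11,
// an 8-bit unsigned extract starting at bit 4.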
2881
2882static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2883 unsigned &Immr, unsigned &Imms,
2884 bool BiggerPattern) {
2885 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2886 "N must be a SHR/SRA operation to call this function");
2887
2888 EVT VT = N->getValueType(0);
2889
2890 // Here we could test the type of VT and return false when the type does
2891 // not match, but since that check is done prior to this call in the
2892 // current context, we turn it into an assert to avoid redundant code.
2893 assert((VT == MVT::i32 || VT == MVT::i64) &&
2894 "Type checking must have been done before calling this function");
2895
2896 // Check for AND + SRL doing several bits extract.
2897 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2898 return true;
2899
2900 // We're looking for a shift of a shift.
2901 uint64_t ShlImm = 0;
2902 uint64_t TruncBits = 0;
2903 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2904 Opd0 = N->getOperand(0).getOperand(0);
2905 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2906 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2907 // We are looking for a shift of a truncate. A truncate from i64 to i32
2908 // can be considered as setting the high 32 bits to zero. Our strategy here
2909 // is to always generate a 64-bit UBFM; this consistency helps the CSE
2910 // pass later find more redundancy.
2911 Opd0 = N->getOperand(0).getOperand(0);
2912 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2913 VT = Opd0.getValueType();
2914 assert(VT == MVT::i64 && "the promoted type should be i64");
2915 } else if (BiggerPattern) {
2916 // Let's pretend a 0 shift left has been performed.
2917 // FIXME: Currently we limit this to the bigger pattern case,
2918 // because some optimizations expect AND and not UBFM
2919 Opd0 = N->getOperand(0);
2920 } else
2921 return false;
2922
2923 // Missing combines/constant folding may have left us with strange
2924 // constants.
2925 if (ShlImm >= VT.getSizeInBits()) {
2926 LLVM_DEBUG(
2927 (dbgs() << N
2928 << ": Found large shift immediate, this should not happen\n"));
2929 return false;
2930 }
2931
2932 uint64_t SrlImm = 0;
2933 if (!isIntImmediate(N->getOperand(1), SrlImm))
2934 return false;
2935
2936 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2937 "bad amount in shift node!");
2938 int immr = SrlImm - ShlImm;
2939 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2940 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2941 // SRA requires a signed extraction
2942 if (VT == MVT::i32)
2943 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2944 else
2945 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2946 return true;
2947}
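// A worked example for the shift-of-shift form on i32:
//   (srl (shl x, 24), 28)
// gives ShlImm = 24 and SrlImm = 28, so Immr = 28 - 24 = 4 and
// Imms = 32 - 24 - 1 = 7, selecting UBFMWri x, 4, 7, an unsigned extract of
// bits [7:4] (ubfx w0, w1, #4, #4); SRA instead of SRL would pick SBFMWri.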
2948
2949bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2950 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2951
2952 EVT VT = N->getValueType(0);
2953 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2954 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2955 return false;
2956
2957 uint64_t ShiftImm;
2958 SDValue Op = N->getOperand(0);
2959 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2960 return false;
2961
2962 SDLoc dl(N);
2963 // Extend the incoming operand of the shift to 64-bits.
2964 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2965 unsigned Immr = ShiftImm;
2966 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2967 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2968 CurDAG->getTargetConstant(Imms, dl, VT)};
2969 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2970 return true;
2971}
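// For illustration: (i64 (sext (i32 (sra x, 5)))) widens x to 64 bits and
// selects SBFMXri widened_x, 5, 31, i.e. sbfx x0, x1, #5, #27, performing
// the 27-bit signed extract and the extension to i64 in one instruction.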
2972
2973static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2974 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2975 unsigned NumberOfIgnoredLowBits = 0,
2976 bool BiggerPattern = false) {
2977 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2978 return false;
2979
2980 switch (N->getOpcode()) {
2981 default:
2982 if (!N->isMachineOpcode())
2983 return false;
2984 break;
2985 case ISD::AND:
2986 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2987 NumberOfIgnoredLowBits, BiggerPattern);
2988 case ISD::SRL:
2989 case ISD::SRA:
2990 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2991
2992 case ISD::SIGN_EXTEND_INREG:
2993 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2994 }
2995
2996 unsigned NOpc = N->getMachineOpcode();
2997 switch (NOpc) {
2998 default:
2999 return false;
3000 case AArch64::SBFMWri:
3001 case AArch64::UBFMWri:
3002 case AArch64::SBFMXri:
3003 case AArch64::UBFMXri:
3004 Opc = NOpc;
3005 Opd0 = N->getOperand(0);
3006 Immr = N->getConstantOperandVal(1);
3007 Imms = N->getConstantOperandVal(2);
3008 return true;
3009 }
3010 // Unreachable
3011 return false;
3012}
3013
3014bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
3015 unsigned Opc, Immr, Imms;
3016 SDValue Opd0;
3017 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
3018 return false;
3019
3020 EVT VT = N->getValueType(0);
3021 SDLoc dl(N);
3022
3023 // If the bit extract operation is 64bit but the original type is 32bit, we
3024 // need to add one EXTRACT_SUBREG.
3025 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3026 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3027 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3028
3029 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3030 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3031 MVT::i32, SDValue(BFM, 0));
3032 ReplaceNode(N, Inner.getNode());
3033 return true;
3034 }
3035
3036 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3037 CurDAG->getTargetConstant(Imms, dl, VT)};
3038 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3039 return true;
3040}
3041
3042/// Does DstMask form a complementary pair with the mask provided by
3043/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3044/// this asks whether DstMask zeroes precisely those bits that will be set by
3045/// the other half.
3046static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3047 unsigned NumberOfIgnoredHighBits, EVT VT) {
3048 assert((VT == MVT::i32 || VT == MVT::i64) &&
3049 "i32 or i64 mask type expected!");
3050 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3051
3052 // Enable implicitTrunc as we're intentionally ignoring high bits.
3053 APInt SignificantDstMask =
3054 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3055 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3056
3057 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3058 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3059}
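// For example, with VT = i32 and no ignored high bits, DstMask = 0xffff0000
// and BitsToBeInserted = 0x0000ffff form a complementary pair (disjoint and
// together covering all 32 bits), so a BFI of the low half into the masked
// value is legal; DstMask = 0xffff00ff would fail the disjointness test.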
3060
3061// Look for bits that will be useful for later uses.
3062// A bit is considered useless as soon as it is dropped and never used
3063// before being dropped.
3064// E.g., looking for the useful bits of x:
3065// 1. y = x & 0x7
3066// 2. z = y >> 2
3067// After #1, the useful bits of x are 0x7, and they live through
3068// y.
3069// After #2, the useful bits of x are 0x4.
3070// However, if x is used on an unpredictable instruction, then all its bits
3071// are useful.
3072// E.g.
3073// 1. y = x & 0x7
3074// 2. z = y >> 2
3075// 3. str x, [@x]
3076static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3077
3079 unsigned Depth) {
3080 uint64_t Imm =
3081 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3082 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3083 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3084 getUsefulBits(Op, UsefulBits, Depth + 1);
3085}
3086
3088 uint64_t Imm, uint64_t MSB,
3089 unsigned Depth) {
3090 // inherit the bitwidth value
3091 APInt OpUsefulBits(UsefulBits);
3092 OpUsefulBits = 1;
3093
3094 if (MSB >= Imm) {
3095 OpUsefulBits <<= MSB - Imm + 1;
3096 --OpUsefulBits;
3097 // The interesting part will be in the lower part of the result
3098 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3099 // The interesting part was starting at Imm in the argument
3100 OpUsefulBits <<= Imm;
3101 } else {
3102 OpUsefulBits <<= MSB + 1;
3103 --OpUsefulBits;
3104 // The interesting part will be shifted in the result
3105 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3106 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3107 // The interesting part was at zero in the argument
3108 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3109 }
3110
3111 UsefulBits &= OpUsefulBits;
3112}
3113
3114static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3115 unsigned Depth) {
3116 uint64_t Imm =
3117 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3118 uint64_t MSB =
3119 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3120
3121 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3122}
3123
3125 unsigned Depth) {
3126 uint64_t ShiftTypeAndValue =
3127 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3128 APInt Mask(UsefulBits);
3129 Mask.clearAllBits();
3130 Mask.flipAllBits();
3131
3132 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3133 // Shift Left
3134 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3135 Mask <<= ShiftAmt;
3136 getUsefulBits(Op, Mask, Depth + 1);
3137 Mask.lshrInPlace(ShiftAmt);
3138 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3139 // Shift Right
3140 // We do not handle AArch64_AM::ASR, because the sign will change the
3141 // number of useful bits
3142 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3143 Mask.lshrInPlace(ShiftAmt);
3144 getUsefulBits(Op, Mask, Depth + 1);
3145 Mask <<= ShiftAmt;
3146 } else
3147 return;
3148
3149 UsefulBits &= Mask;
3150}
3151
3152static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3153 unsigned Depth) {
3154 uint64_t Imm =
3155 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3156 uint64_t MSB =
3157 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3158
3159 APInt OpUsefulBits(UsefulBits);
3160 OpUsefulBits = 1;
3161
3162 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3163 ResultUsefulBits.flipAllBits();
3164 APInt Mask(UsefulBits.getBitWidth(), 0);
3165
3166 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3167
3168 if (MSB >= Imm) {
3169 // The instruction is a BFXIL.
3170 uint64_t Width = MSB - Imm + 1;
3171 uint64_t LSB = Imm;
3172
3173 OpUsefulBits <<= Width;
3174 --OpUsefulBits;
3175
3176 if (Op.getOperand(1) == Orig) {
3177 // Copy the low bits from the result to bits starting from LSB.
3178 Mask = ResultUsefulBits & OpUsefulBits;
3179 Mask <<= LSB;
3180 }
3181
3182 if (Op.getOperand(0) == Orig)
3183 // Bits starting from LSB in the input contribute to the result.
3184 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3185 } else {
3186 // The instruction is a BFI.
3187 uint64_t Width = MSB + 1;
3188 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3189
3190 OpUsefulBits <<= Width;
3191 --OpUsefulBits;
3192 OpUsefulBits <<= LSB;
3193
3194 if (Op.getOperand(1) == Orig) {
3195 // Copy the bits from the result to the zero bits.
3196 Mask = ResultUsefulBits & OpUsefulBits;
3197 Mask.lshrInPlace(LSB);
3198 }
3199
3200 if (Op.getOperand(0) == Orig)
3201 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3202 }
3203
3204 UsefulBits &= Mask;
3205}
3206
3207static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3208 SDValue Orig, unsigned Depth) {
3209
3210 // Users of this node should have already been instruction selected
3211 // FIXME: Can we turn that into an assert?
3212 if (!UserNode->isMachineOpcode())
3213 return;
3214
3215 switch (UserNode->getMachineOpcode()) {
3216 default:
3217 return;
3218 case AArch64::ANDSWri:
3219 case AArch64::ANDSXri:
3220 case AArch64::ANDWri:
3221 case AArch64::ANDXri:
3222 // We increment Depth only when we call getUsefulBits.
3223 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3224 Depth);
3225 case AArch64::UBFMWri:
3226 case AArch64::UBFMXri:
3227 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3228
3229 case AArch64::ORRWrs:
3230 case AArch64::ORRXrs:
3231 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3232 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3233 Depth);
3234 return;
3235 case AArch64::BFMWri:
3236 case AArch64::BFMXri:
3237 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3238
3239 case AArch64::STRBBui:
3240 case AArch64::STURBBi:
3241 if (UserNode->getOperand(0) != Orig)
3242 return;
3243 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3244 return;
3245
3246 case AArch64::STRHHui:
3247 case AArch64::STURHHi:
3248 if (UserNode->getOperand(0) != Orig)
3249 return;
3250 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3251 return;
3252 }
3253}
3254
3255 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3256 if (Depth >= SelectionDAG::MaxRecursionDepth)
3257 return;
3258 // Initialize UsefulBits
3259 if (!Depth) {
3260 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3261 // At the beginning, assume every produced bit is useful.
3262 UsefulBits = APInt(Bitwidth, 0);
3263 UsefulBits.flipAllBits();
3264 }
3265 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3266
3267 for (SDNode *Node : Op.getNode()->users()) {
3268 // A use cannot produce useful bits
3269 APInt UsefulBitsForUse = APInt(UsefulBits);
3270 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3271 UsersUsefulBits |= UsefulBitsForUse;
3272 }
3273 // UsefulBits contains the produced bits that are meaningful for the
3274 // current definition, thus a user cannot make a bit meaningful at
3275 // this point
3276 UsefulBits &= UsersUsefulBits;
3277}
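// An example to illustrate getUsefulBits (illustrative): if a 32-bit value is
// only consumed by a byte store (STRBBui), the loop above accumulates
// UsersUsefulBits = 0x000000ff, so only the low 8 bits of the definition are
// treated as useful by the bitfield matchers below.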
3278
3279/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3280/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3281/// 0, return Op unchanged.
3282static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3283 if (ShlAmount == 0)
3284 return Op;
3285
3286 EVT VT = Op.getValueType();
3287 SDLoc dl(Op);
3288 unsigned BitWidth = VT.getSizeInBits();
3289 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3290
3291 SDNode *ShiftNode;
3292 if (ShlAmount > 0) {
3293 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3294 ShiftNode = CurDAG->getMachineNode(
3295 UBFMOpc, dl, VT, Op,
3296 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3297 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3298 } else {
3299 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3300 assert(ShlAmount < 0 && "expected right shift");
3301 int ShrAmount = -ShlAmount;
3302 ShiftNode = CurDAG->getMachineNode(
3303 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3304 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3305 }
3306
3307 return SDValue(ShiftNode, 0);
3308}
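// An example to illustrate getLeftShift (illustrative registers): on MVT::i32,
// ShlAmount = 3 emits "UBFM wD, wN, #29, #28" (the alias "lsl wD, wN, #3"),
// while ShlAmount = -3 emits "UBFM wD, wN, #3, #31" (the alias
// "lsr wD, wN, #3").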
3309
3310// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3311static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3312 bool BiggerPattern,
3313 const uint64_t NonZeroBits,
3314 SDValue &Src, int &DstLSB,
3315 int &Width);
3316
3317 // For bit-field-positioning pattern "(shl VAL, N)".
3318static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3319 bool BiggerPattern,
3320 const uint64_t NonZeroBits,
3321 SDValue &Src, int &DstLSB,
3322 int &Width);
3323
3324/// Does this tree qualify as an attempt to move a bitfield into position,
3325/// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
3326 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3327 bool BiggerPattern, SDValue &Src,
3328 int &DstLSB, int &Width) {
3329 EVT VT = Op.getValueType();
3330 unsigned BitWidth = VT.getSizeInBits();
3331 (void)BitWidth;
3332 assert(BitWidth == 32 || BitWidth == 64);
3333
3334 KnownBits Known = CurDAG->computeKnownBits(Op);
3335
3336 // Non-zero in the sense that they're not provably zero, which is the key
3337 // point if we want to use this value
3338 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3339 if (!isShiftedMask_64(NonZeroBits))
3340 return false;
3341
3342 switch (Op.getOpcode()) {
3343 default:
3344 break;
3345 case ISD::AND:
3346 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3347 NonZeroBits, Src, DstLSB, Width);
3348 case ISD::SHL:
3349 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3350 NonZeroBits, Src, DstLSB, Width);
3351 }
3352
3353 return false;
3354}
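// An example to illustrate the AND case (illustrative): for i32
// "(and (shl val, 3), 0x78)", NonZeroBits = 0x78 is a shifted mask with
// DstLSB = 3 and Width = 4; ShlImm == DstLSB, so Src is simply 'val' and the
// node is a UBFIZ candidate.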
3355
3356 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3357 bool BiggerPattern,
3358 const uint64_t NonZeroBits,
3359 SDValue &Src, int &DstLSB,
3360 int &Width) {
3361 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3362
3363 EVT VT = Op.getValueType();
3364 assert((VT == MVT::i32 || VT == MVT::i64) &&
3365 "Caller guarantees VT is one of i32 or i64");
3366 (void)VT;
3367
3368 uint64_t AndImm;
3369 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3370 return false;
3371
3372 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3373 // 1) (AndImm & (1 << POS) == 0)
3374 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3375 //
3376 // 1) and 2) don't agree so something must be wrong (e.g., in
3377 // 'SelectionDAG::computeKnownBits')
3378 assert((~AndImm & NonZeroBits) == 0 &&
3379 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3380
3381 SDValue AndOp0 = Op.getOperand(0);
3382
3383 uint64_t ShlImm;
3384 SDValue ShlOp0;
3385 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3386 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3387 ShlOp0 = AndOp0.getOperand(0);
3388 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3389 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3390 ShlImm)) {
3391 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3392
3393 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3394 SDValue ShlVal = AndOp0.getOperand(0);
3395
3396 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3397 // expect VT to be MVT::i32.
3398 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3399
3400 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3401 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3402 } else
3403 return false;
3404
3405 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3406 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3407 // AndOp0+AND.
3408 if (!BiggerPattern && !AndOp0.hasOneUse())
3409 return false;
3410
3411 DstLSB = llvm::countr_zero(NonZeroBits);
3412 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3413
3414 // Bail out on large Width. This happens when no proper combining / constant
3415 // folding was performed.
3416 if (Width >= (int)VT.getSizeInBits()) {
3417 // If VT is i64, Width > 64 is not sensible since NonZeroBits is uint64_t, and
3418 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3419 // "val".
3420 // If VT is i32, here is what Width >= 32 means:
3421 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3422 // demands at least 'Width' bits (after the dag-combiner). This, together
3423 // with the `any_extend` Op (undefined higher bits), indicates a missed
3424 // combination when lowering the 'and' IR instruction to a machine IR
3425 // instruction.
3425 LLVM_DEBUG(
3426 dbgs()
3427 << "Found large Width in bit-field-positioning -- this indicates no "
3428 "proper combining / constant folding was performed\n");
3429 return false;
3430 }
3431
3432 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3433 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3434 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3435 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3436 // which case it is not profitable to insert an extra shift.
3437 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3438 return false;
3439
3440 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3441 return true;
3442}
3443
3444 // For node "(shl (and val, mask), N)", returns true if the node is equivalent
3445 // to UBFIZ.
3446 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3447 SDValue &Src, int &DstLSB,
3448 int &Width) {
3449 // The caller should have verified that Op is a left shift with a constant
3450 // shift amount; the asserts below check that.
3451 assert(Op.getOpcode() == ISD::SHL &&
3452 "Op.getNode() should be a SHL node to call this function");
3453 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3454 "Op.getNode() should shift ShlImm to call this function");
3455
3456 uint64_t AndImm = 0;
3457 SDValue Op0 = Op.getOperand(0);
3458 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3459 return false;
3460
3461 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3462 if (isMask_64(ShiftedAndImm)) {
3463 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3464 // should end with Mask, and could be prefixed with random bits if those
3465 // bits are shifted out.
3466 //
3467 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3468 // the bits of the AND result corresponding to them are shifted out, so it's
3469 // fine not to extract them.
3470 Width = llvm::countr_one(ShiftedAndImm);
3471 DstLSB = ShlImm;
3472 Src = Op0.getOperand(0);
3473 return true;
3474 }
3475 return false;
3476}
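// An example to illustrate the case above (illustrative): for i64
// "(shl (and val, 0xff), 8)", ShiftedAndImm == 0xff is a mask, so Width = 8,
// DstLSB = 8 and Src = 'val', i.e. the node is equivalent to
// "ubfiz xd, val, #8, #8".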
3477
3478 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3479 bool BiggerPattern,
3480 const uint64_t NonZeroBits,
3481 SDValue &Src, int &DstLSB,
3482 int &Width) {
3483 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3484
3485 EVT VT = Op.getValueType();
3486 assert((VT == MVT::i32 || VT == MVT::i64) &&
3487 "Caller guarantees that type is i32 or i64");
3488 (void)VT;
3489
3490 uint64_t ShlImm;
3491 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3492 return false;
3493
3494 if (!BiggerPattern && !Op.hasOneUse())
3495 return false;
3496
3497 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3498 return true;
3499
3500 DstLSB = llvm::countr_zero(NonZeroBits);
3501 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3502
3503 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3504 return false;
3505
3506 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3507 return true;
3508}
3509
3510static bool isShiftedMask(uint64_t Mask, EVT VT) {
3511 assert(VT == MVT::i32 || VT == MVT::i64);
3512 if (VT == MVT::i32)
3513 return isShiftedMask_32(Mask);
3514 return isShiftedMask_64(Mask);
3515}
3516
3517// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3518 // inserted only sets known zero bits.
3519 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3520 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3521
3522 EVT VT = N->getValueType(0);
3523 if (VT != MVT::i32 && VT != MVT::i64)
3524 return false;
3525
3526 unsigned BitWidth = VT.getSizeInBits();
3527
3528 uint64_t OrImm;
3529 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3530 return false;
3531
3532 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3533 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3534 // performance neutral.
3535 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3536 return false;
3537
3538 uint64_t MaskImm;
3539 SDValue And = N->getOperand(0);
3540 // Must be a single use AND with an immediate operand.
3541 if (!And.hasOneUse() ||
3542 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3543 return false;
3544
3545 // Compute the Known Zero for the AND as this allows us to catch more general
3546 // cases than just looking for AND with imm.
3547 KnownBits Known = CurDAG->computeKnownBits(And);
3548
3549 // Non-zero in the sense that they're not provably zero, which is the key
3550 // point if we want to use this value.
3551 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3552
3553 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3554 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3555 return false;
3556
3557 // The bits being inserted must only set those bits that are known to be zero.
3558 if ((OrImm & NotKnownZero) != 0) {
3559 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3560 // currently handle this case.
3561 return false;
3562 }
3563
3564 // BFI/BFXIL dst, src, #lsb, #width.
3565 int LSB = llvm::countr_one(NotKnownZero);
3566 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3567
3568 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3569 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3570 unsigned ImmS = Width - 1;
3571
3572 // If we're creating a BFI instruction, avoid cases where we need more
3573 // instructions to materialize the BFI constant as compared to the original
3574 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3575 // should be no worse in this case.
3576 bool IsBFI = LSB != 0;
3577 uint64_t BFIImm = OrImm >> LSB;
3578 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3579 // We have a BFI instruction and we know the constant can't be materialized
3580 // with a ORR-immediate with the zero register.
3581 unsigned OrChunks = 0, BFIChunks = 0;
3582 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3583 if (((OrImm >> Shift) & 0xFFFF) != 0)
3584 ++OrChunks;
3585 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3586 ++BFIChunks;
3587 }
3588 if (BFIChunks > OrChunks)
3589 return false;
3590 }
3591
3592 // Materialize the constant to be inserted.
3593 SDLoc DL(N);
3594 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3595 SDNode *MOVI = CurDAG->getMachineNode(
3596 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3597
3598 // Create the BFI/BFXIL instruction.
3599 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3600 CurDAG->getTargetConstant(ImmR, DL, VT),
3601 CurDAG->getTargetConstant(ImmS, DL, VT)};
3602 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3603 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3604 return true;
3605}
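// A worked example for the routine above (illustrative registers): for i32
// "or (and X, 0xffff00ff), 0x5500", Known.Zero of the AND is 0x0000ff00, so
// NotKnownZero = 0xffff00ff, LSB = 8 and Width = 8. BFIImm = 0x5500 >> 8 =
// 0x55, giving:
//   mov w8, #0x55        ; MOVi32imm
//   bfm wX, w8, #24, #7  ; the alias "bfi wX, w8, #8, #8"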
3606
3607 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3608 SDValue &ShiftedOperand,
3609 uint64_t &EncodedShiftImm) {
3610 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3611 if (!Dst.hasOneUse())
3612 return false;
3613
3614 EVT VT = Dst.getValueType();
3615 assert((VT == MVT::i32 || VT == MVT::i64) &&
3616 "Caller should guarantee that VT is one of i32 or i64");
3617 const unsigned SizeInBits = VT.getSizeInBits();
3618
3619 SDLoc DL(Dst.getNode());
3620 uint64_t AndImm, ShlImm;
3621 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3622 isShiftedMask_64(AndImm)) {
3623 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3624 SDValue DstOp0 = Dst.getOperand(0);
3625 if (!DstOp0.hasOneUse())
3626 return false;
3627
3628 // An example to illustrate the transformation
3629 // From:
3630 // lsr x8, x1, #1
3631 // and x8, x8, #0x3f80
3632 // bfxil x8, x1, #0, #7
3633 // To:
3634 // and x8, x1, #0x7f
3635 // ubfx x9, x1, #8, #7
3636 // orr x8, x8, x9, lsl #7
3637 //
3638 // The number of instructions remains the same, but ORR is faster than BFXIL
3639 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3640 // the dependency chain is improved after the transformation.
3641 uint64_t SrlImm;
3642 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3643 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3644 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3645 unsigned MaskWidth =
3646 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3647 unsigned UBFMOpc =
3648 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3649 SDNode *UBFMNode = CurDAG->getMachineNode(
3650 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3651 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3652 VT),
3653 CurDAG->getTargetConstant(
3654 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3655 ShiftedOperand = SDValue(UBFMNode, 0);
3656 EncodedShiftImm = AArch64_AM::getShifterImm(
3657 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3658 return true;
3659 }
3660 }
3661 return false;
3662 }
3663
3664 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3665 ShiftedOperand = Dst.getOperand(0);
3666 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3667 return true;
3668 }
3669
3670 uint64_t SrlImm;
3671 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3672 ShiftedOperand = Dst.getOperand(0);
3673 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3674 return true;
3675 }
3676 return false;
3677}
3678
3679// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3680// the operands and select it to AArch64::ORR with shifted registers if
3681// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3682static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3683 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3684 const bool BiggerPattern) {
3685 EVT VT = N->getValueType(0);
3686 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3687 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3688 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3689 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3690 assert((VT == MVT::i32 || VT == MVT::i64) &&
3691 "Expect result type to be i32 or i64 since N is combinable to BFM");
3692 SDLoc DL(N);
3693
3694 // Bail out if BFM simplifies away one node in BFM Dst.
3695 if (OrOpd1 != Dst)
3696 return false;
3697
3698 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3699 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3700 // nodes from Rn (or inserts an additional shift node) if BiggerPattern is true.
3701 if (BiggerPattern) {
3702 uint64_t SrcAndImm;
3703 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3704 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3705 // OrOpd0 = AND Src, #Mask
3706 // So BFM simplifies away one AND node from Src and doesn't simplify away
3707 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3708 // one node (from Rd), ORR is better since it has higher throughput and
3709 // smaller latency than BFM on many AArch64 processors (and for the rest
3710 // ORR is at least as good as BFM).
3711 SDValue ShiftedOperand;
3712 uint64_t EncodedShiftImm;
3713 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3714 EncodedShiftImm)) {
3715 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3716 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3717 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3718 return true;
3719 }
3720 }
3721 return false;
3722 }
3723
3724 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3725
3726 uint64_t ShlImm;
3727 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3728 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3729 SDValue Ops[] = {
3730 Dst, Src,
3731 CurDAG->getTargetConstant(
3732 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3733 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3734 return true;
3735 }
3736
3737 // Select the following pattern to left-shifted operand rather than BFI.
3738 // %val1 = op ..
3739 // %val2 = shl %val1, #imm
3740 // %res = or %val1, %val2
3741 //
3742 // If N is selected to be BFI, we know that
3743 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3744 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3745 //
3746 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3747 if (OrOpd0.getOperand(0) == OrOpd1) {
3748 SDValue Ops[] = {
3749 OrOpd1, OrOpd1,
3750 CurDAG->getTargetConstant(
3751 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3752 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3753 return true;
3754 }
3755 }
3756
3757 uint64_t SrlImm;
3758 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3759 // Select the following pattern to right-shifted operand rather than BFXIL.
3760 // %val1 = op ..
3761 // %val2 = lshr %val1, #imm
3762 // %res = or %val1, %val2
3763 //
3764 // If N is selected to be BFXIL, we know that
3765 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3766 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3767 //
3768 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3769 if (OrOpd0.getOperand(0) == OrOpd1) {
3770 SDValue Ops[] = {
3771 OrOpd1, OrOpd1,
3772 CurDAG->getTargetConstant(
3773 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3774 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3775 return true;
3776 }
3777 }
3778
3779 return false;
3780}
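// An example to illustrate the SHL case above (illustrative registers): for
// "%val2 = shl %val1, #7; %res = or %val1, %val2", the selection emits
// "orr w0, w1, w1, lsl #7" instead of a BFI, which is at least as fast on
// most AArch64 cores and keeps the dependency chain short.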
3781
3782static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3783 SelectionDAG *CurDAG) {
3784 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3785
3786 EVT VT = N->getValueType(0);
3787 if (VT != MVT::i32 && VT != MVT::i64)
3788 return false;
3789
3790 unsigned BitWidth = VT.getSizeInBits();
3791
3792 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3793 // have the expected shape. Try to undo that.
3794
3795 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3796 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3797
3798 // Given a OR operation, check if we have the following pattern
3799 // ubfm c, b, imm, imm2 (or something that does the same job, see
3800 // isBitfieldExtractOp)
3801 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3802 // countTrailingZeros(mask2) == imm2 - imm + 1
3803 // f = d | c
3804 // if yes, replace the OR instruction with:
3805 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3806
3807 // OR is commutative, check all combinations of operand order and values of
3808 // BiggerPattern, i.e.
3809 // Opd0, Opd1, BiggerPattern=false
3810 // Opd1, Opd0, BiggerPattern=false
3811 // Opd0, Opd1, BiggerPattern=true
3812 // Opd1, Opd0, BiggerPattern=true
3813 // Several of these combinations may match, so check with BiggerPattern=false
3814 // first since that will produce better results by matching more instructions
3815 // and/or inserting fewer extra instructions.
3816 for (int I = 0; I < 4; ++I) {
3817
3818 SDValue Dst, Src;
3819 unsigned ImmR, ImmS;
3820 bool BiggerPattern = I / 2;
3821 SDValue OrOpd0Val = N->getOperand(I % 2);
3822 SDNode *OrOpd0 = OrOpd0Val.getNode();
3823 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3824 SDNode *OrOpd1 = OrOpd1Val.getNode();
3825
3826 unsigned BFXOpc;
3827 int DstLSB, Width;
3828 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3829 NumberOfIgnoredLowBits, BiggerPattern)) {
3830 // Check that the returned opcode is compatible with the pattern,
3831 // i.e., same type and zero extended (U and not S)
3832 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3833 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3834 continue;
3835
3836 // Compute the width of the bitfield insertion
3837 DstLSB = 0;
3838 Width = ImmS - ImmR + 1;
3839 // FIXME: This constraint is to catch bitfield insertion; we may
3840 // want to widen the pattern if we want to grab the general bitfield
3841 // move case.
3842 if (Width <= 0)
3843 continue;
3844
3845 // If the mask on the insertee is correct, we have a BFXIL operation. We
3846 // can share the ImmR and ImmS values from the already-computed UBFM.
3847 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3848 BiggerPattern,
3849 Src, DstLSB, Width)) {
3850 ImmR = (BitWidth - DstLSB) % BitWidth;
3851 ImmS = Width - 1;
3852 } else
3853 continue;
3854
3855 // Check the second part of the pattern
3856 EVT VT = OrOpd1Val.getValueType();
3857 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3858
3859 // Compute the Known Zero for the candidate of the first operand.
3860 // This allows us to catch more general cases than just looking for
3861 // AND with imm. Indeed, simplify-demanded-bits may have removed
3862 // the AND instruction because it proved to be useless.
3863 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3864
3865 // Check if there is enough room for the second operand to appear
3866 // in the first one
3867 APInt BitsToBeInserted =
3868 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3869
3870 if ((BitsToBeInserted & ~Known.Zero) != 0)
3871 continue;
3872
3873 // Set the first operand
3874 uint64_t Imm;
3875 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3876 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3877 // In that case, we can eliminate the AND
3878 Dst = OrOpd1->getOperand(0);
3879 else
3880 // Maybe the AND has been removed by simplify-demanded-bits
3881 // or is useful because it discards more bits
3882 Dst = OrOpd1Val;
3883
3884 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3885 // with shifted operand is more efficient.
3886 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3887 BiggerPattern))
3888 return true;
3889
3890 // both parts match
3891 SDLoc DL(N);
3892 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3893 CurDAG->getTargetConstant(ImmS, DL, VT)};
3894 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3895 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3896 return true;
3897 }
3898
3899 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3900 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3901 // mask (e.g., 0x000ffff0).
3902 uint64_t Mask0Imm, Mask1Imm;
3903 SDValue And0 = N->getOperand(0);
3904 SDValue And1 = N->getOperand(1);
3905 if (And0.hasOneUse() && And1.hasOneUse() &&
3906 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3907 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3908 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3909 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3910
3911 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3912 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3913 // bits to be inserted.
3914 if (isShiftedMask(Mask0Imm, VT)) {
3915 std::swap(And0, And1);
3916 std::swap(Mask0Imm, Mask1Imm);
3917 }
3918
3919 SDValue Src = And1->getOperand(0);
3920 SDValue Dst = And0->getOperand(0);
3921 unsigned LSB = llvm::countr_zero(Mask1Imm);
3922 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3923
3924 // The BFXIL inserts the low-order bits from a source register, so right
3925 // shift the needed bits into place.
3926 SDLoc DL(N);
3927 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3928 uint64_t LsrImm = LSB;
3929 if (Src->hasOneUse() &&
3930 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3931 (LsrImm + LSB) < BitWidth) {
3932 Src = Src->getOperand(0);
3933 LsrImm += LSB;
3934 }
3935
3936 SDNode *LSR = CurDAG->getMachineNode(
3937 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3938 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3939
3940 // BFXIL is an alias of BFM, so translate to BFM operands.
3941 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3942 unsigned ImmS = Width - 1;
3943
3944 // Create the BFXIL instruction.
3945 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3946 CurDAG->getTargetConstant(ImmR, DL, VT),
3947 CurDAG->getTargetConstant(ImmS, DL, VT)};
3948 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3949 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3950 return true;
3951 }
3952
3953 return false;
3954}
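// A worked example for the two-AND case above (illustrative registers): for
// i32 "or (and X, 0xff0000ff), (and Y, 0x00ffff00)", Mask1Imm = 0x00ffff00 is
// a shifted mask with LSB = 8 and Width = 32 - popcount(0xff0000ff) = 16, so
// we emit:
//   ubfm w8, wY, #8, #31  ; the alias "lsr w8, wY, #8"
//   bfm  wX, w8, #24, #15 ; the alias "bfi wX, w8, #8, #16"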
3955
3956bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3957 if (N->getOpcode() != ISD::OR)
3958 return false;
3959
3960 APInt NUsefulBits;
3961 getUsefulBits(SDValue(N, 0), NUsefulBits);
3962
3963 // If none of the bits are useful, just return UNDEF.
3964 if (!NUsefulBits) {
3965 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3966 return true;
3967 }
3968
3969 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3970 return true;
3971
3972 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3973}
3974
3975/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3976/// equivalent of a left shift by a constant amount followed by an and masking
3977/// out a contiguous set of bits.
3978bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3979 if (N->getOpcode() != ISD::AND)
3980 return false;
3981
3982 EVT VT = N->getValueType(0);
3983 if (VT != MVT::i32 && VT != MVT::i64)
3984 return false;
3985
3986 SDValue Op0;
3987 int DstLSB, Width;
3988 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3989 Op0, DstLSB, Width))
3990 return false;
3991
3992 // ImmR is the rotate right amount.
3993 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3994 // ImmS is the most significant bit of the source to be moved.
3995 unsigned ImmS = Width - 1;
3996
3997 SDLoc DL(N);
3998 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3999 CurDAG->getTargetConstant(ImmS, DL, VT)};
4000 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
4001 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4002 return true;
4003}
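// An example to illustrate tryBitfieldInsertInZeroOp (illustrative): for i32
// "and (shl val, 5), 0x3e0", DstLSB = 5 and Width = 5, giving ImmR = 27 and
// ImmS = 4, i.e. "ubfiz wd, val, #5, #5".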
4004
4005/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
4006/// variable shift/rotate instructions.
4007bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4008 EVT VT = N->getValueType(0);
4009
4010 unsigned Opc;
4011 switch (N->getOpcode()) {
4012 case ISD::ROTR:
4013 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
4014 break;
4015 case ISD::SHL:
4016 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
4017 break;
4018 case ISD::SRL:
4019 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
4020 break;
4021 case ISD::SRA:
4022 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
4023 break;
4024 default:
4025 return false;
4026 }
4027
4028 uint64_t Size;
4029 uint64_t Bits;
4030 if (VT == MVT::i32) {
4031 Bits = 5;
4032 Size = 32;
4033 } else if (VT == MVT::i64) {
4034 Bits = 6;
4035 Size = 64;
4036 } else
4037 return false;
4038
4039 SDValue ShiftAmt = N->getOperand(1);
4040 SDLoc DL(N);
4041 SDValue NewShiftAmt;
4042
4043 // Skip over an extend of the shift amount.
4044 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
4045 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
4046 ShiftAmt = ShiftAmt->getOperand(0);
4047
4048 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
4049 SDValue Add0 = ShiftAmt->getOperand(0);
4050 SDValue Add1 = ShiftAmt->getOperand(1);
4051 uint64_t Add0Imm;
4052 uint64_t Add1Imm;
4053 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
4054 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
4055 // to avoid the ADD/SUB.
4056 NewShiftAmt = Add0;
4057 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4058 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
4059 (Add0Imm % Size == 0)) {
4060 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
4061 // to generate a NEG instead of a SUB from a constant.
4062 unsigned NegOpc;
4063 unsigned ZeroReg;
4064 EVT SubVT = ShiftAmt->getValueType(0);
4065 if (SubVT == MVT::i32) {
4066 NegOpc = AArch64::SUBWrr;
4067 ZeroReg = AArch64::WZR;
4068 } else {
4069 assert(SubVT == MVT::i64);
4070 NegOpc = AArch64::SUBXrr;
4071 ZeroReg = AArch64::XZR;
4072 }
4073 SDValue Zero =
4074 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4075 MachineSDNode *Neg =
4076 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
4077 NewShiftAmt = SDValue(Neg, 0);
4078 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4079 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
4080 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
4081 // to generate a NOT instead of a SUB from a constant.
4082 unsigned NotOpc;
4083 unsigned ZeroReg;
4084 EVT SubVT = ShiftAmt->getValueType(0);
4085 if (SubVT == MVT::i32) {
4086 NotOpc = AArch64::ORNWrr;
4087 ZeroReg = AArch64::WZR;
4088 } else {
4089 assert(SubVT == MVT::i64);
4090 NotOpc = AArch64::ORNXrr;
4091 ZeroReg = AArch64::XZR;
4092 }
4093 SDValue Zero =
4094 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4095 MachineSDNode *Not =
4096 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
4097 NewShiftAmt = SDValue(Not, 0);
4098 } else
4099 return false;
4100 } else {
4101 // If the shift amount is masked with an AND, check that the mask covers the
4102 // bits that are implicitly ANDed off by the above opcodes and, if so, skip
4103 // the AND.
4104 uint64_t MaskImm;
4105 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
4106 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
4107 return false;
4108
4109 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
4110 return false;
4111
4112 NewShiftAmt = ShiftAmt->getOperand(0);
4113 }
4114
4115 // Narrow/widen the shift amount to match the size of the shift operation.
4116 if (VT == MVT::i32)
4117 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
4118 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
4119 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
4120 MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
4121 NewShiftAmt, SubReg);
4122 NewShiftAmt = SDValue(Ext, 0);
4123 }
4124
4125 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
4126 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4127 return true;
4128}
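// An example to illustrate tryShiftAmountMod (illustrative registers): for an
// i64 "srl x, (and amt, 63)", the mask covers all 6 bits that LSRVXr uses
// implicitly, so the AND is skipped and "lsrv x0, x0, x1" is emitted directly.
// Likewise "shl x, (sub 64, amt)" becomes a NEG feeding LSLV, since
// 64 % Size == 0.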
4129
4130 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
4131 SDValue &FixedPos,
4132 unsigned RegWidth,
4133 bool isReciprocal) {
4134 APFloat FVal(0.0);
4135 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
4136 FVal = CN->getValueAPF();
4137 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4138 // Some otherwise illegal constants are allowed in this case.
4139 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4140 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4141 return false;
4142
4143 ConstantPoolSDNode *CN =
4144 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4145 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4146 } else
4147 return false;
4148
4149 if (unsigned FBits =
4150 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4151 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4152 return true;
4153 }
4154
4155 return false;
4156}
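// An example to illustrate the helper above (illustrative): when folding
// "fptosi (fmul x, 65536.0)" into a fixed-point convert, FVal = 65536.0 = 2^16
// yields FBits = 16, so selection can use the #fbits form, e.g.
// "fcvtzs w0, s0, #16".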
4157
4158 static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG,
4159 SDValue N,
4160 SDValue &FixedPos,
4161 unsigned RegWidth,
4162 bool isReciprocal) {
4163 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4164 N.getValueType().getScalarSizeInBits() ==
4165 N.getOperand(0).getValueType().getScalarSizeInBits())
4166 N = N.getOperand(0);
4167
4168 auto ImmToFloat = [RegWidth](APInt Imm) {
4169 switch (RegWidth) {
4170 case 16:
4171 return APFloat(APFloat::IEEEhalf(), Imm);
4172 case 32:
4173 return APFloat(APFloat::IEEEsingle(), Imm);
4174 case 64:
4175 return APFloat(APFloat::IEEEdouble(), Imm);
4176 default:
4177 llvm_unreachable("Unexpected RegWidth!");
4178 };
4179 };
4180
4181 APFloat FVal(0.0);
4182 switch (N->getOpcode()) {
4183 case AArch64ISD::MOVIshift:
4184 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
4185 << N.getConstantOperandVal(1)));
4186 break;
4187 case AArch64ISD::FMOV:
4188 FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
4189 break;
4190 case AArch64ISD::DUP:
4191 if (isa<ConstantSDNode>(N.getOperand(0)))
4192 FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
4193 else
4194 return false;
4195 break;
4196 default:
4197 return false;
4198 }
4199
4200 if (unsigned FBits =
4201 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4202 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4203 return true;
4204 }
4205
4206 return false;
4207}
4208
4209bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4210 unsigned RegWidth) {
4211 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4212 /*isReciprocal*/ false);
4213}
4214
4215bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4216 unsigned RegWidth) {
4217 return checkCVTFixedPointOperandWithFBitsForVectors(
4218 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ false);
4219}
4220
4221bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(SDValue N,
4222 SDValue &FixedPos,
4223 unsigned RegWidth) {
4224 return checkCVTFixedPointOperandWithFBitsForVectors(
4225 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ true);
4226}
4227
4228bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4229 SDValue &FixedPos,
4230 unsigned RegWidth) {
4231 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4232 /*isReciprocal*/ true);
4233}
4234
4235 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4236 // fields of the string, obtains the integer values from them, and combines
4237 // these into a single value to be used in the MRS/MSR instruction.
4238 static int getIntOperandFromRegisterString(StringRef RegString) {
4239 SmallVector<StringRef, 5> Fields;
4240 RegString.split(Fields, ':');
4241
4242 if (Fields.size() == 1)
4243 return -1;
4244
4245 assert(Fields.size() == 5
4246 && "Invalid number of fields in read register string");
4247
4248 SmallVector<int, 5> Ops;
4249 bool AllIntFields = true;
4250
4251 for (StringRef Field : Fields) {
4252 unsigned IntField;
4253 AllIntFields &= !Field.getAsInteger(10, IntField);
4254 Ops.push_back(IntField);
4255 }
4256
4257 assert(AllIntFields &&
4258 "Unexpected non-integer value in special register string.");
4259 (void)AllIntFields;
4260
4261 // Need to combine the integer fields of the string into a single value
4262 // based on the bit encoding of MRS/MSR instruction.
4263 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4264 (Ops[3] << 3) | (Ops[4]);
4265}
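// An example to illustrate the encoding above (illustrative): the register
// string "1:2:7:4:5" yields (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 =
// 0x53a5.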
4266
4267// Lower the read_register intrinsic to an MRS instruction node if the special
4268// register string argument is either of the form detailed in the ALCE (the
4269 // form described in getIntOperandFromRegisterString) or is a named register
4270// known by the MRS SysReg mapper.
4271bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4272 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4273 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4274 SDLoc DL(N);
4275
4276 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4277
4278 unsigned Opcode64Bit = AArch64::MRS;
4279 int Imm = getIntOperandFromRegisterString(RegString->getString());
4280 if (Imm == -1) {
4281 // No match. Use the sysreg mapper to map the remaining possible strings to
4282 // the value for the register to be used for the instruction operand.
4283 const auto *TheReg =
4284 AArch64SysReg::lookupSysRegByName(RegString->getString());
4285 if (TheReg && TheReg->Readable &&
4286 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4287 Imm = TheReg->Encoding;
4288 else
4289 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4290
4291 if (Imm == -1) {
4292 // Still no match, see if this is "pc" or give up.
4293 if (!ReadIs128Bit && RegString->getString() == "pc") {
4294 Opcode64Bit = AArch64::ADR;
4295 Imm = 0;
4296 } else {
4297 return false;
4298 }
4299 }
4300 }
4301
4302 SDValue InChain = N->getOperand(0);
4303 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4304 if (!ReadIs128Bit) {
4305 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4306 {SysRegImm, InChain});
4307 } else {
4308 SDNode *MRRS = CurDAG->getMachineNode(
4309 AArch64::MRRS, DL,
4310 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4311 {SysRegImm, InChain});
4312
4313 // Sysregs are not endian. The even register always contains the low half
4314 // of the register.
4315 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4316 SDValue(MRRS, 0));
4317 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4318 SDValue(MRRS, 0));
4319 SDValue OutChain = SDValue(MRRS, 1);
4320
4321 ReplaceUses(SDValue(N, 0), Lo);
4322 ReplaceUses(SDValue(N, 1), Hi);
4323 ReplaceUses(SDValue(N, 2), OutChain);
4324 }
4325 return true;
4326}
4327
4328// Lower the write_register intrinsic to an MSR instruction node if the special
4329// register string argument is either of the form detailed in the ALCE (the
4330 // form described in getIntOperandFromRegisterString) or is a named register
4331// known by the MSR SysReg mapper.
4332bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4333 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4334 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4335 SDLoc DL(N);
4336
4337 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4338
4339 if (!WriteIs128Bit) {
4340 // Check if the register was one of those allowed as the pstatefield value
4341 // in the MSR (immediate) instruction. To accept the values allowed in the
4342 // pstatefield for the MSR (immediate) instruction, we also require that an
4343 // immediate value has been provided as an argument; we know that this is
4344 // the case as it has been ensured by semantic checking.
4345 auto trySelectPState = [&](auto PMapper, unsigned State) {
4346 if (PMapper) {
4347 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4348 "Expected a constant integer expression.");
4349 unsigned Reg = PMapper->Encoding;
4350 uint64_t Immed = N->getConstantOperandVal(2);
4351 CurDAG->SelectNodeTo(
4352 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4353 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4354 return true;
4355 }
4356 return false;
4357 };
4358
4359 if (trySelectPState(
4360 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4361 AArch64::MSRpstateImm4))
4362 return true;
4363 if (trySelectPState(
4364 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4365 AArch64::MSRpstateImm1))
4366 return true;
4367 }
4368
4369 int Imm = getIntOperandFromRegisterString(RegString->getString());
4370 if (Imm == -1) {
4371 // Use the sysreg mapper to attempt to map the remaining possible strings
4372 // to the value for the register to be used for the MSR (register)
4373 // instruction operand.
4374 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4375 if (TheReg && TheReg->Writeable &&
4376 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4377 Imm = TheReg->Encoding;
4378 else
4379 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4380
4381 if (Imm == -1)
4382 return false;
4383 }
4384
4385 SDValue InChain = N->getOperand(0);
4386 if (!WriteIs128Bit) {
4387 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4388 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4389 N->getOperand(2), InChain);
4390 } else {
4391 // No endian swap. The lower half always goes into the even subreg, and the
4392 // higher half always into the odd subreg.
4393 SDNode *Pair = CurDAG->getMachineNode(
4394 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4395 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4396 MVT::i32),
4397 N->getOperand(2),
4398 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4399 N->getOperand(3),
4400 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4401
4402 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4403 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4404 SDValue(Pair, 0), InChain);
4405 }
4406
4407 return true;
4408}
4409
4410/// We've got special pseudo-instructions for these
4411bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4412 unsigned Opcode;
4413 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4414
4415 // Leave this to the LSE instructions if the subtarget supports them.
4416 if (Subtarget->hasLSE()) return false;
4417
4418 if (MemTy == MVT::i8)
4419 Opcode = AArch64::CMP_SWAP_8;
4420 else if (MemTy == MVT::i16)
4421 Opcode = AArch64::CMP_SWAP_16;
4422 else if (MemTy == MVT::i32)
4423 Opcode = AArch64::CMP_SWAP_32;
4424 else if (MemTy == MVT::i64)
4425 Opcode = AArch64::CMP_SWAP_64;
4426 else
4427 llvm_unreachable("Unknown AtomicCmpSwap type");
4428
4429 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4430 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4431 N->getOperand(0)};
4432 SDNode *CmpSwap = CurDAG->getMachineNode(
4433 Opcode, SDLoc(N),
4434 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4435
4436 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4437 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4438
4439 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4440 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4441 CurDAG->RemoveDeadNode(N);
4442
4443 return true;
4444}
4445
4446bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4447 SDValue &Shift, bool Negate) {
4448 if (!isa<ConstantSDNode>(N))
4449 return false;
4450
4451 APInt Val =
4452 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4453
4454 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4455}
4456
4457bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
4458 SDValue &Imm, SDValue &Shift,
4459 bool Negate) {
4460 if (Negate)
4461 Val = -Val;
4462
4463 switch (VT.SimpleTy) {
4464 case MVT::i8:
4465 // All immediates are supported.
4466 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4467 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4468 return true;
4469 case MVT::i16:
4470 case MVT::i32:
4471 case MVT::i64:
4472 // Support 8bit unsigned immediates.
4473 if ((Val & ~0xff) == 0) {
4474 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4475 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4476 return true;
4477 }
4478 // Support 16bit unsigned immediates that are a multiple of 256.
4479 if ((Val & ~0xff00) == 0) {
4480 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4481 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4482 return true;
4483 }
4484 break;
4485 default:
4486 break;
4487 }
4488
4489 return false;
4490}
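// An example to illustrate the 16-bit case above (illustrative): for an
// nxv8i16 add with splat value 0x4500, (Val & ~0xff00) == 0, so Shift = 8 and
// Imm = 0x45, matching "add z0.h, z0.h, #69, lsl #8".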
4491
4492bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4493 SDValue &Imm, SDValue &Shift,
4494 bool Negate) {
4495 if (!isa<ConstantSDNode>(N))
4496 return false;
4497
4498 SDLoc DL(N);
4499 int64_t Val = cast<ConstantSDNode>(N)
4500 ->getAPIntValue()
4501 .trunc(VT.getFixedSizeInBits())
4502 .getSExtValue();
4503
4504 if (Negate)
4505 Val = -Val;
4506
4507 // Signed saturating instructions treat their immediate operand as unsigned,
4508 // whereas the related intrinsics define their operands to be signed. This
4509 // means we can only use the immediate form when the operand is non-negative.
4510 if (Val < 0)
4511 return false;
4512
4513 switch (VT.SimpleTy) {
4514 case MVT::i8:
4515 // All positive immediates are supported.
4516 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4517 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4518 return true;
4519 case MVT::i16:
4520 case MVT::i32:
4521 case MVT::i64:
4522 // Support 8bit positive immediates.
4523 if (Val <= 255) {
4524 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4525 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4526 return true;
4527 }
4528 // Support 16bit positive immediates that are a multiple of 256.
4529 if (Val <= 65280 && Val % 256 == 0) {
4530 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4531 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4532 return true;
4533 }
4534 break;
4535 default:
4536 break;
4537 }
4538
4539 return false;
4540}
4541
4542bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4543 SDValue &Shift) {
4544 if (!isa<ConstantSDNode>(N))
4545 return false;
4546
4547 SDLoc DL(N);
4548 int64_t Val = cast<ConstantSDNode>(N)
4549 ->getAPIntValue()
4550 .trunc(VT.getFixedSizeInBits())
4551 .getSExtValue();
4552 int32_t ImmVal, ShiftVal;
4553 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4554 ShiftVal))
4555 return false;
4556
4557 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4558 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4559 return true;
4560}
4561
4562bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4563 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4564 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4565 return false;
4566}
4567
4568bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4569 SDValue &Imm) {
4570 int64_t ImmVal = Val.getSExtValue();
4571 if (ImmVal >= -128 && ImmVal < 128) {
4572 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4573 return true;
4574 }
4575 return false;
4576}
4577
4578bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4579 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4580 uint64_t ImmVal = CNode->getZExtValue();
4581
4582 switch (VT.SimpleTy) {
4583 case MVT::i8:
4584 ImmVal &= 0xFF;
4585 break;
4586 case MVT::i16:
4587 ImmVal &= 0xFFFF;
4588 break;
4589 case MVT::i32:
4590 ImmVal &= 0xFFFFFFFF;
4591 break;
4592 case MVT::i64:
4593 break;
4594 default:
4595 llvm_unreachable("Unexpected type");
4596 }
4597
4598 if (ImmVal < 256) {
4599 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4600 return true;
4601 }
4602 }
4603 return false;
4604}
4605
4606bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4607 bool Invert) {
4608 uint64_t ImmVal;
4609 if (auto CI = dyn_cast<ConstantSDNode>(N))
4610 ImmVal = CI->getZExtValue();
4611 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4612 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4613 else
4614 return false;
4615
4616 if (Invert)
4617 ImmVal = ~ImmVal;
4618
4619 uint64_t encoding;
4620 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4621 return false;
4622
4623 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4624 return true;
4625}
4626
4627// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4628// Rather than attempt to normalise everything we can sometimes saturate the
4629// shift amount during selection. This function also allows for consistent
4630// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4631// required by the instructions.
4632bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4633 uint64_t High, bool AllowSaturation,
4634 SDValue &Imm) {
4635 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4636 uint64_t ImmVal = CN->getZExtValue();
4637
4638 // Reject shift amounts that are too small.
4639 if (ImmVal < Low)
4640 return false;
4641
4642 // Reject or saturate shift amounts that are too big.
4643 if (ImmVal > High) {
4644 if (!AllowSaturation)
4645 return false;
4646 ImmVal = High;
4647 }
4648
4649 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4650 return true;
4651 }
4652
4653 return false;
4654}
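// An example to illustrate saturation (illustrative): for an SVE shift-right
// intrinsic on nxv16i8, Low = 1 and High = 8; a splatted shift amount of 200
// exceeds High, so with AllowSaturation it is clamped to 8 and selected as
// the immediate form.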
4655
4656bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4657 // tagp(FrameIndex, IRGstack, tag_offset):
4658 // since the offset between FrameIndex and IRGstack is a compile-time
4659 // constant, this can be lowered to a single ADDG instruction.
4660 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4661 return false;
4662 }
4663
4664 SDValue IRG_SP = N->getOperand(2);
4665 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4666 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4667 return false;
4668 }
4669
4670 const TargetLowering *TLI = getTargetLowering();
4671 SDLoc DL(N);
4672 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4673 SDValue FiOp = CurDAG->getTargetFrameIndex(
4674 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4675 int TagOffset = N->getConstantOperandVal(3);
4676
4677 SDNode *Out = CurDAG->getMachineNode(
4678 AArch64::TAGPstack, DL, MVT::i64,
4679 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4680 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4681 ReplaceNode(N, Out);
4682 return true;
4683}
4684
4685void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4686 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4687 "llvm.aarch64.tagp third argument must be an immediate");
4688 if (trySelectStackSlotTagP(N))
4689 return;
4690 // FIXME: the above applies in any case when the offset between Op1 and Op2
4691 // is a compile-time constant, not just for stack allocations.
4692
4693 // General case for unrelated pointers in Op1 and Op2.
4694 SDLoc DL(N);
4695 int TagOffset = N->getConstantOperandVal(3);
4696 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4697 {N->getOperand(1), N->getOperand(2)});
4698 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4699 {SDValue(N1, 0), N->getOperand(2)});
4700 SDNode *N3 = CurDAG->getMachineNode(
4701 AArch64::ADDG, DL, MVT::i64,
4702 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4703 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4704 ReplaceNode(N, N3);
4705}
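// An example to illustrate the general case above (illustrative registers):
// tagp(x0, x1, #2) becomes
//   subp x8, x0, x1
//   add  x8, x8, x1
//   addg x9, x8, #0, #2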
4706
4707bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4708 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4709
4710 // Bail when not a "cast"-like insert_subvector.
4711 if (N->getConstantOperandVal(2) != 0)
4712 return false;
4713 if (!N->getOperand(0).isUndef())
4714 return false;
4715
4716 // Bail when normal isel should do the job.
4717 EVT VT = N->getValueType(0);
4718 EVT InVT = N->getOperand(1).getValueType();
4719 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4720 return false;
4721 if (InVT.getSizeInBits() <= 128)
4722 return false;
4723
4724 // NOTE: We can only get here when doing fixed length SVE code generation.
4725 // We do manual selection because the types involved are not linked to real
4726 // registers (despite being legal) and must be coerced into SVE registers.
4727
4728 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4729 "Expected to insert into a packed scalable vector!");
4730
4731 SDLoc DL(N);
4732 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4733 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4734 N->getOperand(1), RC));
4735 return true;
4736}
4737
4738bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4739 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4740
4741 // Bail when not a "cast"-like extract_subvector.
4742 if (N->getConstantOperandVal(1) != 0)
4743 return false;
4744
4745 // Bail when normal isel can do the job.
4746 EVT VT = N->getValueType(0);
4747 EVT InVT = N->getOperand(0).getValueType();
4748 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4749 return false;
4750 if (VT.getSizeInBits() <= 128)
4751 return false;
4752
4753 // NOTE: We can only get here when doing fixed length SVE code generation.
4754 // We do manual selection because the types involved are not linked to real
4755 // registers (despite being legal) and must be coerced into SVE registers.
4756
4757 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4758 "Expected to extract from a packed scalable vector!");
4759
4760 SDLoc DL(N);
4761 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4762 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4763 N->getOperand(0), RC));
4764 return true;
4765}
4766
4767bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4768 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4769
4770 SDValue N0 = N->getOperand(0);
4771 SDValue N1 = N->getOperand(1);
4772
4773 EVT VT = N->getValueType(0);
4774 SDLoc DL(N);
4775
4776 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4777 // Rotate by a constant is a funnel shift in IR which is expanded to
4778 // an OR with shifted operands.
4779 // We do the following transform:
4780 // OR N0, N1 -> xar (x, y, imm)
4781 // Where:
4782 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4783 // N0 = SHL_PRED true, V, splat(bits-imm)
4784 // V = (xor x, y)
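// For example (illustrative): with SVE2, "rotr (xor z0.d, z1.d), #17" on
// nxv2i64 gives ShrAmt = 17 and ShlAmt = 47; 17 + 47 == 64, so this selects
// XAR_ZZZI_D with immediate 17.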
4785 if (VT.isScalableVector() &&
4786 (Subtarget->hasSVE2() ||
4787 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4788 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4789 N1.getOpcode() != AArch64ISD::SRL_PRED)
4790 std::swap(N0, N1);
4791 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4792 N1.getOpcode() != AArch64ISD::SRL_PRED)
4793 return false;
4794
4795 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4796 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4797 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4798 return false;
4799
4800 if (N0.getOperand(1) != N1.getOperand(1))
4801 return false;
4802
4803 SDValue R1, R2;
4804 bool IsXOROperand = true;
4805 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4806 IsXOROperand = false;
4807 } else {
4808 R1 = N0.getOperand(1).getOperand(0);
4809 R2 = N1.getOperand(1).getOperand(1);
4810 }
4811
4812 APInt ShlAmt, ShrAmt;
4813 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4814 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4815 return false;
4816
4817 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4818 return false;
4819
4820 if (!IsXOROperand) {
4821 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4822 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4823 SDValue MOVIV = SDValue(MOV, 0);
4824
4825 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4826 SDNode *SubRegToReg =
4827 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4828
4829 R1 = N1->getOperand(1);
4830 R2 = SDValue(SubRegToReg, 0);
4831 }
4832
4833 SDValue Imm =
4834 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4835
4836 SDValue Ops[] = {R1, R2, Imm};
4837 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4838 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4839 AArch64::XAR_ZZZI_D})) {
4840 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4841 return true;
4842 }
4843 return false;
4844 }
4845
4846 // The Neon SHA3 XAR instruction only handles v2i64. For the other
4847 // types (v4i32, v8i16, v16i8 and their 64-bit halves) we can use the
4848 // SVE2 XAR instruction when SVE2 is available.
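// For example, a v8i16 XAR is performed by inserting both operands into
// the low 128 bits of an nxv8i16 Z register, issuing the SVE2 XAR there,
// and extracting the fixed-length result back out.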
4849 EVT SVT;
4850 switch (VT.getSimpleVT().SimpleTy) {
4851 case MVT::v4i32:
4852 case MVT::v2i32:
4853 SVT = MVT::nxv4i32;
4854 break;
4855 case MVT::v8i16:
4856 case MVT::v4i16:
4857 SVT = MVT::nxv8i16;
4858 break;
4859 case MVT::v16i8:
4860 case MVT::v8i8:
4861 SVT = MVT::nxv16i8;
4862 break;
4863 case MVT::v2i64:
4864 case MVT::v1i64:
4865 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4866 break;
4867 default:
4868 return false;
4869 }
4870
4871 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4872 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4873 return false;
4874
4875 if (N0->getOpcode() != AArch64ISD::VSHL ||
4876 N1->getOpcode() != AArch64ISD::VLSHR)
4877 return false;
4878
4879 if (N0->getOperand(0) != N1->getOperand(0))
4880 return false;
4881
4882 SDValue R1, R2;
4883 bool IsXOROperand = true;
4884 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4885 IsXOROperand = false;
4886 } else {
4887 SDValue XOR = N0.getOperand(0);
4888 R1 = XOR.getOperand(0);
4889 R2 = XOR.getOperand(1);
4890 }
4891
4892 unsigned HsAmt = N0.getConstantOperandVal(1);
4893 unsigned ShAmt = N1.getConstantOperandVal(1);
4894
4895 SDValue Imm = CurDAG->getTargetConstant(
4896 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4897
4898 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4899 if (ShAmt + HsAmt != VTSizeInBits)
4900 return false;
4901
4902 if (!IsXOROperand) {
4903 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4904 SDNode *MOV =
4905 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4906 SDValue MOVIV = SDValue(MOV, 0);
4907
4908 R1 = N1->getOperand(0);
4909 R2 = MOVIV;
4910 }
4911
4912 if (SVT != VT) {
4913 SDValue Undef =
4914 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4915
4916 if (SVT.isScalableVector() && VT.is64BitVector()) {
4917 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4918
4919 SDValue UndefQ = SDValue(
4920 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4921 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4922
4923 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4924 UndefQ, R1, DSub),
4925 0);
4926 if (R2.getValueType() == VT)
4927 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4928 UndefQ, R2, DSub),
4929 0);
4930 }
4931
4932 SDValue SubReg = CurDAG->getTargetConstant(
4933 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4934
4935 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4936 R1, SubReg),
4937 0);
4938
4939 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4940 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4941 Undef, R2, SubReg),
4942 0);
4943 }
4944
4945 SDValue Ops[] = {R1, R2, Imm};
4946 SDNode *XAR = nullptr;
4947
4948 if (SVT.isScalableVector()) {
4949 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4950 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4951 AArch64::XAR_ZZZI_D}))
4952 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4953 } else {
4954 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4955 }
4956
4957 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4958
4959 if (SVT != VT) {
4960 if (VT.is64BitVector() && SVT.isScalableVector()) {
4961 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4962
4963 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4964 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4965 SDValue(XAR, 0), ZSub);
4966
4967 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4968 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4969 SDValue(Q, 0), DSub);
4970 } else {
4971 SDValue SubReg = CurDAG->getTargetConstant(
4972 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4973 MVT::i32);
4974 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4975 SDValue(XAR, 0), SubReg);
4976 }
4977 }
4978 ReplaceNode(N, XAR);
4979 return true;
4980}
4981
4982void AArch64DAGToDAGISel::Select(SDNode *Node) {
4983 // If we have a custom node, we already have selected!
4984 if (Node->isMachineOpcode()) {
4985 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4986 Node->setNodeId(-1);
4987 return;
4988 }
4989
4990 // A few custom selection cases.
4991 EVT VT = Node->getValueType(0);
4992
4993 switch (Node->getOpcode()) {
4994 default:
4995 break;
4996
4997 case ISD::ATOMIC_CMP_SWAP:
4998 if (SelectCMP_SWAP(Node))
4999 return;
5000 break;
5001
5002 case ISD::READ_REGISTER:
5003 case AArch64ISD::MRRS:
5004 if (tryReadRegister(Node))
5005 return;
5006 break;
5007
5008 case ISD::WRITE_REGISTER:
5009 case AArch64ISD::MSRR:
5010 if (tryWriteRegister(Node))
5011 return;
5012 break;
5013
5014 case ISD::LOAD: {
5015 // Try to select as an indexed load. Fall through to normal processing
5016 // if we can't.
5017 if (tryIndexedLoad(Node))
5018 return;
5019 break;
5020 }
5021
5022 case ISD::SRL:
5023 case ISD::AND:
5024 case ISD::SRA:
5025 case ISD::SIGN_EXTEND_INREG:
5026 if (tryBitfieldExtractOp(Node))
5027 return;
5028 if (tryBitfieldInsertInZeroOp(Node))
5029 return;
5030 [[fallthrough]];
5031 case ISD::ROTR:
5032 case ISD::SHL:
5033 if (tryShiftAmountMod(Node))
5034 return;
5035 break;
5036
5037 case ISD::SIGN_EXTEND:
5038 if (tryBitfieldExtractOpFromSExt(Node))
5039 return;
5040 break;
5041
5042 case ISD::OR:
5043 if (tryBitfieldInsertOp(Node))
5044 return;
5045 if (trySelectXAR(Node))
5046 return;
5047 break;
5048
5049 case ISD::EXTRACT_SUBVECTOR: {
5050 if (trySelectCastScalableToFixedLengthVector(Node))
5051 return;
5052 break;
5053 }
5054
5055 case ISD::INSERT_SUBVECTOR: {
5056 if (trySelectCastFixedLengthToScalableVector(Node))
5057 return;
5058 break;
5059 }
5060
5061 case ISD::Constant: {
5062 // Materialize zero constants as copies from WZR/XZR. This allows
5063 // the coalescer to propagate these into other instructions.
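// For example, an i32 zero becomes a COPY from WZR; once the copy is
// coalesced, users read WZR directly instead of needing a "mov w0, #0".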
5064 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5065 if (ConstNode->isZero()) {
5066 if (VT == MVT::i32) {
5067 SDValue New = CurDAG->getCopyFromReg(
5068 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
5069 ReplaceNode(Node, New.getNode());
5070 return;
5071 } else if (VT == MVT::i64) {
5072 SDValue New = CurDAG->getCopyFromReg(
5073 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
5074 ReplaceNode(Node, New.getNode());
5075 return;
5076 }
5077 }
5078 break;
5079 }
5080
5081 case ISD::FrameIndex: {
5082 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
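// For example, an object 16 bytes above the stack pointer yields
// "add x0, sp, #16" once the frame index is resolved.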
5083 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5084 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5085 const TargetLowering *TLI = getTargetLowering();
5086 SDValue TFI = CurDAG->getTargetFrameIndex(
5087 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5088 SDLoc DL(Node);
5089 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5090 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5091 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5092 return;
5093 }
5094 case ISD::INTRINSIC_W_CHAIN: {
5095 unsigned IntNo = Node->getConstantOperandVal(1);
5096 switch (IntNo) {
5097 default:
5098 break;
5099 case Intrinsic::aarch64_gcsss: {
5100 SDLoc DL(Node);
5101 SDValue Chain = Node->getOperand(0);
5102 SDValue Val = Node->getOperand(2);
5103 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
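// The gcsss stack switch is architecturally split in two: GCSSS1 consumes
// the new Guarded Control Stack pointer and GCSSS2 (here fed XZR) produces
// the intrinsic's i64 result.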
5104 SDNode *SS1 =
5105 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5106 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5107 MVT::Other, Zero, SDValue(SS1, 0));
5108 ReplaceNode(Node, SS2);
5109 return;
5110 }
5111 case Intrinsic::aarch64_ldaxp:
5112 case Intrinsic::aarch64_ldxp: {
5113 unsigned Op =
5114 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5115 SDValue MemAddr = Node->getOperand(2);
5116 SDLoc DL(Node);
5117 SDValue Chain = Node->getOperand(0);
5118
5119 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5120 MVT::Other, MemAddr, Chain);
5121
5122 // Transfer memoperands.
5123 MachineMemOperand *MemOp =
5124 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5125 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5126 ReplaceNode(Node, Ld);
5127 return;
5128 }
5129 case Intrinsic::aarch64_stlxp:
5130 case Intrinsic::aarch64_stxp: {
5131 unsigned Op =
5132 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5133 SDLoc DL(Node);
5134 SDValue Chain = Node->getOperand(0);
5135 SDValue ValLo = Node->getOperand(2);
5136 SDValue ValHi = Node->getOperand(3);
5137 SDValue MemAddr = Node->getOperand(4);
5138
5139 // Place arguments in the right order.
5140 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5141
5142 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5143 // Transfer memoperands.
5144 MachineMemOperand *MemOp =
5145 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5146 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5147
5148 ReplaceNode(Node, St);
5149 return;
5150 }
5151 case Intrinsic::aarch64_neon_ld1x2:
5152 if (VT == MVT::v8i8) {
5153 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5154 return;
5155 } else if (VT == MVT::v16i8) {
5156 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5157 return;
5158 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5159 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5160 return;
5161 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5162 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5163 return;
5164 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5165 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5166 return;
5167 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5168 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5169 return;
5170 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5171 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5172 return;
5173 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5174 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5175 return;
5176 }
5177 break;
5178 case Intrinsic::aarch64_neon_ld1x3:
5179 if (VT == MVT::v8i8) {
5180 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5181 return;
5182 } else if (VT == MVT::v16i8) {
5183 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5184 return;
5185 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5186 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5187 return;
5188 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5189 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5190 return;
5191 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5192 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5193 return;
5194 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5195 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5196 return;
5197 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5198 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5199 return;
5200 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5201 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5202 return;
5203 }
5204 break;
5205 case Intrinsic::aarch64_neon_ld1x4:
5206 if (VT == MVT::v8i8) {
5207 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5208 return;
5209 } else if (VT == MVT::v16i8) {
5210 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5211 return;
5212 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5213 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5214 return;
5215 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5216 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5217 return;
5218 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5219 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5220 return;
5221 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5222 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5223 return;
5224 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5225 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5226 return;
5227 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5228 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5229 return;
5230 }
5231 break;
5232 case Intrinsic::aarch64_neon_ld2:
5233 if (VT == MVT::v8i8) {
5234 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5235 return;
5236 } else if (VT == MVT::v16i8) {
5237 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5238 return;
5239 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5240 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5241 return;
5242 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5243 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5244 return;
5245 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5246 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5247 return;
5248 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5249 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5250 return;
5251 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5252 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5253 return;
5254 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5255 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5256 return;
5257 }
5258 break;
5259 case Intrinsic::aarch64_neon_ld3:
5260 if (VT == MVT::v8i8) {
5261 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5262 return;
5263 } else if (VT == MVT::v16i8) {
5264 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5265 return;
5266 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5267 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5268 return;
5269 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5270 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5271 return;
5272 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5273 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5274 return;
5275 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5276 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5277 return;
5278 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5279 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5280 return;
5281 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5282 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5283 return;
5284 }
5285 break;
5286 case Intrinsic::aarch64_neon_ld4:
5287 if (VT == MVT::v8i8) {
5288 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5289 return;
5290 } else if (VT == MVT::v16i8) {
5291 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5292 return;
5293 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5294 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5295 return;
5296 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5297 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5298 return;
5299 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5300 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5301 return;
5302 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5303 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5304 return;
5305 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5306 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5307 return;
5308 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5309 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5310 return;
5311 }
5312 break;
5313 case Intrinsic::aarch64_neon_ld2r:
5314 if (VT == MVT::v8i8) {
5315 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5316 return;
5317 } else if (VT == MVT::v16i8) {
5318 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5319 return;
5320 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5321 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5322 return;
5323 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5324 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5325 return;
5326 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5327 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5328 return;
5329 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5330 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5331 return;
5332 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5333 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5334 return;
5335 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5336 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5337 return;
5338 }
5339 break;
5340 case Intrinsic::aarch64_neon_ld3r:
5341 if (VT == MVT::v8i8) {
5342 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5343 return;
5344 } else if (VT == MVT::v16i8) {
5345 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5346 return;
5347 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5348 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5349 return;
5350 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5351 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5352 return;
5353 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5354 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5355 return;
5356 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5357 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5358 return;
5359 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5360 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5361 return;
5362 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5363 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5364 return;
5365 }
5366 break;
5367 case Intrinsic::aarch64_neon_ld4r:
5368 if (VT == MVT::v8i8) {
5369 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5370 return;
5371 } else if (VT == MVT::v16i8) {
5372 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5373 return;
5374 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5375 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5376 return;
5377 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5378 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5379 return;
5380 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5381 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5382 return;
5383 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5384 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5385 return;
5386 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5387 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5388 return;
5389 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5390 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5391 return;
5392 }
5393 break;
5394 case Intrinsic::aarch64_neon_ld2lane:
5395 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5396 SelectLoadLane(Node, 2, AArch64::LD2i8);
5397 return;
5398 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5399 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5400 SelectLoadLane(Node, 2, AArch64::LD2i16);
5401 return;
5402 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5403 VT == MVT::v2f32) {
5404 SelectLoadLane(Node, 2, AArch64::LD2i32);
5405 return;
5406 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5407 VT == MVT::v1f64) {
5408 SelectLoadLane(Node, 2, AArch64::LD2i64);
5409 return;
5410 }
5411 break;
5412 case Intrinsic::aarch64_neon_ld3lane:
5413 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5414 SelectLoadLane(Node, 3, AArch64::LD3i8);
5415 return;
5416 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5417 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5418 SelectLoadLane(Node, 3, AArch64::LD3i16);
5419 return;
5420 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5421 VT == MVT::v2f32) {
5422 SelectLoadLane(Node, 3, AArch64::LD3i32);
5423 return;
5424 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5425 VT == MVT::v1f64) {
5426 SelectLoadLane(Node, 3, AArch64::LD3i64);
5427 return;
5428 }
5429 break;
5430 case Intrinsic::aarch64_neon_ld4lane:
5431 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5432 SelectLoadLane(Node, 4, AArch64::LD4i8);
5433 return;
5434 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5435 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5436 SelectLoadLane(Node, 4, AArch64::LD4i16);
5437 return;
5438 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5439 VT == MVT::v2f32) {
5440 SelectLoadLane(Node, 4, AArch64::LD4i32);
5441 return;
5442 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5443 VT == MVT::v1f64) {
5444 SelectLoadLane(Node, 4, AArch64::LD4i64);
5445 return;
5446 }
5447 break;
5448 case Intrinsic::aarch64_ld64b:
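// LD64B is a single-copy-atomic 64-byte load into eight consecutive
// X registers, modelled as one 8-register super-register (x8sub_0..7).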
5449 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5450 return;
5451 case Intrinsic::aarch64_sve_ld2q_sret: {
5452 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5453 return;
5454 }
5455 case Intrinsic::aarch64_sve_ld3q_sret: {
5456 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5457 return;
5458 }
5459 case Intrinsic::aarch64_sve_ld4q_sret: {
5460 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5461 return;
5462 }
5463 case Intrinsic::aarch64_sve_ld2_sret: {
5464 if (VT == MVT::nxv16i8) {
5465 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5466 true);
5467 return;
5468 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5469 VT == MVT::nxv8bf16) {
5470 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5471 true);
5472 return;
5473 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5474 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5475 true);
5476 return;
5477 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5478 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5479 true);
5480 return;
5481 }
5482 break;
5483 }
5484 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5485 if (VT == MVT::nxv16i8) {
5486 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5487 SelectContiguousMultiVectorLoad(
5488 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5489 else if (Subtarget->hasSVE2p1())
5490 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5491 AArch64::LD1B_2Z);
5492 else
5493 break;
5494 return;
5495 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5496 VT == MVT::nxv8bf16) {
5497 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5498 SelectContiguousMultiVectorLoad(
5499 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5500 else if (Subtarget->hasSVE2p1())
5501 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5502 AArch64::LD1H_2Z);
5503 else
5504 break;
5505 return;
5506 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5507 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5508 SelectContiguousMultiVectorLoad(
5509 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5510 else if (Subtarget->hasSVE2p1())
5511 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5512 AArch64::LD1W_2Z);
5513 else
5514 break;
5515 return;
5516 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5517 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5518 SelectContiguousMultiVectorLoad(
5519 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5520 else if (Subtarget->hasSVE2p1())
5521 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5522 AArch64::LD1D_2Z);
5523 else
5524 break;
5525 return;
5526 }
5527 break;
5528 }
5529 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5530 if (VT == MVT::nxv16i8) {
5531 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5532 SelectContiguousMultiVectorLoad(
5533 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5534 else if (Subtarget->hasSVE2p1())
5535 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5536 AArch64::LD1B_4Z);
5537 else
5538 break;
5539 return;
5540 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5541 VT == MVT::nxv8bf16) {
5542 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5543 SelectContiguousMultiVectorLoad(
5544 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5545 else if (Subtarget->hasSVE2p1())
5546 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5547 AArch64::LD1H_4Z);
5548 else
5549 break;
5550 return;
5551 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5552 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5553 SelectContiguousMultiVectorLoad(
5554 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5555 else if (Subtarget->hasSVE2p1())
5556 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5557 AArch64::LD1W_4Z);
5558 else
5559 break;
5560 return;
5561 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5562 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5563 SelectContiguousMultiVectorLoad(
5564 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5565 else if (Subtarget->hasSVE2p1())
5566 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5567 AArch64::LD1D_4Z);
5568 else
5569 break;
5570 return;
5571 }
5572 break;
5573 }
5574 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5575 if (VT == MVT::nxv16i8) {
5576 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5577 SelectContiguousMultiVectorLoad(Node, 2, 0,
5578 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5579 AArch64::LDNT1B_2Z_PSEUDO);
5580 else if (Subtarget->hasSVE2p1())
5581 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5582 AArch64::LDNT1B_2Z);
5583 else
5584 break;
5585 return;
5586 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5587 VT == MVT::nxv8bf16) {
5588 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5589 SelectContiguousMultiVectorLoad(Node, 2, 1,
5590 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5591 AArch64::LDNT1H_2Z_PSEUDO);
5592 else if (Subtarget->hasSVE2p1())
5593 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5594 AArch64::LDNT1H_2Z);
5595 else
5596 break;
5597 return;
5598 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5599 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5600 SelectContiguousMultiVectorLoad(Node, 2, 2,
5601 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5602 AArch64::LDNT1W_2Z_PSEUDO);
5603 else if (Subtarget->hasSVE2p1())
5604 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5605 AArch64::LDNT1W_2Z);
5606 else
5607 break;
5608 return;
5609 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5610 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5611 SelectContiguousMultiVectorLoad(Node, 2, 3,
5612 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5613 AArch64::LDNT1D_2Z_PSEUDO);
5614 else if (Subtarget->hasSVE2p1())
5615 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5616 AArch64::LDNT1D_2Z);
5617 else
5618 break;
5619 return;
5620 }
5621 break;
5622 }
5623 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5624 if (VT == MVT::nxv16i8) {
5625 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5626 SelectContiguousMultiVectorLoad(Node, 4, 0,
5627 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5628 AArch64::LDNT1B_4Z_PSEUDO);
5629 else if (Subtarget->hasSVE2p1())
5630 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5631 AArch64::LDNT1B_4Z);
5632 else
5633 break;
5634 return;
5635 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5636 VT == MVT::nxv8bf16) {
5637 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5638 SelectContiguousMultiVectorLoad(Node, 4, 1,
5639 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5640 AArch64::LDNT1H_4Z_PSEUDO);
5641 else if (Subtarget->hasSVE2p1())
5642 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5643 AArch64::LDNT1H_4Z);
5644 else
5645 break;
5646 return;
5647 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5648 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5649 SelectContiguousMultiVectorLoad(Node, 4, 2,
5650 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5651 AArch64::LDNT1W_4Z_PSEUDO);
5652 else if (Subtarget->hasSVE2p1())
5653 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5654 AArch64::LDNT1W_4Z);
5655 else
5656 break;
5657 return;
5658 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5659 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5660 SelectContiguousMultiVectorLoad(Node, 4, 3,
5661 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5662 AArch64::LDNT1D_4Z_PSEUDO);
5663 else if (Subtarget->hasSVE2p1())
5664 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5665 AArch64::LDNT1D_4Z);
5666 else
5667 break;
5668 return;
5669 }
5670 break;
5671 }
5672 case Intrinsic::aarch64_sve_ld3_sret: {
5673 if (VT == MVT::nxv16i8) {
5674 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5675 true);
5676 return;
5677 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5678 VT == MVT::nxv8bf16) {
5679 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5680 true);
5681 return;
5682 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5683 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5684 true);
5685 return;
5686 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5687 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5688 true);
5689 return;
5690 }
5691 break;
5692 }
5693 case Intrinsic::aarch64_sve_ld4_sret: {
5694 if (VT == MVT::nxv16i8) {
5695 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5696 true);
5697 return;
5698 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5699 VT == MVT::nxv8bf16) {
5700 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5701 true);
5702 return;
5703 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5704 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5705 true);
5706 return;
5707 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5708 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5709 true);
5710 return;
5711 }
5712 break;
5713 }
5714 case Intrinsic::aarch64_sme_read_hor_vg2: {
5715 if (VT == MVT::nxv16i8) {
5716 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5717 AArch64::MOVA_2ZMXI_H_B);
5718 return;
5719 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5720 VT == MVT::nxv8bf16) {
5721 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5722 AArch64::MOVA_2ZMXI_H_H);
5723 return;
5724 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5725 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5726 AArch64::MOVA_2ZMXI_H_S);
5727 return;
5728 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5729 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5730 AArch64::MOVA_2ZMXI_H_D);
5731 return;
5732 }
5733 break;
5734 }
5735 case Intrinsic::aarch64_sme_read_ver_vg2: {
5736 if (VT == MVT::nxv16i8) {
5737 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5738 AArch64::MOVA_2ZMXI_V_B);
5739 return;
5740 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5741 VT == MVT::nxv8bf16) {
5742 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5743 AArch64::MOVA_2ZMXI_V_H);
5744 return;
5745 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5746 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5747 AArch64::MOVA_2ZMXI_V_S);
5748 return;
5749 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5750 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5751 AArch64::MOVA_2ZMXI_V_D);
5752 return;
5753 }
5754 break;
5755 }
5756 case Intrinsic::aarch64_sme_read_hor_vg4: {
5757 if (VT == MVT::nxv16i8) {
5758 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5759 AArch64::MOVA_4ZMXI_H_B);
5760 return;
5761 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5762 VT == MVT::nxv8bf16) {
5763 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5764 AArch64::MOVA_4ZMXI_H_H);
5765 return;
5766 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5767 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5768 AArch64::MOVA_4ZMXI_H_S);
5769 return;
5770 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5771 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5772 AArch64::MOVA_4ZMXI_H_D);
5773 return;
5774 }
5775 break;
5776 }
5777 case Intrinsic::aarch64_sme_read_ver_vg4: {
5778 if (VT == MVT::nxv16i8) {
5779 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5780 AArch64::MOVA_4ZMXI_V_B);
5781 return;
5782 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5783 VT == MVT::nxv8bf16) {
5784 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5785 AArch64::MOVA_4ZMXI_V_H);
5786 return;
5787 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5788 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5789 AArch64::MOVA_4ZMXI_V_S);
5790 return;
5791 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5792 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5793 AArch64::MOVA_4ZMXI_V_D);
5794 return;
5795 }
5796 break;
5797 }
5798 case Intrinsic::aarch64_sme_read_vg1x2: {
5799 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5800 AArch64::MOVA_VG2_2ZMXI);
5801 return;
5802 }
5803 case Intrinsic::aarch64_sme_read_vg1x4: {
5804 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5805 AArch64::MOVA_VG4_4ZMXI);
5806 return;
5807 }
5808 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5809 if (VT == MVT::nxv16i8) {
5810 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5811 return;
5812 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5813 VT == MVT::nxv8bf16) {
5814 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5815 return;
5816 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5817 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5818 return;
5819 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5820 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5821 return;
5822 }
5823 break;
5824 }
5825 case Intrinsic::aarch64_sme_readz_vert_x2: {
5826 if (VT == MVT::nxv16i8) {
5827 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5828 return;
5829 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5830 VT == MVT::nxv8bf16) {
5831 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5832 return;
5833 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5834 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5835 return;
5836 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5837 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5838 return;
5839 }
5840 break;
5841 }
5842 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5843 if (VT == MVT::nxv16i8) {
5844 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5845 return;
5846 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5847 VT == MVT::nxv8bf16) {
5848 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5849 return;
5850 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5851 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5852 return;
5853 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5854 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5855 return;
5856 }
5857 break;
5858 }
5859 case Intrinsic::aarch64_sme_readz_vert_x4: {
5860 if (VT == MVT::nxv16i8) {
5861 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5862 return;
5863 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5864 VT == MVT::nxv8bf16) {
5865 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5866 return;
5867 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5868 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5869 return;
5870 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5871 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5872 return;
5873 }
5874 break;
5875 }
5876 case Intrinsic::aarch64_sme_readz_x2: {
5877 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5878 AArch64::ZA);
5879 return;
5880 }
5881 case Intrinsic::aarch64_sme_readz_x4: {
5882 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5883 AArch64::ZA);
5884 return;
5885 }
5886 case Intrinsic::swift_async_context_addr: {
5887 SDLoc DL(Node);
5888 SDValue Chain = Node->getOperand(0);
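// The Swift async context is stored immediately below the frame pointer,
// so its address is materialized as FP - 8.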
5889 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5890 SDValue Res = SDValue(
5891 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5892 CurDAG->getTargetConstant(8, DL, MVT::i32),
5893 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5894 0);
5895 ReplaceUses(SDValue(Node, 0), Res);
5896 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5897 CurDAG->RemoveDeadNode(Node);
5898
5899 auto &MF = CurDAG->getMachineFunction();
5900 MF.getFrameInfo().setFrameAddressIsTaken(true);
5901 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5902 return;
5903 }
5904 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5905 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5906 Node->getValueType(0),
5907 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5908 AArch64::LUTI2_4ZTZI_S}))
5909 // Second Immediate must be <= 3:
5910 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5911 return;
5912 }
5913 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5914 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5915 Node->getValueType(0),
5916 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5917 // Second Immediate must be <= 1:
5918 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5919 return;
5920 }
5921 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5922 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5923 Node->getValueType(0),
5924 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5925 AArch64::LUTI2_2ZTZI_S}))
5926 // Second Immediate must be <= 7:
5927 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5928 return;
5929 }
5930 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5931 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5932 Node->getValueType(0),
5933 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5934 AArch64::LUTI4_2ZTZI_S}))
5935 // Second Immediate must be <= 3:
5936 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5937 return;
5938 }
5939 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5940 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5941 return;
5942 }
5943 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5944 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5945 Node->getValueType(0),
5946 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5947 SelectCVTIntrinsicFP8(Node, 2, Opc);
5948 return;
5949 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5950 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5951 Node->getValueType(0),
5952 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5953 SelectCVTIntrinsicFP8(Node, 2, Opc);
5954 return;
5955 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5956 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5957 Node->getValueType(0),
5958 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5959 SelectCVTIntrinsicFP8(Node, 2, Opc);
5960 return;
5961 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5962 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5963 Node->getValueType(0),
5964 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5965 SelectCVTIntrinsicFP8(Node, 2, Opc);
5966 return;
5967 case Intrinsic::ptrauth_resign_load_relative:
5968 SelectPtrauthResign(Node);
5969 return;
5970 }
5971 } break;
5972 case ISD::INTRINSIC_WO_CHAIN: {
5973 unsigned IntNo = Node->getConstantOperandVal(0);
5974 switch (IntNo) {
5975 default:
5976 break;
5977 case Intrinsic::aarch64_tagp:
5978 SelectTagP(Node);
5979 return;
5980
5981 case Intrinsic::ptrauth_auth:
5982 SelectPtrauthAuth(Node);
5983 return;
5984
5985 case Intrinsic::ptrauth_resign:
5986 SelectPtrauthResign(Node);
5987 return;
5988
5989 case Intrinsic::aarch64_neon_tbl2:
5990 SelectTable(Node, 2,
5991 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5992 false);
5993 return;
5994 case Intrinsic::aarch64_neon_tbl3:
5995 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5996 : AArch64::TBLv16i8Three,
5997 false);
5998 return;
5999 case Intrinsic::aarch64_neon_tbl4:
6000 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
6001 : AArch64::TBLv16i8Four,
6002 false);
6003 return;
6004 case Intrinsic::aarch64_neon_tbx2:
6005 SelectTable(Node, 2,
6006 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
6007 true);
6008 return;
6009 case Intrinsic::aarch64_neon_tbx3:
6010 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
6011 : AArch64::TBXv16i8Three,
6012 true);
6013 return;
6014 case Intrinsic::aarch64_neon_tbx4:
6015 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
6016 : AArch64::TBXv16i8Four,
6017 true);
6018 return;
6019 case Intrinsic::aarch64_sve_srshl_single_x2:
6020 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6021 Node->getValueType(0),
6022 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
6023 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6024 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6025 return;
6026 case Intrinsic::aarch64_sve_srshl_single_x4:
6027 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6028 Node->getValueType(0),
6029 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6030 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6031 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6032 return;
6033 case Intrinsic::aarch64_sve_urshl_single_x2:
6034 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6035 Node->getValueType(0),
6036 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6037 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6038 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6039 return;
6040 case Intrinsic::aarch64_sve_urshl_single_x4:
6041 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6042 Node->getValueType(0),
6043 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6044 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6045 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6046 return;
6047 case Intrinsic::aarch64_sve_srshl_x2:
6048 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6049 Node->getValueType(0),
6050 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6051 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6052 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6053 return;
6054 case Intrinsic::aarch64_sve_srshl_x4:
6055 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6056 Node->getValueType(0),
6057 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6058 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6059 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6060 return;
6061 case Intrinsic::aarch64_sve_urshl_x2:
6062 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6063 Node->getValueType(0),
6064 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6065 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6066 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6067 return;
6068 case Intrinsic::aarch64_sve_urshl_x4:
6069 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6070 Node->getValueType(0),
6071 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6072 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6073 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6074 return;
6075 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6076 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6077 Node->getValueType(0),
6078 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6079 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6080 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6081 return;
6082 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6083 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6084 Node->getValueType(0),
6085 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6086 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6087 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6088 return;
6089 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6090 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6091 Node->getValueType(0),
6092 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6093 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6094 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6095 return;
6096 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6097 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6098 Node->getValueType(0),
6099 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6100 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6101 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6102 return;
6103 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6104 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6105 Node->getValueType(0),
6106 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6107 AArch64::FSCALE_2ZZ_D}))
6108 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6109 return;
6110 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6111 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6112 Node->getValueType(0),
6113 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6114 AArch64::FSCALE_4ZZ_D}))
6115 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6116 return;
6117 case Intrinsic::aarch64_sme_fp8_scale_x2:
6118 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6119 Node->getValueType(0),
6120 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6121 AArch64::FSCALE_2Z2Z_D}))
6122 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6123 return;
6124 case Intrinsic::aarch64_sme_fp8_scale_x4:
6125 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6126 Node->getValueType(0),
6127 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6128 AArch64::FSCALE_4Z4Z_D}))
6129 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6130 return;
6131 case Intrinsic::aarch64_sve_whilege_x2:
6132 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6133 Node->getValueType(0),
6134 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6135 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6136 SelectWhilePair(Node, Op);
6137 return;
6138 case Intrinsic::aarch64_sve_whilegt_x2:
6139 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6140 Node->getValueType(0),
6141 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6142 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6143 SelectWhilePair(Node, Op);
6144 return;
6145 case Intrinsic::aarch64_sve_whilehi_x2:
6146 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6147 Node->getValueType(0),
6148 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6149 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6150 SelectWhilePair(Node, Op);
6151 return;
6152 case Intrinsic::aarch64_sve_whilehs_x2:
6153 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6154 Node->getValueType(0),
6155 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6156 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6157 SelectWhilePair(Node, Op);
6158 return;
6159 case Intrinsic::aarch64_sve_whilele_x2:
6160 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6161 Node->getValueType(0),
6162 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6163 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6164 SelectWhilePair(Node, Op);
6165 return;
6166 case Intrinsic::aarch64_sve_whilelo_x2:
6167 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6168 Node->getValueType(0),
6169 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6170 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6171 SelectWhilePair(Node, Op);
6172 return;
6173 case Intrinsic::aarch64_sve_whilels_x2:
6174 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6175 Node->getValueType(0),
6176 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6177 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6178 SelectWhilePair(Node, Op);
6179 return;
6180 case Intrinsic::aarch64_sve_whilelt_x2:
6181 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6182 Node->getValueType(0),
6183 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6184 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6185 SelectWhilePair(Node, Op);
6186 return;
6187 case Intrinsic::aarch64_sve_smax_single_x2:
6188 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6189 Node->getValueType(0),
6190 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6191 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6192 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6193 return;
6194 case Intrinsic::aarch64_sve_umax_single_x2:
6195 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6196 Node->getValueType(0),
6197 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6198 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6199 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6200 return;
6201 case Intrinsic::aarch64_sve_fmax_single_x2:
6202 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6203 Node->getValueType(0),
6204 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6205 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6206 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6207 return;
6208 case Intrinsic::aarch64_sve_smax_single_x4:
6209 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6210 Node->getValueType(0),
6211 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6212 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6213 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6214 return;
6215 case Intrinsic::aarch64_sve_umax_single_x4:
6216 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6217 Node->getValueType(0),
6218 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6219 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6220 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6221 return;
6222 case Intrinsic::aarch64_sve_fmax_single_x4:
6223 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6224 Node->getValueType(0),
6225 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6226 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6227 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6228 return;
6229 case Intrinsic::aarch64_sve_smin_single_x2:
6230 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6231 Node->getValueType(0),
6232 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6233 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6234 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6235 return;
6236 case Intrinsic::aarch64_sve_umin_single_x2:
6237 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6238 Node->getValueType(0),
6239 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6240 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6241 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6242 return;
6243 case Intrinsic::aarch64_sve_fmin_single_x2:
6244 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6245 Node->getValueType(0),
6246 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6247 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6248 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6249 return;
6250 case Intrinsic::aarch64_sve_smin_single_x4:
6251 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6252 Node->getValueType(0),
6253 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6254 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6255 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6256 return;
6257 case Intrinsic::aarch64_sve_umin_single_x4:
6258 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6259 Node->getValueType(0),
6260 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6261 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6262 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6263 return;
6264 case Intrinsic::aarch64_sve_fmin_single_x4:
6265 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6266 Node->getValueType(0),
6267 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6268 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6269 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6270 return;
6271 case Intrinsic::aarch64_sve_smax_x2:
6272 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6273 Node->getValueType(0),
6274 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6275 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6276 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6277 return;
6278 case Intrinsic::aarch64_sve_umax_x2:
6279 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6280 Node->getValueType(0),
6281 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6282 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6283 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6284 return;
6285 case Intrinsic::aarch64_sve_fmax_x2:
6286 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6287 Node->getValueType(0),
6288 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6289 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6290 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6291 return;
6292 case Intrinsic::aarch64_sve_smax_x4:
6293 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6294 Node->getValueType(0),
6295 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6296 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6297 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6298 return;
6299 case Intrinsic::aarch64_sve_umax_x4:
6300 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6301 Node->getValueType(0),
6302 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6303 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6304 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6305 return;
6306 case Intrinsic::aarch64_sve_fmax_x4:
6307 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6308 Node->getValueType(0),
6309 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6310 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6311 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6312 return;
6313 case Intrinsic::aarch64_sme_famax_x2:
6314 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6315 Node->getValueType(0),
6316 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6317 AArch64::FAMAX_2Z2Z_D}))
6318 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6319 return;
6320 case Intrinsic::aarch64_sme_famax_x4:
6321 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6322 Node->getValueType(0),
6323 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6324 AArch64::FAMAX_4Z4Z_D}))
6325 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6326 return;
6327 case Intrinsic::aarch64_sme_famin_x2:
6328 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6329 Node->getValueType(0),
6330 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6331 AArch64::FAMIN_2Z2Z_D}))
6332 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6333 return;
6334 case Intrinsic::aarch64_sme_famin_x4:
6335 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6336 Node->getValueType(0),
6337 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6338 AArch64::FAMIN_4Z4Z_D}))
6339 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6340 return;
6341 case Intrinsic::aarch64_sve_smin_x2:
6342 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6343 Node->getValueType(0),
6344 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6345 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6346 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6347 return;
6348 case Intrinsic::aarch64_sve_umin_x2:
6349 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6350 Node->getValueType(0),
6351 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6352 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6353 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6354 return;
6355 case Intrinsic::aarch64_sve_fmin_x2:
6356 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6357 Node->getValueType(0),
6358 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6359 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6360 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6361 return;
6362 case Intrinsic::aarch64_sve_smin_x4:
6363 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6364 Node->getValueType(0),
6365 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6366 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6367 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6368 return;
6369 case Intrinsic::aarch64_sve_umin_x4:
6370 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6371 Node->getValueType(0),
6372 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6373 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6374 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6375 return;
6376 case Intrinsic::aarch64_sve_fmin_x4:
6377 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6378 Node->getValueType(0),
6379 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6380 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6381 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6382 return;
6383 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6384 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6385 Node->getValueType(0),
6386 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6387 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6388 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6389 return;
6390 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6391 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6392 Node->getValueType(0),
6393 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6394 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6395 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6396 return;
6397 case Intrinsic::aarch64_sve_fminnm_single_x2:
6398 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6399 Node->getValueType(0),
6400 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6401 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6402 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6403 return;
6404 case Intrinsic::aarch64_sve_fminnm_single_x4:
6405 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6406 Node->getValueType(0),
6407 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6408 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6409 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6410 return;
6411 case Intrinsic::aarch64_sve_fscale_single_x4:
6412 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6413 return;
6414 case Intrinsic::aarch64_sve_fscale_single_x2:
6415 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6416 return;
6417 case Intrinsic::aarch64_sve_fmul_single_x4:
6418 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6419 Node->getValueType(0),
6420 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6421 AArch64::FMUL_4ZZ_D}))
6422 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6423 return;
6424 case Intrinsic::aarch64_sve_fmul_single_x2:
6425 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6426 Node->getValueType(0),
6427 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6428 AArch64::FMUL_2ZZ_D}))
6429 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6430 return;
6431 case Intrinsic::aarch64_sve_fmaxnm_x2:
6432 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6433 Node->getValueType(0),
6434 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6435 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6436 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6437 return;
6438 case Intrinsic::aarch64_sve_fmaxnm_x4:
6439 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6440 Node->getValueType(0),
6441 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6442 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6443 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6444 return;
6445 case Intrinsic::aarch64_sve_fminnm_x2:
6446 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6447 Node->getValueType(0),
6448 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6449 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6450 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6451 return;
6452 case Intrinsic::aarch64_sve_fminnm_x4:
6453 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6454 Node->getValueType(0),
6455 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6456 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6457 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6458 return;
6459 case Intrinsic::aarch64_sve_aese_lane_x2:
6460 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6461 return;
6462 case Intrinsic::aarch64_sve_aesd_lane_x2:
6463 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6464 return;
6465 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6466 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6467 return;
6468 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6469 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6470 return;
6471 case Intrinsic::aarch64_sve_aese_lane_x4:
6472 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6473 return;
6474 case Intrinsic::aarch64_sve_aesd_lane_x4:
6475 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6476 return;
6477 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6478 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6479 return;
6480 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6481 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6482 return;
6483 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6484 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6485 return;
6486 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6487 SDLoc DL(Node);
6488 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6489 SDNode *Res =
6490 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6491 SDValue SuperReg = SDValue(Res, 0);
6492 for (unsigned I = 0; I < 2; I++)
6493 ReplaceUses(SDValue(Node, I),
6494 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6495 SuperReg));
6496 CurDAG->RemoveDeadNode(Node);
6497 return;
6498 }
6499 case Intrinsic::aarch64_sve_fscale_x4:
6500 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6501 return;
6502 case Intrinsic::aarch64_sve_fscale_x2:
6503 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6504 return;
6505 case Intrinsic::aarch64_sve_fmul_x4:
6506 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6507 Node->getValueType(0),
6508 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6509 AArch64::FMUL_4Z4Z_D}))
6510 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6511 return;
6512 case Intrinsic::aarch64_sve_fmul_x2:
6513 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6514 Node->getValueType(0),
6515 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6516 AArch64::FMUL_2Z2Z_D}))
6517 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6518 return;
6519 case Intrinsic::aarch64_sve_fcvtzs_x2:
6520 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6521 return;
6522 case Intrinsic::aarch64_sve_scvtf_x2:
6523 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6524 return;
6525 case Intrinsic::aarch64_sve_fcvtzu_x2:
6526 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6527 return;
6528 case Intrinsic::aarch64_sve_ucvtf_x2:
6529 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6530 return;
6531 case Intrinsic::aarch64_sve_fcvtzs_x4:
6532 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6533 return;
6534 case Intrinsic::aarch64_sve_scvtf_x4:
6535 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6536 return;
6537 case Intrinsic::aarch64_sve_fcvtzu_x4:
6538 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6539 return;
6540 case Intrinsic::aarch64_sve_ucvtf_x4:
6541 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6542 return;
6543 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6544 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6545 return;
6546 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6547 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6548 return;
6549 case Intrinsic::aarch64_sve_sclamp_single_x2:
6550 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6551 Node->getValueType(0),
6552 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6553 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6554 SelectClamp(Node, 2, Op);
6555 return;
6556 case Intrinsic::aarch64_sve_uclamp_single_x2:
6557 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6558 Node->getValueType(0),
6559 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6560 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6561 SelectClamp(Node, 2, Op);
6562 return;
6563 case Intrinsic::aarch64_sve_fclamp_single_x2:
6564 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6565 Node->getValueType(0),
6566 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6567 AArch64::FCLAMP_VG2_2Z2Z_D}))
6568 SelectClamp(Node, 2, Op);
6569 return;
6570 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6571 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6572 return;
6573 case Intrinsic::aarch64_sve_sclamp_single_x4:
6574 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6575 Node->getValueType(0),
6576 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6577 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6578 SelectClamp(Node, 4, Op);
6579 return;
6580 case Intrinsic::aarch64_sve_uclamp_single_x4:
6581 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6582 Node->getValueType(0),
6583 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6584 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6585 SelectClamp(Node, 4, Op);
6586 return;
6587 case Intrinsic::aarch64_sve_fclamp_single_x4:
6588 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6589 Node->getValueType(0),
6590 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6591 AArch64::FCLAMP_VG4_4Z4Z_D}))
6592 SelectClamp(Node, 4, Op);
6593 return;
6594 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6595 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6596 return;
6597 case Intrinsic::aarch64_sve_add_single_x2:
6598 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6599 Node->getValueType(0),
6600 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6601 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6602 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6603 return;
6604 case Intrinsic::aarch64_sve_add_single_x4:
6605 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6606 Node->getValueType(0),
6607 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6608 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6609 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6610 return;
6611 case Intrinsic::aarch64_sve_zip_x2:
6612 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6613 Node->getValueType(0),
6614 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6615 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6616 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6617 return;
6618 case Intrinsic::aarch64_sve_zipq_x2:
6619 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6620 AArch64::ZIP_VG2_2ZZZ_Q);
6621 return;
6622 case Intrinsic::aarch64_sve_zip_x4:
6623 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6624 Node->getValueType(0),
6625 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6626 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6627 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6628 return;
6629 case Intrinsic::aarch64_sve_zipq_x4:
6630 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6631 AArch64::ZIP_VG4_4Z4Z_Q);
6632 return;
6633 case Intrinsic::aarch64_sve_uzp_x2:
6634 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6635 Node->getValueType(0),
6636 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6637 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6638 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6639 return;
6640 case Intrinsic::aarch64_sve_uzpq_x2:
6641 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6642 AArch64::UZP_VG2_2ZZZ_Q);
6643 return;
6644 case Intrinsic::aarch64_sve_uzp_x4:
6645 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6646 Node->getValueType(0),
6647 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6648 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6649 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6650 return;
6651 case Intrinsic::aarch64_sve_uzpq_x4:
6652 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6653 AArch64::UZP_VG4_4Z4Z_Q);
6654 return;
6655 case Intrinsic::aarch64_sve_sel_x2:
6656 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6657 Node->getValueType(0),
6658 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6659 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6660 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6661 return;
6662 case Intrinsic::aarch64_sve_sel_x4:
6663 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6664 Node->getValueType(0),
6665 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6666 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6667 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6668 return;
6669 case Intrinsic::aarch64_sve_frinta_x2:
6670 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6671 return;
6672 case Intrinsic::aarch64_sve_frinta_x4:
6673 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6674 return;
6675 case Intrinsic::aarch64_sve_frintm_x2:
6676 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6677 return;
6678 case Intrinsic::aarch64_sve_frintm_x4:
6679 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6680 return;
6681 case Intrinsic::aarch64_sve_frintn_x2:
6682 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6683 return;
6684 case Intrinsic::aarch64_sve_frintn_x4:
6685 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6686 return;
6687 case Intrinsic::aarch64_sve_frintp_x2:
6688 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6689 return;
6690 case Intrinsic::aarch64_sve_frintp_x4:
6691 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6692 return;
6693 case Intrinsic::aarch64_sve_sunpk_x2:
6694 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6695 Node->getValueType(0),
6696 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6697 AArch64::SUNPK_VG2_2ZZ_D}))
6698 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6699 return;
6700 case Intrinsic::aarch64_sve_uunpk_x2:
6701 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6702 Node->getValueType(0),
6703 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6704 AArch64::UUNPK_VG2_2ZZ_D}))
6705 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6706 return;
6707 case Intrinsic::aarch64_sve_sunpk_x4:
6708 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6709 Node->getValueType(0),
6710 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6711 AArch64::SUNPK_VG4_4Z2Z_D}))
6712 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6713 return;
6714 case Intrinsic::aarch64_sve_uunpk_x4:
6715 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6716 Node->getValueType(0),
6717 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6718 AArch64::UUNPK_VG4_4Z2Z_D}))
6719 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6720 return;
6721 case Intrinsic::aarch64_sve_pext_x2: {
6722 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6723 Node->getValueType(0),
6724 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6725 AArch64::PEXT_2PCI_D}))
6726 SelectPExtPair(Node, Op);
6727 return;
6728 }
6729 }
6730 break;
6731 }
6732 case ISD::INTRINSIC_VOID: {
6733 unsigned IntNo = Node->getConstantOperandVal(1);
6734 if (Node->getNumOperands() >= 3)
6735 VT = Node->getOperand(2)->getValueType(0);
6736 switch (IntNo) {
6737 default:
6738 break;
6739 case Intrinsic::aarch64_neon_st1x2: {
6740 if (VT == MVT::v8i8) {
6741 SelectStore(Node, 2, AArch64::ST1Twov8b);
6742 return;
6743 } else if (VT == MVT::v16i8) {
6744 SelectStore(Node, 2, AArch64::ST1Twov16b);
6745 return;
6746 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6747 VT == MVT::v4bf16) {
6748 SelectStore(Node, 2, AArch64::ST1Twov4h);
6749 return;
6750 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6751 VT == MVT::v8bf16) {
6752 SelectStore(Node, 2, AArch64::ST1Twov8h);
6753 return;
6754 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6755 SelectStore(Node, 2, AArch64::ST1Twov2s);
6756 return;
6757 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6758 SelectStore(Node, 2, AArch64::ST1Twov4s);
6759 return;
6760 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6761 SelectStore(Node, 2, AArch64::ST1Twov2d);
6762 return;
6763 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6764 SelectStore(Node, 2, AArch64::ST1Twov1d);
6765 return;
6766 }
6767 break;
6768 }
6769 case Intrinsic::aarch64_neon_st1x3: {
6770 if (VT == MVT::v8i8) {
6771 SelectStore(Node, 3, AArch64::ST1Threev8b);
6772 return;
6773 } else if (VT == MVT::v16i8) {
6774 SelectStore(Node, 3, AArch64::ST1Threev16b);
6775 return;
6776 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6777 VT == MVT::v4bf16) {
6778 SelectStore(Node, 3, AArch64::ST1Threev4h);
6779 return;
6780 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6781 VT == MVT::v8bf16) {
6782 SelectStore(Node, 3, AArch64::ST1Threev8h);
6783 return;
6784 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6785 SelectStore(Node, 3, AArch64::ST1Threev2s);
6786 return;
6787 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6788 SelectStore(Node, 3, AArch64::ST1Threev4s);
6789 return;
6790 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6791 SelectStore(Node, 3, AArch64::ST1Threev2d);
6792 return;
6793 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6794 SelectStore(Node, 3, AArch64::ST1Threev1d);
6795 return;
6796 }
6797 break;
6798 }
6799 case Intrinsic::aarch64_neon_st1x4: {
6800 if (VT == MVT::v8i8) {
6801 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6802 return;
6803 } else if (VT == MVT::v16i8) {
6804 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6805 return;
6806 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6807 VT == MVT::v4bf16) {
6808 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6809 return;
6810 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6811 VT == MVT::v8bf16) {
6812 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6813 return;
6814 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6815 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6816 return;
6817 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6818 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6819 return;
6820 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6821 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6822 return;
6823 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6824 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6825 return;
6826 }
6827 break;
6828 }
6829 case Intrinsic::aarch64_neon_st2: {
6830 if (VT == MVT::v8i8) {
6831 SelectStore(Node, 2, AArch64::ST2Twov8b);
6832 return;
6833 } else if (VT == MVT::v16i8) {
6834 SelectStore(Node, 2, AArch64::ST2Twov16b);
6835 return;
6836 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6837 VT == MVT::v4bf16) {
6838 SelectStore(Node, 2, AArch64::ST2Twov4h);
6839 return;
6840 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6841 VT == MVT::v8bf16) {
6842 SelectStore(Node, 2, AArch64::ST2Twov8h);
6843 return;
6844 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6845 SelectStore(Node, 2, AArch64::ST2Twov2s);
6846 return;
6847 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6848 SelectStore(Node, 2, AArch64::ST2Twov4s);
6849 return;
6850 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6851 SelectStore(Node, 2, AArch64::ST2Twov2d);
6852 return;
6853 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6854 SelectStore(Node, 2, AArch64::ST1Twov1d);
6855 return;
6856 }
6857 break;
6858 }
6859 case Intrinsic::aarch64_neon_st3: {
6860 if (VT == MVT::v8i8) {
6861 SelectStore(Node, 3, AArch64::ST3Threev8b);
6862 return;
6863 } else if (VT == MVT::v16i8) {
6864 SelectStore(Node, 3, AArch64::ST3Threev16b);
6865 return;
6866 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6867 VT == MVT::v4bf16) {
6868 SelectStore(Node, 3, AArch64::ST3Threev4h);
6869 return;
6870 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6871 VT == MVT::v8bf16) {
6872 SelectStore(Node, 3, AArch64::ST3Threev8h);
6873 return;
6874 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6875 SelectStore(Node, 3, AArch64::ST3Threev2s);
6876 return;
6877 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6878 SelectStore(Node, 3, AArch64::ST3Threev4s);
6879 return;
6880 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6881 SelectStore(Node, 3, AArch64::ST3Threev2d);
6882 return;
6883 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6884 SelectStore(Node, 3, AArch64::ST1Threev1d);
6885 return;
6886 }
6887 break;
6888 }
6889 case Intrinsic::aarch64_neon_st4: {
6890 if (VT == MVT::v8i8) {
6891 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6892 return;
6893 } else if (VT == MVT::v16i8) {
6894 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6895 return;
6896 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6897 VT == MVT::v4bf16) {
6898 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6899 return;
6900 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6901 VT == MVT::v8bf16) {
6902 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6903 return;
6904 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6905 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6906 return;
6907 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6908 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6909 return;
6910 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6911 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6912 return;
6913 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6914 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6915 return;
6916 }
6917 break;
6918 }
6919 case Intrinsic::aarch64_neon_st2lane: {
6920 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6921 SelectStoreLane(Node, 2, AArch64::ST2i8);
6922 return;
6923 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6924 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6925 SelectStoreLane(Node, 2, AArch64::ST2i16);
6926 return;
6927 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6928 VT == MVT::v2f32) {
6929 SelectStoreLane(Node, 2, AArch64::ST2i32);
6930 return;
6931 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6932 VT == MVT::v1f64) {
6933 SelectStoreLane(Node, 2, AArch64::ST2i64);
6934 return;
6935 }
6936 break;
6937 }
6938 case Intrinsic::aarch64_neon_st3lane: {
6939 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6940 SelectStoreLane(Node, 3, AArch64::ST3i8);
6941 return;
6942 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6943 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6944 SelectStoreLane(Node, 3, AArch64::ST3i16);
6945 return;
6946 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6947 VT == MVT::v2f32) {
6948 SelectStoreLane(Node, 3, AArch64::ST3i32);
6949 return;
6950 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6951 VT == MVT::v1f64) {
6952 SelectStoreLane(Node, 3, AArch64::ST3i64);
6953 return;
6954 }
6955 break;
6956 }
6957 case Intrinsic::aarch64_neon_st4lane: {
6958 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6959 SelectStoreLane(Node, 4, AArch64::ST4i8);
6960 return;
6961 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6962 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6963 SelectStoreLane(Node, 4, AArch64::ST4i16);
6964 return;
6965 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6966 VT == MVT::v2f32) {
6967 SelectStoreLane(Node, 4, AArch64::ST4i32);
6968 return;
6969 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6970 VT == MVT::v1f64) {
6971 SelectStoreLane(Node, 4, AArch64::ST4i64);
6972 return;
6973 }
6974 break;
6975 }
6976 case Intrinsic::aarch64_sve_st2q: {
6977 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6978 return;
6979 }
6980 case Intrinsic::aarch64_sve_st3q: {
6981 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6982 return;
6983 }
6984 case Intrinsic::aarch64_sve_st4q: {
6985 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6986 return;
6987 }
6988 case Intrinsic::aarch64_sve_st2: {
6989 if (VT == MVT::nxv16i8) {
6990 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6991 return;
6992 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6993 VT == MVT::nxv8bf16) {
6994 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6995 return;
6996 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6997 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6998 return;
6999 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7000 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
7001 return;
7002 }
7003 break;
7004 }
7005 case Intrinsic::aarch64_sve_st3: {
7006 if (VT == MVT::nxv16i8) {
7007 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
7008 return;
7009 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7010 VT == MVT::nxv8bf16) {
7011 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
7012 return;
7013 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7014 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
7015 return;
7016 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7017 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
7018 return;
7019 }
7020 break;
7021 }
7022 case Intrinsic::aarch64_sve_st4: {
7023 if (VT == MVT::nxv16i8) {
7024 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
7025 return;
7026 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7027 VT == MVT::nxv8bf16) {
7028 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
7029 return;
7030 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7031 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
7032 return;
7033 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7034 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
7035 return;
7036 }
7037 break;
7038 }
7039 }
7040 break;
7041 }
7042 case AArch64ISD::LD2post: {
7043 if (VT == MVT::v8i8) {
7044 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7045 return;
7046 } else if (VT == MVT::v16i8) {
7047 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7048 return;
7049 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7050 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7051 return;
7052 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7053 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7054 return;
7055 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7056 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7057 return;
7058 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7059 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7060 return;
7061 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7062 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7063 return;
7064 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7065 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7066 return;
7067 }
7068 break;
7069 }
7070 case AArch64ISD::LD3post: {
7071 if (VT == MVT::v8i8) {
7072 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7073 return;
7074 } else if (VT == MVT::v16i8) {
7075 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7076 return;
7077 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7078 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7079 return;
7080 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7081 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7082 return;
7083 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7084 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7085 return;
7086 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7087 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7088 return;
7089 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7090 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7091 return;
7092 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7093 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7094 return;
7095 }
7096 break;
7097 }
7098 case AArch64ISD::LD4post: {
7099 if (VT == MVT::v8i8) {
7100 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7101 return;
7102 } else if (VT == MVT::v16i8) {
7103 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7104 return;
7105 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7106 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7107 return;
7108 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7109 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7110 return;
7111 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7112 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7113 return;
7114 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7115 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7116 return;
7117 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7118 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7119 return;
7120 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7121 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7122 return;
7123 }
7124 break;
7125 }
7126 case AArch64ISD::LD1x2post: {
7127 if (VT == MVT::v8i8) {
7128 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7129 return;
7130 } else if (VT == MVT::v16i8) {
7131 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7132 return;
7133 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7134 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7135 return;
7136 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7137 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7138 return;
7139 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7140 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7141 return;
7142 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7143 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7144 return;
7145 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7146 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7147 return;
7148 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7149 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7150 return;
7151 }
7152 break;
7153 }
7154 case AArch64ISD::LD1x3post: {
7155 if (VT == MVT::v8i8) {
7156 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7157 return;
7158 } else if (VT == MVT::v16i8) {
7159 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7160 return;
7161 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7162 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7163 return;
7164 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7165 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7166 return;
7167 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7168 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7169 return;
7170 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7171 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7172 return;
7173 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7174 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7175 return;
7176 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7177 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7178 return;
7179 }
7180 break;
7181 }
7182 case AArch64ISD::LD1x4post: {
7183 if (VT == MVT::v8i8) {
7184 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7185 return;
7186 } else if (VT == MVT::v16i8) {
7187 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7188 return;
7189 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7190 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7191 return;
7192 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7193 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7194 return;
7195 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7196 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7197 return;
7198 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7199 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7200 return;
7201 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7202 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7203 return;
7204 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7205 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7206 return;
7207 }
7208 break;
7209 }
7210 case AArch64ISD::LD1DUPpost: {
7211 if (VT == MVT::v8i8) {
7212 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7213 return;
7214 } else if (VT == MVT::v16i8) {
7215 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7216 return;
7217 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7218 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7219 return;
7220 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7221 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7222 return;
7223 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7224 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7225 return;
7226 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7227 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7228 return;
7229 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7230 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7231 return;
7232 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7233 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7234 return;
7235 }
7236 break;
7237 }
7238 case AArch64ISD::LD2DUPpost: {
7239 if (VT == MVT::v8i8) {
7240 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7241 return;
7242 } else if (VT == MVT::v16i8) {
7243 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7244 return;
7245 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7246 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7247 return;
7248 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7249 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7250 return;
7251 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7252 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7253 return;
7254 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7255 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7256 return;
7257 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7258 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7259 return;
7260 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7261 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7262 return;
7263 }
7264 break;
7265 }
7266 case AArch64ISD::LD3DUPpost: {
7267 if (VT == MVT::v8i8) {
7268 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7269 return;
7270 } else if (VT == MVT::v16i8) {
7271 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7272 return;
7273 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7274 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7275 return;
7276 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7277 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7278 return;
7279 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7280 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7281 return;
7282 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7283 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7284 return;
7285 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7286 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7287 return;
7288 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7289 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7290 return;
7291 }
7292 break;
7293 }
7294 case AArch64ISD::LD4DUPpost: {
7295 if (VT == MVT::v8i8) {
7296 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7297 return;
7298 } else if (VT == MVT::v16i8) {
7299 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7300 return;
7301 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7302 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7303 return;
7304 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7305 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7306 return;
7307 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7308 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7309 return;
7310 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7311 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7312 return;
7313 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7314 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7315 return;
7316 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7317 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7318 return;
7319 }
7320 break;
7321 }
7322 case AArch64ISD::LD1LANEpost: {
7323 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7324 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7325 return;
7326 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7327 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7328 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7329 return;
7330 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7331 VT == MVT::v2f32) {
7332 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7333 return;
7334 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7335 VT == MVT::v1f64) {
7336 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7337 return;
7338 }
7339 break;
7340 }
7341 case AArch64ISD::LD2LANEpost: {
7342 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7343 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7344 return;
7345 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7346 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7347 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7348 return;
7349 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7350 VT == MVT::v2f32) {
7351 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7352 return;
7353 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7354 VT == MVT::v1f64) {
7355 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7356 return;
7357 }
7358 break;
7359 }
7360 case AArch64ISD::LD3LANEpost: {
7361 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7362 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7363 return;
7364 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7365 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7366 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7367 return;
7368 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7369 VT == MVT::v2f32) {
7370 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7371 return;
7372 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7373 VT == MVT::v1f64) {
7374 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7375 return;
7376 }
7377 break;
7378 }
7379 case AArch64ISD::LD4LANEpost: {
7380 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7381 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7382 return;
7383 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7384 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7385 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7386 return;
7387 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7388 VT == MVT::v2f32) {
7389 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7390 return;
7391 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7392 VT == MVT::v1f64) {
7393 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7394 return;
7395 }
7396 break;
7397 }
7398 case AArch64ISD::ST2post: {
7399 VT = Node->getOperand(1).getValueType();
7400 if (VT == MVT::v8i8) {
7401 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7402 return;
7403 } else if (VT == MVT::v16i8) {
7404 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7405 return;
7406 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7407 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7408 return;
7409 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7410 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7411 return;
7412 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7413 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7414 return;
7415 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7416 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7417 return;
7418 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7419 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7420 return;
7421 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7422 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7423 return;
7424 }
7425 break;
7426 }
7427 case AArch64ISD::ST3post: {
7428 VT = Node->getOperand(1).getValueType();
7429 if (VT == MVT::v8i8) {
7430 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7431 return;
7432 } else if (VT == MVT::v16i8) {
7433 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7434 return;
7435 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7436 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7437 return;
7438 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7439 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7440 return;
7441 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7442 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7443 return;
7444 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7445 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7446 return;
7447 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7448 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7449 return;
7450 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7451 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7452 return;
7453 }
7454 break;
7455 }
7456 case AArch64ISD::ST4post: {
7457 VT = Node->getOperand(1).getValueType();
7458 if (VT == MVT::v8i8) {
7459 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7460 return;
7461 } else if (VT == MVT::v16i8) {
7462 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7463 return;
7464 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7465 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7466 return;
7467 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7468 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7469 return;
7470 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7471 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7472 return;
7473 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7474 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7475 return;
7476 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7477 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7478 return;
7479 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7480 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7481 return;
7482 }
7483 break;
7484 }
7485 case AArch64ISD::ST1x2post: {
7486 VT = Node->getOperand(1).getValueType();
7487 if (VT == MVT::v8i8) {
7488 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7489 return;
7490 } else if (VT == MVT::v16i8) {
7491 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7492 return;
7493 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7494 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7495 return;
7496 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7497 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7498 return;
7499 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7500 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7501 return;
7502 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7503 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7504 return;
7505 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7506 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7507 return;
7508 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7509 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7510 return;
7511 }
7512 break;
7513 }
7514 case AArch64ISD::ST1x3post: {
7515 VT = Node->getOperand(1).getValueType();
7516 if (VT == MVT::v8i8) {
7517 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7518 return;
7519 } else if (VT == MVT::v16i8) {
7520 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7521 return;
7522 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7523 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7524 return;
7525 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7526 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7527 return;
7528 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7529 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7530 return;
7531 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7532 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7533 return;
7534 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7535 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7536 return;
7537 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7538 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7539 return;
7540 }
7541 break;
7542 }
7543 case AArch64ISD::ST1x4post: {
7544 VT = Node->getOperand(1).getValueType();
7545 if (VT == MVT::v8i8) {
7546 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7547 return;
7548 } else if (VT == MVT::v16i8) {
7549 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7550 return;
7551 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7552 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7553 return;
7554 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7555 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7556 return;
7557 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7558 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7559 return;
7560 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7561 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7562 return;
7563 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7564 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7565 return;
7566 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7567 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7568 return;
7569 }
7570 break;
7571 }
7572 case AArch64ISD::ST2LANEpost: {
7573 VT = Node->getOperand(1).getValueType();
7574 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7575 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7576 return;
7577 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7578 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7579 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7580 return;
7581 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7582 VT == MVT::v2f32) {
7583 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7584 return;
7585 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7586 VT == MVT::v1f64) {
7587 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7588 return;
7589 }
7590 break;
7591 }
7592 case AArch64ISD::ST3LANEpost: {
7593 VT = Node->getOperand(1).getValueType();
7594 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7595 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7596 return;
7597 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7598 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7599 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7600 return;
7601 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7602 VT == MVT::v2f32) {
7603 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7604 return;
7605 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7606 VT == MVT::v1f64) {
7607 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7608 return;
7609 }
7610 break;
7611 }
7612 case AArch64ISD::ST4LANEpost: {
7613 VT = Node->getOperand(1).getValueType();
7614 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7615 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7616 return;
7617 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7618 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7619 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7620 return;
7621 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7622 VT == MVT::v2f32) {
7623 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7624 return;
7625 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7626 VT == MVT::v1f64) {
7627 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7628 return;
7629 }
7630 break;
7631 }
7632 }
7633
7634 // Select the default instruction
7635 SelectCode(Node);
7636}
7637
7638/// createAArch64ISelDag - This pass converts a legalized DAG into an
7639/// AArch64-specific DAG, ready for instruction scheduling.
7640FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7641 CodeGenOptLevel OptLevel) {
7642 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7643}
7644
7645/// When \p PredVT is a scalable vector predicate in the form
7646/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7647/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7648/// structured vectors (NumVec >1), the output data type is
7649/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7650/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7651/// EVT.
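/// A worked example of the rule above: PredVT == MVT::nxv4i1 gives an element
/// width of 128 / 4 = 32, so the result is MVT::nxv4i32; with NumVec == 2 the
/// result widens to MVT::nxv8i32.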
7652static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7653 unsigned NumVec) {
7654 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7655 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7656 return EVT();
7657
7658 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7659 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7660 return EVT();
7661
7662 ElementCount EC = PredVT.getVectorElementCount();
7663 EVT ScalarVT =
7664 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7665 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7666
7667 return MemVT;
7668}
7669
7670/// Return the EVT of the data associated to a memory operation in \p
7671/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
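/// For example (per the masked-load handling below), an extending masked load
/// that produces nxv4i32 from nxv4i8 memory reports nxv4i8, the in-memory type.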
7672static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7673 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7674 return MemIntr->getMemoryVT();
7675
7676 if (isa<MemSDNode>(Root)) {
7677 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7678
7679 EVT DataVT;
7680 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7681 DataVT = Load->getValueType(0);
7682 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7683 DataVT = Load->getValueType(0);
7684 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7685 DataVT = Store->getValue().getValueType();
7686 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7687 DataVT = Store->getValue().getValueType();
7688 else
7689 llvm_unreachable("Unexpected MemSDNode!");
7690
7691 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7692 }
7693
7694 const unsigned Opcode = Root->getOpcode();
7695 // For custom ISD nodes, we have to look at them individually to extract the
7696 // type of the data moved to/from memory.
7697 switch (Opcode) {
7698 case AArch64ISD::LD1_MERGE_ZERO:
7699 case AArch64ISD::LD1S_MERGE_ZERO:
7700 case AArch64ISD::LDNF1_MERGE_ZERO:
7701 case AArch64ISD::LDNF1S_MERGE_ZERO:
7702 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7703 case AArch64ISD::ST1_PRED:
7704 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7705 default:
7706 break;
7707 }
7708
7709 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7710 return EVT();
7711
7712 switch (Root->getConstantOperandVal(1)) {
7713 default:
7714 return EVT();
7715 case Intrinsic::aarch64_sme_ldr:
7716 case Intrinsic::aarch64_sme_str:
7717 return MVT::nxv16i8;
7718 case Intrinsic::aarch64_sve_prf:
7719 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7720 // width of the predicate.
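// For example, a prefetch governed by an nxv4i1 predicate is treated as an
// access to one full vector of 32-bit elements, i.e. nxv4i32.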
7721 return getPackedVectorTypeFromPredicateType(
7722 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7723 case Intrinsic::aarch64_sve_ld2_sret:
7724 case Intrinsic::aarch64_sve_ld2q_sret:
7725 return getPackedVectorTypeFromPredicateType(
7726 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7727 case Intrinsic::aarch64_sve_st2q:
7728 return getPackedVectorTypeFromPredicateType(
7729 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7730 case Intrinsic::aarch64_sve_ld3_sret:
7731 case Intrinsic::aarch64_sve_ld3q_sret:
7732 return getPackedVectorTypeFromPredicateType(
7733 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7734 case Intrinsic::aarch64_sve_st3q:
7735 return getPackedVectorTypeFromPredicateType(
7736 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7737 case Intrinsic::aarch64_sve_ld4_sret:
7738 case Intrinsic::aarch64_sve_ld4q_sret:
7739 return getPackedVectorTypeFromPredicateType(
7740 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7741 case Intrinsic::aarch64_sve_st4q:
7742 return getPackedVectorTypeFromPredicateType(
7743 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7744 case Intrinsic::aarch64_sve_ld1udq:
7745 case Intrinsic::aarch64_sve_st1dq:
7746 return EVT(MVT::nxv1i64);
7747 case Intrinsic::aarch64_sve_ld1uwq:
7748 case Intrinsic::aarch64_sve_st1wq:
7749 return EVT(MVT::nxv1i32);
7750 }
7751}
7752
7753/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7754/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7755/// where Root is the memory access using N for its address.
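/// For example, with MemVT == nxv4i32 (16-byte minimum size), an address of
/// the form (add x0, (vscale * 32)) folds to Base = x0 and OffImm = 2,
/// provided 2 lies within [Min, Max].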
7756template <int64_t Min, int64_t Max>
7757bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7758 SDValue &Base,
7759 SDValue &OffImm) {
7760 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7761 const DataLayout &DL = CurDAG->getDataLayout();
7762 const MachineFrameInfo &MFI = MF->getFrameInfo();
7763
7764 if (N.getOpcode() == ISD::FrameIndex) {
7765 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7766 // We can only encode VL scaled offsets, so only fold in frame indexes
7767 // referencing SVE objects.
7768 if (MFI.hasScalableStackID(FI)) {
7769 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7770 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7771 return true;
7772 }
7773
7774 return false;
7775 }
7776
7777 if (MemVT == EVT())
7778 return false;
7779
7780 if (N.getOpcode() != ISD::ADD)
7781 return false;
7782
7783 SDValue VScale = N.getOperand(1);
7784 int64_t MulImm = std::numeric_limits<int64_t>::max();
7785 if (VScale.getOpcode() == ISD::VSCALE) {
7786 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7787 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7788 int64_t ByteOffset = C->getSExtValue();
7789 const auto KnownVScale =
7790 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7791
7792 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7793 return false;
7794
7795 MulImm = ByteOffset / KnownVScale;
7796 } else
7797 return false;
7798
7799 TypeSize TS = MemVT.getSizeInBits();
7800 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7801
7802 if ((MulImm % MemWidthBytes) != 0)
7803 return false;
7804
7805 int64_t Offset = MulImm / MemWidthBytes;
7806 if (Offset < Min || Offset > Max)
7807 return false;
7808
7809 Base = N.getOperand(0);
7810 if (Base.getOpcode() == ISD::FrameIndex) {
7811 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7812 // We can only encode VL scaled offsets, so only fold in frame indexes
7813 // referencing SVE objects.
7814 if (MFI.hasScalableStackID(FI))
7815 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7816 }
7817
7818 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7819 return true;
7820}
7821
7822/// Select register plus register addressing mode for SVE, with scaled
7823/// offset.
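/// For example, with Scale == 2 an address (add x0, (shl x1, 2)) selects
/// Base = x0 and Offset = x1, matching the [x0, x1, lsl #2] operand form.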
7824bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7825 SDValue &Base,
7826 SDValue &Offset) {
7827 if (N.getOpcode() != ISD::ADD)
7828 return false;
7829
7830 // Process an ADD node.
7831 const SDValue LHS = N.getOperand(0);
7832 const SDValue RHS = N.getOperand(1);
7833
7834 // 8-bit data does not come with the SHL node, so it is treated
7835 // separately.
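// For example, a byte-element address (add x0, x1) is matched directly as
// Base = x0, Offset = x1 with no shift.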
7836 if (Scale == 0) {
7837 Base = LHS;
7838 Offset = RHS;
7839 return true;
7840 }
7841
7842 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7843 int64_t ImmOff = C->getSExtValue();
7844 unsigned Size = 1 << Scale;
7845
7846 // To use the reg+reg addressing mode, the immediate must be a multiple of
7847 // the vector element's byte size.
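// For example, with Scale == 2 (4-byte elements) ImmOff == 8 becomes an
// offset register holding 2, whereas ImmOff == 6 cannot use this mode.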
7848 if (ImmOff % Size)
7849 return false;
7850
7851 SDLoc DL(N);
7852 Base = LHS;
7853 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7854 SDValue Ops[] = {Offset};
7855 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7856 Offset = SDValue(MI, 0);
7857 return true;
7858 }
7859
7860 // Check if the RHS is a shift node with a constant.
7861 if (RHS.getOpcode() != ISD::SHL)
7862 return false;
7863
7864 const SDValue ShiftRHS = RHS.getOperand(1);
7865 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7866 if (C->getZExtValue() == Scale) {
7867 Base = LHS;
7868 Offset = RHS.getOperand(0);
7869 return true;
7870 }
7871
7872 return false;
7873}
7874
7875bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7876 const AArch64TargetLowering *TLI =
7877 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7878
7879 return TLI->isAllActivePredicate(*CurDAG, N);
7880}
7881
7882bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7883 EVT VT = N.getValueType();
7884 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7885}
7886
7887bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7888 SDValue &Base, SDValue &Offset,
7889 unsigned Scale) {
7890 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7891 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7892 int64_t ImmOff = C->getSExtValue();
7893 if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))
7894 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7895 }
7896 return SDValue();
7897 };
7898
7899 if (SDValue C = MatchConstantOffset(N)) {
7900 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7901 Offset = C;
7902 return true;
7903 }
7904
7905 // Try to untangle an ADD node into a 'reg + offset'
7906 if (CurDAG->isBaseWithConstantOffset(N)) {
7907 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7908 Base = N.getOperand(0);
7909 Offset = C;
7910 return true;
7911 }
7912 }
7913
7914 // By default, just match reg + 0.
7915 Base = N;
7916 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7917 return true;
7918}
7919
7920bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7921 SDValue &Imm) {
7922 AArch64CC::CondCode CC =
7923 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7924 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7925 // Check conservatively if the immediate fits the valid range [0, 64).
7926 // Immediate variants for GE and HS definitely need to be decremented
7927 // when lowering the pseudos later, so an immediate of 1 would become 0.
7928 // For the inverse conditions LT and LO we don't know for sure if they
7929 // will need a decrement but should the decision be made to reverse the
7930 // branch condition, we again end up with the need to decrement.
7931 // The same argument holds for LE, LS, GT and HI and possibly
7932 // incremented immediates. This can lead to slightly less optimal
7933 // codegen, e.g. we never codegen the legal case
7934 // cblt w0, #63, A
7935 // because we could end up with the illegal case
7936 // cbge w0, #64, B
7937 // should the decision to reverse the branch direction be made. For the
7938 // lower bound cases this is no problem since we can express comparisons
7939 // against 0 with either tbz/tbnz or using wzr/xzr.
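// For example, accepting #1 for GE is safe because a reversal only rewrites
// it as a compare against #0, which is still encodable.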
7940 uint64_t LowerBound = 0, UpperBound = 64;
7941 switch (CC) {
7942 case AArch64CC::GE:
7943 case AArch64CC::HS:
7944 case AArch64CC::LT:
7945 case AArch64CC::LO:
7946 LowerBound = 1;
7947 break;
7948 case AArch64CC::LE:
7949 case AArch64CC::LS:
7950 case AArch64CC::GT:
7951 case AArch64CC::HI:
7952 UpperBound = 63;
7953 break;
7954 default:
7955 break;
7956 }
7957
7958 if (CN->getAPIntValue().uge(LowerBound) &&
7959 CN->getAPIntValue().ult(UpperBound)) {
7960 SDLoc DL(N);
7961 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7962 return true;
7963 }
7964 }
7965
7966 return false;
7967}
7968
7969template <bool MatchCBB>
7970bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7971 SDValue &ExtType) {
7972
7973 // Use an invalid shift-extend value to indicate we don't need to extend later.
7974 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7975 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7976 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7977 return false;
7978 Reg = N.getOperand(0);
7979 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7980 SDLoc(N), MVT::i32);
7981 return true;
7982 }
7983
7984 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
7985
7986 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7987 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7988 Reg = N.getOperand(0);
7989 ExtType =
7990 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7991 return true;
7992 }
7993
7994 return false;
7995}
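// Hedged sketch of the two shapes accepted for MatchCBB == true (byte
// compare-and-branch); the second line assumes getExtendTypeForNode's usual
// classification of (and x, 0xff) as UXTB:
//   (AssertZext %x, i8)  --> Reg = %x, ExtType = InvalidShiftExtend (no-op)
//   (and %x, 0xff)       --> Reg = %x, ExtType = encoding of UXTB
// For MatchCBB == false the same applies with i16 and UXTH/SXTH.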
7996
7997void AArch64DAGToDAGISel::PreprocessISelDAG() {
7998 bool MadeChange = false;
7999 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
8000 if (N.use_empty())
8001 continue;
8002
8003 SDValue Result;
8004 switch (N.getOpcode()) {
8005 case ISD::SCALAR_TO_VECTOR: {
8006 EVT ScalarTy = N.getValueType(0).getVectorElementType();
8007 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
8008 ScalarTy == N.getOperand(0).getValueType())
8009 Result = addBitcastHints(*CurDAG, N);
8010
8011 break;
8012 }
8013 default:
8014 break;
8015 }
8016
8017 if (Result) {
8018 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
8019 LLVM_DEBUG(N.dump(CurDAG));
8020 LLVM_DEBUG(dbgs() << "\nNew: ");
8021 LLVM_DEBUG(Result.dump(CurDAG));
8022 LLVM_DEBUG(dbgs() << "\n");
8023
8024 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
8025 MadeChange = true;
8026 }
8027 }
8028
8029 if (MadeChange)
8030 CurDAG->RemoveDeadNodes();
8031
8033}
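// Illustrative effect of the preprocessing above (assumed from the
// addBitcastHints name and the guard on the element type): given
//   t1: v2i64 = scalar_to_vector t0:i64
// the node may be rewritten with bitcast hints so selection can keep t0 in
// an FPR and avoid a GPR->FPR copy. A scalar whose type differs from the
// vector element type (e.g. an i32 feeding a v2i64) is skipped by the guard.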