1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 AArch64DAGToDAGISel() = delete;
48
49 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50 CodeGenOptLevel OptLevel)
51 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
52
53 bool runOnMachineFunction(MachineFunction &MF) override {
54 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55 return SelectionDAGISel::runOnMachineFunction(MF);
56 }
57
58 void Select(SDNode *Node) override;
59
60 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61 /// inline asm expressions.
62 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63 InlineAsm::ConstraintCode ConstraintID,
64 std::vector<SDValue> &OutOps) override;
65
66 template <signed Low, signed High, signed Scale>
67 bool SelectRDVLImm(SDValue N, SDValue &Imm);
68
69 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74 return SelectShiftedRegister(N, false, Reg, Shift);
75 }
76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77 return SelectShiftedRegister(N, true, Reg, Shift);
78 }
79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81 }
82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84 }
85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87 }
88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93 }
94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96 }
97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed(N, 1, Base, OffImm);
102 }
103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexed(N, 2, Base, OffImm);
105 }
106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexed(N, 4, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 8, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 16, Base, OffImm);
114 }
115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117 }
118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120 }
121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129 }
130 template <unsigned Size, unsigned Max>
131 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132 // Test if there is an appropriate addressing mode and check if the
133 // immediate fits.
134 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135 if (Found) {
136 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
137 int64_t C = CI->getSExtValue();
138 if (C <= Max)
139 return true;
140 }
141 }
142
143 // Otherwise, base only, materialize address in register.
144 Base = N;
145 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
146 return true;
147 }
148
149 template<int Width>
150 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151 SDValue &SignExtend, SDValue &DoShift) {
152 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
153 }
154
155 template<int Width>
156 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157 SDValue &SignExtend, SDValue &DoShift) {
158 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
159 }
160
161 bool SelectExtractHigh(SDValue N, SDValue &Res) {
162 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
163 N = N->getOperand(0);
164 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
165 !isa<ConstantSDNode>(N->getOperand(1)))
166 return false;
167 EVT VT = N->getValueType(0);
168 EVT LVT = N->getOperand(0).getValueType();
169 unsigned Index = N->getConstantOperandVal(1);
170 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
171 Index != VT.getVectorNumElements())
172 return false;
173 Res = N->getOperand(0);
174 return true;
175 }
176
177 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178 if (N.getOpcode() != AArch64ISD::VLSHR)
179 return false;
180 SDValue Op = N->getOperand(0);
181 EVT VT = Op.getValueType();
182 unsigned ShtAmt = N->getConstantOperandVal(1);
183 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
184 return false;
185
186 APInt Imm;
187 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
188 Imm = APInt(VT.getScalarSizeInBits(),
189 Op.getOperand(1).getConstantOperandVal(0)
190 << Op.getOperand(1).getConstantOperandVal(1));
191 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
192 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0));
195 else
196 return false;
197
198 if (Imm != 1ULL << (ShtAmt - 1))
199 return false;
200
201 Res1 = Op.getOperand(0);
202 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
203 return true;
204 }
205
206 bool SelectDupZeroOrUndef(SDValue N) {
207 switch(N->getOpcode()) {
208 case ISD::UNDEF:
209 return true;
210 case AArch64ISD::DUP:
211 case ISD::SPLAT_VECTOR: {
212 auto Opnd0 = N->getOperand(0);
213 if (isNullConstant(Opnd0))
214 return true;
215 if (isNullFPConstant(Opnd0))
216 return true;
217 break;
218 }
219 default:
220 break;
221 }
222
223 return false;
224 }
225
226 bool SelectDupZero(SDValue N) {
227 switch(N->getOpcode()) {
228 case AArch64ISD::DUP:
229 case ISD::SPLAT_VECTOR: {
230 auto Opnd0 = N->getOperand(0);
231 if (isNullConstant(Opnd0))
232 return true;
233 if (isNullFPConstant(Opnd0))
234 return true;
235 break;
236 }
237 }
238
239 return false;
240 }
241
242 bool SelectDupNegativeZero(SDValue N) {
243 switch(N->getOpcode()) {
244 case AArch64ISD::DUP:
245 case ISD::SPLAT_VECTOR: {
246 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
247 return Const && Const->isZero() && Const->isNegative();
248 }
249 }
250
251 return false;
252 }
253
254 template<MVT::SimpleValueType VT>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
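 // For example, with Scale == 16 and Shift == false, a constant N of 32
 // yields the multiplier 2 (provided 2 lies within [Min, Max]).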
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 if (!isa<ConstantSDNode>(N))
298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 if (!isa<ConstantSDNode>(N))
319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348 /// between 1 and 4 elements. If it contains a single element, that is
349 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
350 SDValue createDTuple(ArrayRef<SDValue> Vecs);
351 SDValue createQTuple(ArrayRef<SDValue> Vecs);
352 // Form a sequence of SVE registers for instructions using list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 bool trySelectStackSlotTagP(SDNode *N);
370 void SelectTagP(SDNode *N);
371
372 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
377 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
379 unsigned Opc_rr, unsigned Opc_ri,
380 bool IsIntr = false);
381 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
382 unsigned Scale, unsigned Opc_ri,
383 unsigned Opc_rr);
384 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
385 bool IsZmMulti, unsigned Opcode,
386 bool HasPred = false);
387 void SelectPExtPair(SDNode *N, unsigned Opc);
388 void SelectWhilePair(SDNode *N, unsigned Opc);
389 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
392 bool IsTupleInput, unsigned Opc);
393 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
394
395 template <unsigned MaxIdx, unsigned Scale>
396 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397 unsigned Op);
398 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned Op,
399 unsigned MaxIdx, unsigned Scale);
400 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
401 /// SVE Reg+Imm addressing mode.
402 template <int64_t Min, int64_t Max>
403 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
404 SDValue &OffImm);
405 /// SVE Reg+Reg address mode.
406 template <unsigned Scale>
407 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
408 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
409 }
410
411 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
412 uint32_t MaxImm);
413
414 template <unsigned MaxIdx, unsigned Scale>
415 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
416 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
417 }
418
419 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
420 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
421 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
424 unsigned Opc_rr, unsigned Opc_ri);
425 std::tuple<unsigned, SDValue, SDValue>
426 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
427 const SDValue &OldBase, const SDValue &OldOffset,
428 unsigned Scale);
429
430 bool tryBitfieldExtractOp(SDNode *N);
431 bool tryBitfieldExtractOpFromSExt(SDNode *N);
432 bool tryBitfieldInsertOp(SDNode *N);
433 bool tryBitfieldInsertInZeroOp(SDNode *N);
434 bool tryShiftAmountMod(SDNode *N);
435
436 bool tryReadRegister(SDNode *N);
437 bool tryWriteRegister(SDNode *N);
438
439 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
440 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
441
442 bool trySelectXAR(SDNode *N);
443
444// Include the pieces autogenerated from the target description.
445#include "AArch64GenDAGISel.inc"
446
447private:
448 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
449 SDValue &Shift);
450 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
451 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
452 SDValue &OffImm) {
453 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
454 }
455 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
456 unsigned Size, SDValue &Base,
457 SDValue &OffImm);
458 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
459 SDValue &OffImm);
460 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &Offset, SDValue &SignExtend,
464 SDValue &DoShift);
465 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &Offset, SDValue &SignExtend,
467 SDValue &DoShift);
468 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
469 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
470 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
471 SDValue &Offset, SDValue &SignExtend);
472
473 template<unsigned RegWidth>
474 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
475 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
476 }
477
478 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
479
480 template<unsigned RegWidth>
481 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
482 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
483 }
484
485 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
486 unsigned Width);
487
488 bool SelectCMP_SWAP(SDNode *N);
489
490 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
491 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
492 bool Negate);
493 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
494 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
495
496 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
497 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
498 bool AllowSaturation, SDValue &Imm);
499
500 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
501 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
502 SDValue &Offset);
503 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
504 SDValue &Offset, unsigned Scale = 1);
505
506 bool SelectAllActivePredicate(SDValue N);
507 bool SelectAnyPredicate(SDValue N);
508};
509
510class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
511public:
512 static char ID;
513 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
514 CodeGenOptLevel OptLevel)
515 : SelectionDAGISelLegacy(
516 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
517};
518} // end anonymous namespace
519
520char AArch64DAGToDAGISelLegacy::ID = 0;
521
522INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
523
524/// isIntImmediate - This method tests to see if the node is a constant
525/// operand. If so, Imm will receive the zero-extended value.
526static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
527 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
528 Imm = C->getZExtValue();
529 return true;
530 }
531 return false;
532}
533
534// isIntImmediate - This method tests to see if N is a constant operand.
535// If so, Imm will receive the value.
536static bool isIntImmediate(SDValue N, uint64_t &Imm) {
537 return isIntImmediate(N.getNode(), Imm);
538}
539
540// isOpcWithIntImmediate - This method tests to see if the node has a specific
541// opcode and that it has an immediate integer right operand.
542// If so, Imm will receive the zero-extended value.
543static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
544 uint64_t &Imm) {
545 return N->getOpcode() == Opc &&
546 isIntImmediate(N->getOperand(1).getNode(), Imm);
547}
548
549// isIntImmediateEq - This method tests to see if N is a constant operand that
550// is equivalent to 'ImmExpected'.
551#ifndef NDEBUG
552static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
553 uint64_t Imm;
554 if (!isIntImmediate(N.getNode(), Imm))
555 return false;
556 return Imm == ImmExpected;
557}
558#endif
559
560bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
561 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
562 std::vector<SDValue> &OutOps) {
563 switch(ConstraintID) {
564 default:
565 llvm_unreachable("Unexpected asm memory constraint");
566 case InlineAsm::ConstraintCode::m:
567 case InlineAsm::ConstraintCode::o:
568 case InlineAsm::ConstraintCode::Q:
569 // We need to make sure that this one operand does not end up in XZR, thus
570 // require the address to be in a PointerRegClass register.
571 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
572 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
573 SDLoc dl(Op);
574 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
575 SDValue NewOp =
576 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
577 dl, Op.getValueType(),
578 Op, RC), 0);
579 OutOps.push_back(NewOp);
580 return false;
581 }
582 return true;
583}
584
585/// SelectArithImmed - Select an immediate value that can be represented as
586/// a 12-bit value shifted left by either 0 or 12. If so, return true with
587/// Val set to the 12-bit value and Shift set to the shifter operand.
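/// For example, 0xabc is selected as (#0xabc, LSL #0) and 0xabc000 as
/// (#0xabc, LSL #12), while 0xabc00 is rejected (its low 12 bits are non-zero
/// and the whole value does not fit in 12 bits).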
588bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
589 SDValue &Shift) {
590 // This function is called from the addsub_shifted_imm ComplexPattern,
591 // which lists [imm] as the list of opcodes it's interested in; however,
592 // we still need to check whether the operand is actually an immediate
593 // here because the ComplexPattern opcode list is only used in
594 // root-level opcode matching.
595 if (!isa<ConstantSDNode>(N.getNode()))
596 return false;
597
598 uint64_t Immed = N.getNode()->getAsZExtVal();
599 unsigned ShiftAmt;
600
601 if (Immed >> 12 == 0) {
602 ShiftAmt = 0;
603 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
604 ShiftAmt = 12;
605 Immed = Immed >> 12;
606 } else
607 return false;
608
609 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
610 SDLoc dl(N);
611 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
612 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
613 return true;
614}
615
616/// SelectNegArithImmed - As above, but negates the value before trying to
617/// select it.
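/// For example, an i32 immediate of -5 negates to 5, which fits the 12-bit
/// form, so the caller can use the opposite ADD/SUB instruction with #5.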
618bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
619 SDValue &Shift) {
620 // This function is called from the addsub_shifted_imm ComplexPattern,
621 // which lists [imm] as the list of opcodes it's interested in; however,
622 // we still need to check whether the operand is actually an immediate
623 // here because the ComplexPattern opcode list is only used in
624 // root-level opcode matching.
625 if (!isa<ConstantSDNode>(N.getNode()))
626 return false;
627
628 // The immediate operand must be a 24-bit zero-extended immediate.
629 uint64_t Immed = N.getNode()->getAsZExtVal();
630
631 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
632 // have the opposite effect on the C flag, so this pattern mustn't match under
633 // those circumstances.
634 if (Immed == 0)
635 return false;
636
637 if (N.getValueType() == MVT::i32)
638 Immed = ~((uint32_t)Immed) + 1;
639 else
640 Immed = ~Immed + 1ULL;
641 if (Immed & 0xFFFFFFFFFF000000ULL)
642 return false;
643
644 Immed &= 0xFFFFFFULL;
645 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
646 Shift);
647}
648
649/// getShiftTypeForNode - Translate a shift node to the corresponding
650/// ShiftType value.
651static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
652 switch (N.getOpcode()) {
653 default:
654 return AArch64_AM::InvalidShiftExtend;
655 case ISD::SHL:
656 return AArch64_AM::LSL;
657 case ISD::SRL:
658 return AArch64_AM::LSR;
659 case ISD::SRA:
660 return AArch64_AM::ASR;
661 case ISD::ROTR:
662 return AArch64_AM::ROR;
663 }
664}
665
666/// Determine whether it is worth it to fold SHL into the addressing
667/// mode.
668static bool isWorthFoldingSHL(SDValue V) {
669 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
670 // It is worth folding logical shift of up to three places.
671 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
672 if (!CSD)
673 return false;
674 unsigned ShiftVal = CSD->getZExtValue();
675 if (ShiftVal > 3)
676 return false;
677
678 // Check if this particular node is reused in any non-memory related
679 // operation. If yes, do not try to fold this node into the address
680 // computation, since the computation will be kept.
681 const SDNode *Node = V.getNode();
682 for (SDNode *UI : Node->uses())
683 if (!isa<MemSDNode>(*UI))
684 for (SDNode *UII : UI->uses())
685 if (!isa<MemSDNode>(*UII))
686 return false;
687 return true;
688}
689
690/// Determine whether it is worth folding V into an extended register
691/// addressing mode.
692bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
693 // Trivial if we are optimizing for code size or if there is only
694 // one use of the value.
695 if (CurDAG->shouldOptForSize() || V.hasOneUse())
696 return true;
697
698 // If a subtarget has a slow shift, folding a shift into multiple loads
699 // costs additional micro-ops.
700 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
701 return false;
702
703 // Check whether we're going to emit the address arithmetic anyway because
704 // it's used by a non-address operation.
705 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
706 return true;
707 if (V.getOpcode() == ISD::ADD) {
708 const SDValue LHS = V.getOperand(0);
709 const SDValue RHS = V.getOperand(1);
710 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
711 return true;
712 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
713 return true;
714 }
715
716 // It hurts otherwise, since the value will be reused.
717 return false;
718}
719
720/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
721/// so that more shifted-register operands can be selected.
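/// For example, on i32, (and (shl x, #2), #0xfffffff0) becomes a UBFM that
/// computes (srl x, #2), used as a shifted-register operand with LSL #4.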
722bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
723 SDValue &Shift) {
724 EVT VT = N.getValueType();
725 if (VT != MVT::i32 && VT != MVT::i64)
726 return false;
727
728 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
729 return false;
730 SDValue LHS = N.getOperand(0);
731 if (!LHS->hasOneUse())
732 return false;
733
734 unsigned LHSOpcode = LHS->getOpcode();
735 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
736 return false;
737
738 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
739 if (!ShiftAmtNode)
740 return false;
741
742 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
743 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
744 if (!RHSC)
745 return false;
746
747 APInt AndMask = RHSC->getAPIntValue();
748 unsigned LowZBits, MaskLen;
749 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
750 return false;
751
752 unsigned BitWidth = N.getValueSizeInBits();
753 SDLoc DL(LHS);
754 uint64_t NewShiftC;
755 unsigned NewShiftOp;
756 if (LHSOpcode == ISD::SHL) {
757 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
758 // BitWidth != LowZBits + MaskLen doesn't match the pattern
759 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
760 return false;
761
762 NewShiftC = LowZBits - ShiftAmtC;
763 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
764 } else {
765 if (LowZBits == 0)
766 return false;
767
768 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
769 NewShiftC = LowZBits + ShiftAmtC;
770 if (NewShiftC >= BitWidth)
771 return false;
772
773 // SRA need all high bits
774 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
775 return false;
776
777 // SRL high bits can be 0 or 1
778 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
779 return false;
780
781 if (LHSOpcode == ISD::SRL)
782 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
783 else
784 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
785 }
786
787 assert(NewShiftC < BitWidth && "Invalid shift amount");
788 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
789 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
790 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
791 NewShiftAmt, BitWidthMinus1),
792 0);
793 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
794 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
795 return true;
796}
797
798/// getExtendTypeForNode - Translate an extend node to the corresponding
799/// ExtendType value.
800static AArch64_AM::ShiftExtendType
801getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
802 if (N.getOpcode() == ISD::SIGN_EXTEND ||
803 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
804 EVT SrcVT;
805 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
806 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
807 else
808 SrcVT = N.getOperand(0).getValueType();
809
810 if (!IsLoadStore && SrcVT == MVT::i8)
811 return AArch64_AM::SXTB;
812 else if (!IsLoadStore && SrcVT == MVT::i16)
813 return AArch64_AM::SXTH;
814 else if (SrcVT == MVT::i32)
815 return AArch64_AM::SXTW;
816 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
817
818 return AArch64_AM::InvalidShiftExtend;
819 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
820 N.getOpcode() == ISD::ANY_EXTEND) {
821 EVT SrcVT = N.getOperand(0).getValueType();
822 if (!IsLoadStore && SrcVT == MVT::i8)
823 return AArch64_AM::UXTB;
824 else if (!IsLoadStore && SrcVT == MVT::i16)
825 return AArch64_AM::UXTH;
826 else if (SrcVT == MVT::i32)
827 return AArch64_AM::UXTW;
828 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
829
830 return AArch64_AM::InvalidShiftExtend;
831 } else if (N.getOpcode() == ISD::AND) {
832 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
833 if (!CSD)
834 return AArch64_AM::InvalidShiftExtend;
835 uint64_t AndMask = CSD->getZExtValue();
836
837 switch (AndMask) {
838 default:
839 return AArch64_AM::InvalidShiftExtend;
840 case 0xFF:
841 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
842 case 0xFFFF:
843 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
844 case 0xFFFFFFFF:
845 return AArch64_AM::UXTW;
846 }
847 }
848
849 return AArch64_AM::InvalidShiftExtend;
850}
851
852/// Determine whether it is worth folding V into an extended register of an
853/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
854/// instruction, and the shift should be treated as worth folding even if it
855/// has multiple uses.
856bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
857 // Trivial if we are optimizing for code size or if there is only
858 // one use of the value.
859 if (CurDAG->shouldOptForSize() || V.hasOneUse())
860 return true;
861
862 // If a subtarget has a fastpath LSL we can fold a logical shift into
863 // the add/sub and save a cycle.
864 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
865 V.getConstantOperandVal(1) <= 4 &&
866 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
867 return true;
868
869 // It hurts otherwise, since the value will be reused.
870 return false;
871}
872
873/// SelectShiftedRegister - Select a "shifted register" operand. If the value
874/// is not shifted, set the Shift operand to default of "LSL 0". The logical
875/// instructions allow the shifted register to be rotated, but the arithmetic
876/// instructions do not. The AllowROR parameter specifies whether ROR is
877/// supported.
878bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
879 SDValue &Reg, SDValue &Shift) {
880 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
881 return true;
882
883 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
884 if (ShType == AArch64_AM::InvalidShiftExtend)
885 return false;
886 if (!AllowROR && ShType == AArch64_AM::ROR)
887 return false;
888
889 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
890 unsigned BitSize = N.getValueSizeInBits();
891 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
892 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
893
894 Reg = N.getOperand(0);
895 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
896 return isWorthFoldingALU(N, true);
897 }
898
899 return false;
900}
901
902/// Instructions that accept extend modifiers like UXTW expect the register
903/// being extended to be a GPR32, but the incoming DAG might be acting on a
904/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
905/// this is the case.
906static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
907 if (N.getValueType() == MVT::i32)
908 return N;
909
910 SDLoc dl(N);
911 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
912}
913
914// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
915template<signed Low, signed High, signed Scale>
916bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
917 if (!isa<ConstantSDNode>(N))
918 return false;
919
920 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
921 if ((MulImm % std::abs(Scale)) == 0) {
922 int64_t RDVLImm = MulImm / Scale;
923 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
924 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
925 return true;
926 }
927 }
928
929 return false;
930}
931
932/// SelectArithExtendedRegister - Select a "extended register" operand. This
933/// operand folds in an extend followed by an optional left shift.
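/// For example, (shl (sext_inreg x, i8), #2) is selected as the register x
/// with an SXTB #2 arithmetic extend.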
934bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
935 SDValue &Shift) {
936 unsigned ShiftVal = 0;
937 AArch64_AM::ShiftExtendType Ext;
938
939 if (N.getOpcode() == ISD::SHL) {
940 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
941 if (!CSD)
942 return false;
943 ShiftVal = CSD->getZExtValue();
944 if (ShiftVal > 4)
945 return false;
946
947 Ext = getExtendTypeForNode(N.getOperand(0));
948 if (Ext == AArch64_AM::InvalidShiftExtend)
949 return false;
950
951 Reg = N.getOperand(0).getOperand(0);
952 } else {
953 Ext = getExtendTypeForNode(N);
954 if (Ext == AArch64_AM::InvalidShiftExtend)
955 return false;
956
957 Reg = N.getOperand(0);
958
959 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
960 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
961 auto isDef32 = [](SDValue N) {
962 unsigned Opc = N.getOpcode();
963 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
964 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
965 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
966 Opc != ISD::FREEZE;
967 };
968 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
969 isDef32(Reg))
970 return false;
971 }
972
973 // AArch64 mandates that the RHS of the operation must use the smallest
974 // register class that could contain the size being extended from. Thus,
975 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
976 // there might not be an actual 32-bit value in the program. We can
977 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
978 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
979 Reg = narrowIfNeeded(CurDAG, Reg);
980 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
981 MVT::i32);
982 return isWorthFoldingALU(N);
983}
984
985/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
986/// operand is used by instructions that have an SP operand.
987bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
988 SDValue &Shift) {
989 unsigned ShiftVal = 0;
990 AArch64_AM::ShiftExtendType Ext;
991
992 if (N.getOpcode() != ISD::SHL)
993 return false;
994
995 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
996 if (!CSD)
997 return false;
998 ShiftVal = CSD->getZExtValue();
999 if (ShiftVal > 4)
1000 return false;
1001
1002 Ext = AArch64_AM::UXTX;
1003 Reg = N.getOperand(0);
1004 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1005 MVT::i32);
1006 return isWorthFoldingALU(N);
1007}
1008
1009/// If there's a use of this ADDlow that's not itself a load/store then we'll
1010/// need to create a real ADD instruction from it anyway and there's no point in
1011/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1012/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1013/// leads to duplicated ADRP instructions.
1014static bool isWorthFoldingADDlow(SDValue N) {
1015 for (auto *Use : N->uses()) {
1016 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1017 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1018 Use->getOpcode() != ISD::ATOMIC_STORE)
1019 return false;
1020
1021 // ldar and stlr have much more restrictive addressing modes (just a
1022 // register).
1023 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1024 return false;
1025 }
1026
1027 return true;
1028}
1029
1030/// Check if the immediate offset is valid as a scaled immediate.
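/// For example, with Size == 8 and Range == 0x1000, byte offsets 0, 8, ...,
/// 32760 are valid.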
1031static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1032 unsigned Size) {
1033 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1034 Offset < (Range << Log2_32(Size)))
1035 return true;
1036 return false;
1037}
1038
1039/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1040/// immediate" address. The "Size" argument is the size in bytes of the memory
1041/// reference, which determines the scale.
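/// For example, with IsSignedImm == true, BW == 7 and Size == 8 (e.g. a 64-bit
/// LDP/STP), byte offsets -512 to 504 in steps of 8 are accepted.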
1042bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1043 unsigned BW, unsigned Size,
1044 SDValue &Base,
1045 SDValue &OffImm) {
1046 SDLoc dl(N);
1047 const DataLayout &DL = CurDAG->getDataLayout();
1048 const TargetLowering *TLI = getTargetLowering();
1049 if (N.getOpcode() == ISD::FrameIndex) {
1050 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1051 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1052 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1053 return true;
1054 }
1055
1056 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
1057 // signed modes selected here don't support labels/immediates, only base+offset.
1058 if (CurDAG->isBaseWithConstantOffset(N)) {
1059 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1060 if (IsSignedImm) {
1061 int64_t RHSC = RHS->getSExtValue();
1062 unsigned Scale = Log2_32(Size);
1063 int64_t Range = 0x1LL << (BW - 1);
1064
1065 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1066 RHSC < (Range << Scale)) {
1067 Base = N.getOperand(0);
1068 if (Base.getOpcode() == ISD::FrameIndex) {
1069 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1070 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1071 }
1072 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1073 return true;
1074 }
1075 } else {
1076 // unsigned Immediate
1077 uint64_t RHSC = RHS->getZExtValue();
1078 unsigned Scale = Log2_32(Size);
1079 uint64_t Range = 0x1ULL << BW;
1080
1081 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1082 Base = N.getOperand(0);
1083 if (Base.getOpcode() == ISD::FrameIndex) {
1084 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1085 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1086 }
1087 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1088 return true;
1089 }
1090 }
1091 }
1092 }
1093 // Base only. The address will be materialized into a register before
1094 // the memory is accessed.
1095 // add x0, Xbase, #offset
1096 // stp x1, x2, [x0]
1097 Base = N;
1098 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1099 return true;
1100}
1101
1102/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1103/// immediate" address. The "Size" argument is the size in bytes of the memory
1104/// reference, which determines the scale.
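/// For example, for a 64-bit LDR (Size == 8) byte offsets 0..32760 in steps of
/// 8 are accepted, and OffImm holds the offset divided by 8.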
1105bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1106 SDValue &Base, SDValue &OffImm) {
1107 SDLoc dl(N);
1108 const DataLayout &DL = CurDAG->getDataLayout();
1109 const TargetLowering *TLI = getTargetLowering();
1110 if (N.getOpcode() == ISD::FrameIndex) {
1111 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1112 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1113 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1114 return true;
1115 }
1116
1117 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1118 GlobalAddressSDNode *GAN =
1119 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1120 Base = N.getOperand(0);
1121 OffImm = N.getOperand(1);
1122 if (!GAN)
1123 return true;
1124
1125 if (GAN->getOffset() % Size == 0 &&
1126 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1127 return true;
1128 }
1129
1130 if (CurDAG->isBaseWithConstantOffset(N)) {
1131 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1132 int64_t RHSC = (int64_t)RHS->getZExtValue();
1133 unsigned Scale = Log2_32(Size);
1134 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1135 Base = N.getOperand(0);
1136 if (Base.getOpcode() == ISD::FrameIndex) {
1137 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1138 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1139 }
1140 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1141 return true;
1142 }
1143 }
1144 }
1145
1146 // Before falling back to our general case, check if the unscaled
1147 // instructions can handle this. If so, that's preferable.
1148 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1149 return false;
1150
1151 // Base only. The address will be materialized into a register before
1152 // the memory is accessed.
1153 // add x0, Xbase, #offset
1154 // ldr x0, [x0]
1155 Base = N;
1156 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1157 return true;
1158}
1159
1160/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1161/// immediate" address. This should only match when there is an offset that
1162/// is not valid for a scaled immediate addressing mode. The "Size" argument
1163/// is the size in bytes of the memory reference, which is needed here to know
1164/// what is valid for a scaled immediate.
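/// For example, an offset of 17 on a 64-bit LDR is not a multiple of 8, so it
/// cannot use the scaled form, but it fits the signed 9-bit LDUR range.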
1165bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1166 SDValue &Base,
1167 SDValue &OffImm) {
1168 if (!CurDAG->isBaseWithConstantOffset(N))
1169 return false;
1170 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1171 int64_t RHSC = RHS->getSExtValue();
1172 if (RHSC >= -256 && RHSC < 256) {
1173 Base = N.getOperand(0);
1174 if (Base.getOpcode() == ISD::FrameIndex) {
1175 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1176 const TargetLowering *TLI = getTargetLowering();
1177 Base = CurDAG->getTargetFrameIndex(
1178 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1179 }
1180 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1181 return true;
1182 }
1183 }
1184 return false;
1185}
1186
1187static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1188 SDLoc dl(N);
1189 SDValue ImpDef = SDValue(
1190 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1191 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1192 N);
1193}
1194
1195/// Check if the given SHL node (\p N), can be used to form an
1196/// extended register for an addressing mode.
1197bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1198 bool WantExtend, SDValue &Offset,
1199 SDValue &SignExtend) {
1200 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1201 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1202 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1203 return false;
1204
1205 SDLoc dl(N);
1206 if (WantExtend) {
1207 AArch64_AM::ShiftExtendType Ext =
1208 getExtendTypeForNode(N.getOperand(0), true);
1209 if (Ext == AArch64_AM::InvalidShiftExtend)
1210 return false;
1211
1212 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1213 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1214 MVT::i32);
1215 } else {
1216 Offset = N.getOperand(0);
1217 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1218 }
1219
1220 unsigned LegalShiftVal = Log2_32(Size);
1221 unsigned ShiftVal = CSD->getZExtValue();
1222
1223 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1224 return false;
1225
1226 return isWorthFoldingAddr(N, Size);
1227}
1228
1229bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1230 SDValue &Base, SDValue &Offset,
1231 SDValue &SignExtend,
1232 SDValue &DoShift) {
1233 if (N.getOpcode() != ISD::ADD)
1234 return false;
1235 SDValue LHS = N.getOperand(0);
1236 SDValue RHS = N.getOperand(1);
1237 SDLoc dl(N);
1238
1239 // We don't want to match immediate adds here, because they are better lowered
1240 // to the register-immediate addressing modes.
1241 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1242 return false;
1243
1244 // Check if this particular node is reused in any non-memory related
1245 // operation. If yes, do not try to fold this node into the address
1246 // computation, since the computation will be kept.
1247 const SDNode *Node = N.getNode();
1248 for (SDNode *UI : Node->uses()) {
1249 if (!isa<MemSDNode>(*UI))
1250 return false;
1251 }
1252
1253 // Remember if it is worth folding N when it produces extended register.
1254 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1255
1256 // Try to match a shifted extend on the RHS.
1257 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1258 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1259 Base = LHS;
1260 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1261 return true;
1262 }
1263
1264 // Try to match a shifted extend on the LHS.
1265 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1266 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1267 Base = RHS;
1268 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1269 return true;
1270 }
1271
1272 // There was no shift, whatever else we find.
1273 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1274
1275 AArch64_AM::ShiftExtendType Ext;
1276 // Try to match an unshifted extend on the LHS.
1277 if (IsExtendedRegisterWorthFolding &&
1278 (Ext = getExtendTypeForNode(LHS, true)) !=
1279 AArch64_AM::InvalidShiftExtend) {
1280 Base = RHS;
1281 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1282 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1283 MVT::i32);
1284 if (isWorthFoldingAddr(LHS, Size))
1285 return true;
1286 }
1287
1288 // Try to match an unshifted extend on the RHS.
1289 if (IsExtendedRegisterWorthFolding &&
1290 (Ext = getExtendTypeForNode(RHS, true)) !=
1291 AArch64_AM::InvalidShiftExtend) {
1292 Base = LHS;
1293 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1294 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1295 MVT::i32);
1296 if (isWorthFoldingAddr(RHS, Size))
1297 return true;
1298 }
1299
1300 return false;
1301}
1302
1303// Check if the given immediate is preferred by ADD. If an immediate can be
1304// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1305// encoded by one MOVZ, return true.
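// For example, 0x123000 needs ADD ..., #0x123, LSL #12 and cannot be built by
// a single MOVZ, so it is preferred; 0xab0000 fits MOVZ #0xab, LSL #16, so it
// is not.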
1306static bool isPreferredADD(int64_t ImmOff) {
1307 // Constant in [0x0, 0xfff] can be encoded in ADD.
1308 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1309 return true;
1310 // Check if it can be encoded in an "ADD LSL #12".
1311 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1312 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1313 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1314 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1315 return false;
1316}
1317
1318bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1319 SDValue &Base, SDValue &Offset,
1320 SDValue &SignExtend,
1321 SDValue &DoShift) {
1322 if (N.getOpcode() != ISD::ADD)
1323 return false;
1324 SDValue LHS = N.getOperand(0);
1325 SDValue RHS = N.getOperand(1);
1326 SDLoc DL(N);
1327
1328 // Check if this particular node is reused in any non-memory related
1329 // operation. If yes, do not try to fold this node into the address
1330 // computation, since the computation will be kept.
1331 const SDNode *Node = N.getNode();
1332 for (SDNode *UI : Node->uses()) {
1333 if (!isa<MemSDNode>(*UI))
1334 return false;
1335 }
1336
1337 // Watch out if RHS is a wide immediate: it cannot be selected into the
1338 // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
1339 // either. Instead it will use the [BaseReg + 0] address mode and generate
1340 // instructions like:
1341 // MOV X0, WideImmediate
1342 // ADD X1, BaseReg, X0
1343 // LDR X2, [X1, 0]
1344 // For such situation, using [BaseReg, XReg] addressing mode can save one
1345 // ADD/SUB:
1346 // MOV X0, WideImmediate
1347 // LDR X2, [BaseReg, X0]
1348 if (isa<ConstantSDNode>(RHS)) {
1349 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1350 // Skip immediates that can be selected by a load/store addressing mode.
1351 // Also skip immediates that can be encoded by a single ADD (SUB is also
1352 // checked by using -ImmOff).
1353 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1354 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1355 return false;
1356
1357 SDValue Ops[] = { RHS };
1358 SDNode *MOVI =
1359 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1360 SDValue MOVIV = SDValue(MOVI, 0);
1361 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1362 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1363 }
1364
1365 // Remember if it is worth folding N when it produces extended register.
1366 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1367
1368 // Try to match a shifted extend on the RHS.
1369 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1370 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1371 Base = LHS;
1372 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1373 return true;
1374 }
1375
1376 // Try to match a shifted extend on the LHS.
1377 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1378 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1379 Base = RHS;
1380 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1381 return true;
1382 }
1383
1384 // Match any non-shifted, non-extend, non-immediate add expression.
1385 Base = LHS;
1386 Offset = RHS;
1387 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1388 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1389 // Reg1 + Reg2 is free: no check needed.
1390 return true;
1391}
1392
1393SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1394 static const unsigned RegClassIDs[] = {
1395 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1396 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1397 AArch64::dsub2, AArch64::dsub3};
1398
1399 return createTuple(Regs, RegClassIDs, SubRegs);
1400}
1401
1402SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1403 static const unsigned RegClassIDs[] = {
1404 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1405 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1406 AArch64::qsub2, AArch64::qsub3};
1407
1408 return createTuple(Regs, RegClassIDs, SubRegs);
1409}
1410
1411SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1412 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1413 AArch64::ZPR3RegClassID,
1414 AArch64::ZPR4RegClassID};
1415 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1416 AArch64::zsub2, AArch64::zsub3};
1417
1418 return createTuple(Regs, RegClassIDs, SubRegs);
1419}
1420
1421SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1422 assert(Regs.size() == 2 || Regs.size() == 4);
1423
1424 // The createTuple interface requires 3 RegClassIDs for each possible
1425 // tuple type even though we only have them for ZPR2 and ZPR4.
1426 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1427 AArch64::ZPR4Mul4RegClassID};
1428 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1429 AArch64::zsub2, AArch64::zsub3};
1430 return createTuple(Regs, RegClassIDs, SubRegs);
1431}
1432
1433SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1434 const unsigned RegClassIDs[],
1435 const unsigned SubRegs[]) {
1436 // There's no special register-class for a vector-list of 1 element: it's just
1437 // a vector.
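 // For example, two Q registers are glued into a QQ REG_SEQUENCE via the
 // qsub0/qsub1 sub-register indices (see createQTuple above).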
1438 if (Regs.size() == 1)
1439 return Regs[0];
1440
1441 assert(Regs.size() >= 2 && Regs.size() <= 4);
1442
1443 SDLoc DL(Regs[0]);
1444
1445 SmallVector<SDValue, 4> Ops;
1446
1447 // First operand of REG_SEQUENCE is the desired RegClass.
1448 Ops.push_back(
1449 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1450
1451 // Then we get pairs of source & subregister-position for the components.
1452 for (unsigned i = 0; i < Regs.size(); ++i) {
1453 Ops.push_back(Regs[i]);
1454 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1455 }
1456
1457 SDNode *N =
1458 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1459 return SDValue(N, 0);
1460}
1461
1462void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1463 bool isExt) {
1464 SDLoc dl(N);
1465 EVT VT = N->getValueType(0);
1466
1467 unsigned ExtOff = isExt;
1468
1469 // Form a REG_SEQUENCE to force register allocation.
1470 unsigned Vec0Off = ExtOff + 1;
1471 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1472 N->op_begin() + Vec0Off + NumVecs);
1473 SDValue RegSeq = createQTuple(Regs);
1474
1475 SmallVector<SDValue, 6> Ops;
1476 if (isExt)
1477 Ops.push_back(N->getOperand(1));
1478 Ops.push_back(RegSeq);
1479 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1480 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1481}
1482
1483bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1484 LoadSDNode *LD = cast<LoadSDNode>(N);
1485 if (LD->isUnindexed())
1486 return false;
1487 EVT VT = LD->getMemoryVT();
1488 EVT DstVT = N->getValueType(0);
1489 ISD::MemIndexedMode AM = LD->getAddressingMode();
1490 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1491
1492 // We're not doing validity checking here. That was done when checking
1493 // if we should mark the load as indexed or not. We're just selecting
1494 // the right instruction.
1495 unsigned Opcode = 0;
1496
1497 ISD::LoadExtType ExtType = LD->getExtensionType();
1498 bool InsertTo64 = false;
1499 if (VT == MVT::i64)
1500 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1501 else if (VT == MVT::i32) {
1502 if (ExtType == ISD::NON_EXTLOAD)
1503 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1504 else if (ExtType == ISD::SEXTLOAD)
1505 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1506 else {
1507 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1508 InsertTo64 = true;
1509 // The result of the load is only i32. It's the subreg_to_reg that makes
1510 // it into an i64.
1511 DstVT = MVT::i32;
1512 }
1513 } else if (VT == MVT::i16) {
1514 if (ExtType == ISD::SEXTLOAD) {
1515 if (DstVT == MVT::i64)
1516 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1517 else
1518 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1519 } else {
1520 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1521 InsertTo64 = DstVT == MVT::i64;
1522 // The result of the load is only i32. It's the subreg_to_reg that makes
1523 // it into an i64.
1524 DstVT = MVT::i32;
1525 }
1526 } else if (VT == MVT::i8) {
1527 if (ExtType == ISD::SEXTLOAD) {
1528 if (DstVT == MVT::i64)
1529 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1530 else
1531 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1532 } else {
1533 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1534 InsertTo64 = DstVT == MVT::i64;
1535 // The result of the load is only i32. It's the subreg_to_reg that makes
1536 // it into an i64.
1537 DstVT = MVT::i32;
1538 }
1539 } else if (VT == MVT::f16) {
1540 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1541 } else if (VT == MVT::bf16) {
1542 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1543 } else if (VT == MVT::f32) {
1544 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1545 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1546 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1547 } else if (VT.is128BitVector()) {
1548 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1549 } else
1550 return false;
1551 SDValue Chain = LD->getChain();
1552 SDValue Base = LD->getBasePtr();
1553 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1554 int OffsetVal = (int)OffsetOp->getZExtValue();
1555 SDLoc dl(N);
1556 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1557 SDValue Ops[] = { Base, Offset, Chain };
1558 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1559 MVT::Other, Ops);
1560
1561 // Transfer memoperands.
1562 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1563 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1564
1565 // Either way, we're replacing the node, so tell the caller that.
1566 SDValue LoadedVal = SDValue(Res, 1);
1567 if (InsertTo64) {
1568 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1569 LoadedVal =
1570 SDValue(CurDAG->getMachineNode(
1571 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1572 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1573 SubReg),
1574 0);
1575 }
1576
1577 ReplaceUses(SDValue(N, 0), LoadedVal);
1578 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1579 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1580 CurDAG->RemoveDeadNode(N);
1581 return true;
1582}
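// A rough illustration of the InsertTo64 path above (the IR and register
// names are made up for the example): a post-indexed zero-extending i8 load
// whose value is consumed as i64, e.g.
//   %v = load i8, ptr %p      ; pointer post-incremented by 1
//   %w = zext i8 %v to i64
// is selected as LDRBBpost (which only defines a 32-bit result), and the
// loaded value is then widened with SUBREG_TO_REG into the sub_32 lane of an
// i64, relying on the W-register write zeroing the upper 32 bits.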
1583
1584void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1585 unsigned SubRegIdx) {
1586 SDLoc dl(N);
1587 EVT VT = N->getValueType(0);
1588 SDValue Chain = N->getOperand(0);
1589
1590 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1591 Chain};
1592
1593 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1594
1595 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1596 SDValue SuperReg = SDValue(Ld, 0);
1597 for (unsigned i = 0; i < NumVecs; ++i)
1598 ReplaceUses(SDValue(N, i),
1599 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1600
1601 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1602
1603 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1604 // because it's too simple to have needed special treatment during lowering.
1605 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1606 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1607 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1608 }
1609
1610 CurDAG->RemoveDeadNode(N);
1611}
1612
1613void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1614 unsigned Opc, unsigned SubRegIdx) {
1615 SDLoc dl(N);
1616 EVT VT = N->getValueType(0);
1617 SDValue Chain = N->getOperand(0);
1618
1619 SDValue Ops[] = {N->getOperand(1), // Mem operand
1620 N->getOperand(2), // Incremental
1621 Chain};
1622
1623 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1624 MVT::Untyped, MVT::Other};
1625
1626 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1627
1628 // Update uses of write back register
1629 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1630
1631 // Update uses of vector list
1632 SDValue SuperReg = SDValue(Ld, 1);
1633 if (NumVecs == 1)
1634 ReplaceUses(SDValue(N, 0), SuperReg);
1635 else
1636 for (unsigned i = 0; i < NumVecs; ++i)
1637 ReplaceUses(SDValue(N, i),
1638 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1639
1640 // Update the chain
1641 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1642 CurDAG->RemoveDeadNode(N);
1643}
1644
1645/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1646/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1647/// new Base and an SDValue representing the new offset.
1648std::tuple<unsigned, SDValue, SDValue>
1649AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1650 unsigned Opc_ri,
1651 const SDValue &OldBase,
1652 const SDValue &OldOffset,
1653 unsigned Scale) {
1654 SDValue NewBase = OldBase;
1655 SDValue NewOffset = OldOffset;
1656 // Detect a possible Reg+Imm addressing mode.
1657 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1658 N, OldBase, NewBase, NewOffset);
1659
1660 // Detect a possible reg+reg addressing mode, but only if we haven't already
1661 // detected a Reg+Imm one.
1662 const bool IsRegReg =
1663 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1664
1665 // Select the instruction.
1666 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1667}
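// Usage sketch for the helper above (assuming a typical caller): if the base
// folds to "Reg + #imm" with the immediate in [-8, 7] vector-length units,
// the tuple (Opc_ri, Reg, #imm) is returned; otherwise, if a reg+reg form
// matches, (Opc_rr, Reg, IndexReg) is returned; failing both, the original
// base and the caller-provided zero offset are kept together with Opc_ri.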
1668
1669enum class SelectTypeKind {
1670 Int1 = 0,
1671 Int = 1,
1672 FP = 2,
1673 AnyType = 3,
1674};
1675
1676/// This function selects an opcode from a list of opcodes, which is
1677/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1678/// element types, in this order.
1679template <SelectTypeKind Kind>
1680static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1681 // Only match scalable vector VTs
1682 if (!VT.isScalableVector())
1683 return 0;
1684
1685 EVT EltVT = VT.getVectorElementType();
1686 unsigned Key = VT.getVectorMinNumElements();
1687 switch (Kind) {
1688 case SelectTypeKind::AnyType:
1689 break;
1690 case SelectTypeKind::Int:
1691 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1692 EltVT != MVT::i64)
1693 return 0;
1694 break;
1695 case SelectTypeKind::Int1:
1696 if (EltVT != MVT::i1)
1697 return 0;
1698 break;
1699 case SelectTypeKind::FP:
1700 if (EltVT == MVT::bf16)
1701 Key = 16;
1702 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1703 EltVT != MVT::f64)
1704 return 0;
1705 break;
1706 }
1707
1708 unsigned Offset;
1709 switch (Key) {
1710 case 16: // 8-bit or bf16
1711 Offset = 0;
1712 break;
1713 case 8: // 16-bit
1714 Offset = 1;
1715 break;
1716 case 4: // 32-bit
1717 Offset = 2;
1718 break;
1719 case 2: // 64-bit
1720 Offset = 3;
1721 break;
1722 default:
1723 return 0;
1724 }
1725
1726 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1727}
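// Example (the opcode names are placeholders): for MVT::nxv8i16 the element
// count key is 8, which maps to Offset 1, so
//   SelectOpcodeFromVT<SelectTypeKind::Int>(VT, {Op8, Op16, Op32, Op64})
// would return Op16; a non-scalable or otherwise unhandled type returns 0.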
1728
1729// This function is almost identical to SelectWhilePair, but has an
1730// extra check on the range of the immediate operand.
1731// TODO: Merge these two functions together at some point?
1732void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1733 // Immediate can be either 0 or 1.
1734 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1735 if (Imm->getZExtValue() > 1)
1736 return;
1737
1738 SDLoc DL(N);
1739 EVT VT = N->getValueType(0);
1740 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1741 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1742 SDValue SuperReg = SDValue(WhilePair, 0);
1743
1744 for (unsigned I = 0; I < 2; ++I)
1745 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1746 AArch64::psub0 + I, DL, VT, SuperReg));
1747
1748 CurDAG->RemoveDeadNode(N);
1749}
1750
1751void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1752 SDLoc DL(N);
1753 EVT VT = N->getValueType(0);
1754
1755 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1756
1757 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1758 SDValue SuperReg = SDValue(WhilePair, 0);
1759
1760 for (unsigned I = 0; I < 2; ++I)
1761 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1762 AArch64::psub0 + I, DL, VT, SuperReg));
1763
1764 CurDAG->RemoveDeadNode(N);
1765}
1766
1767void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1768 unsigned Opcode) {
1769 EVT VT = N->getValueType(0);
1770 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1771 SDValue Ops = createZTuple(Regs);
1772 SDLoc DL(N);
1773 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1774 SDValue SuperReg = SDValue(Intrinsic, 0);
1775 for (unsigned i = 0; i < NumVecs; ++i)
1776 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1777 AArch64::zsub0 + i, DL, VT, SuperReg));
1778
1779 CurDAG->RemoveDeadNode(N);
1780}
1781
1782void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1783 unsigned NumVecs,
1784 bool IsZmMulti,
1785 unsigned Opcode,
1786 bool HasPred) {
1787 assert(Opcode != 0 && "Unexpected opcode");
1788
1789 SDLoc DL(N);
1790 EVT VT = N->getValueType(0);
1791 unsigned FirstVecIdx = HasPred ? 2 : 1;
1792
1793 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1794 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1795 N->op_begin() + StartIdx + NumVecs);
1796 return createZMulTuple(Regs);
1797 };
1798
1799 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1800
1801 SDValue Zm;
1802 if (IsZmMulti)
1803 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1804 else
1805 Zm = N->getOperand(NumVecs + FirstVecIdx);
1806
1807 SDNode *Intrinsic;
1808 if (HasPred)
1809 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1810 N->getOperand(1), Zdn, Zm);
1811 else
1812 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1813 SDValue SuperReg = SDValue(Intrinsic, 0);
1814 for (unsigned i = 0; i < NumVecs; ++i)
1815 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1816 AArch64::zsub0 + i, DL, VT, SuperReg));
1817
1818 CurDAG->RemoveDeadNode(N);
1819}
1820
1821void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1822 unsigned Scale, unsigned Opc_ri,
1823 unsigned Opc_rr, bool IsIntr) {
1824 assert(Scale < 5 && "Invalid scaling value.");
1825 SDLoc DL(N);
1826 EVT VT = N->getValueType(0);
1827 SDValue Chain = N->getOperand(0);
1828
1829 // Optimize addressing mode.
1830 SDValue Base, Offset;
1831 unsigned Opc;
1832 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1833 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1834 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1835
1836 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1837 Base, // Memory operand
1838 Offset, Chain};
1839
1840 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1841
1842 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1843 SDValue SuperReg = SDValue(Load, 0);
1844 for (unsigned i = 0; i < NumVecs; ++i)
1845 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1846 AArch64::zsub0 + i, DL, VT, SuperReg));
1847
1848 // Copy chain
1849 unsigned ChainIdx = NumVecs;
1850 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1851 CurDAG->RemoveDeadNode(N);
1852}
1853
1854void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1855 unsigned NumVecs,
1856 unsigned Scale,
1857 unsigned Opc_ri,
1858 unsigned Opc_rr) {
1859 assert(Scale < 4 && "Invalid scaling value.");
1860 SDLoc DL(N);
1861 EVT VT = N->getValueType(0);
1862 SDValue Chain = N->getOperand(0);
1863
1864 SDValue PNg = N->getOperand(2);
1865 SDValue Base = N->getOperand(3);
1866 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1867 unsigned Opc;
1868 std::tie(Opc, Base, Offset) =
1869 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1870
1871 SDValue Ops[] = {PNg, // Predicate-as-counter
1872 Base, // Memory operand
1873 Offset, Chain};
1874
1875 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1876
1877 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1878 SDValue SuperReg = SDValue(Load, 0);
1879 for (unsigned i = 0; i < NumVecs; ++i)
1880 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1881 AArch64::zsub0 + i, DL, VT, SuperReg));
1882
1883 // Copy chain
1884 unsigned ChainIdx = NumVecs;
1885 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1886 CurDAG->RemoveDeadNode(N);
1887}
1888
1889void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1890 unsigned Opcode) {
1891 if (N->getValueType(0) != MVT::nxv4f32)
1892 return;
1893 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1894}
1895
1896void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1897 unsigned NumOutVecs,
1898 unsigned Opc, uint32_t MaxImm) {
1899 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1900 if (Imm->getZExtValue() > MaxImm)
1901 return;
1902
1903 SDValue ZtValue;
1904 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1905 return;
1906 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
1907 SDLoc DL(Node);
1908 EVT VT = Node->getValueType(0);
1909
1910 SDNode *Instruction =
1911 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1912 SDValue SuperReg = SDValue(Instruction, 0);
1913
1914 for (unsigned I = 0; I < NumOutVecs; ++I)
1915 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1916 AArch64::zsub0 + I, DL, VT, SuperReg));
1917
1918 // Copy chain
1919 unsigned ChainIdx = NumOutVecs;
1920 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
1921 CurDAG->RemoveDeadNode(Node);
1922}
1923
1924void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1925 unsigned Op) {
1926 SDLoc DL(N);
1927 EVT VT = N->getValueType(0);
1928
1929 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1930 SDValue Zd = createZMulTuple(Regs);
1931 SDValue Zn = N->getOperand(1 + NumVecs);
1932 SDValue Zm = N->getOperand(2 + NumVecs);
1933
1934 SDValue Ops[] = {Zd, Zn, Zm};
1935
1936 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1937 SDValue SuperReg = SDValue(Intrinsic, 0);
1938 for (unsigned i = 0; i < NumVecs; ++i)
1939 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1940 AArch64::zsub0 + i, DL, VT, SuperReg));
1941
1942 CurDAG->RemoveDeadNode(N);
1943}
1944
1945bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
1946 switch (BaseReg) {
1947 default:
1948 return false;
1949 case AArch64::ZA:
1950 case AArch64::ZAB0:
1951 if (TileNum == 0)
1952 break;
1953 return false;
1954 case AArch64::ZAH0:
1955 if (TileNum <= 1)
1956 break;
1957 return false;
1958 case AArch64::ZAS0:
1959 if (TileNum <= 3)
1960 break;
1961 return false;
1962 case AArch64::ZAD0:
1963 if (TileNum <= 7)
1964 break;
1965 return false;
1966 }
1967
1968 BaseReg += TileNum;
1969 return true;
1970}
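// Example: SelectSMETile(BaseReg = AArch64::ZAS0, TileNum = 2) succeeds
// (tile indices 0-3 are valid for 32-bit tiles) and advances BaseReg to the
// third ZAS tile, whereas TileNum = 2 with AArch64::ZAH0 fails because only
// two 16-bit tiles exist.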
1971
1972template <unsigned MaxIdx, unsigned Scale>
1973void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1974 unsigned BaseReg, unsigned Op) {
1975 unsigned TileNum = 0;
1976 if (BaseReg != AArch64::ZA)
1977 TileNum = N->getConstantOperandVal(2);
1978
1979 if (!SelectSMETile(BaseReg, TileNum))
1980 return;
1981
1982 SDValue SliceBase, Base, Offset;
1983 if (BaseReg == AArch64::ZA)
1984 SliceBase = N->getOperand(2);
1985 else
1986 SliceBase = N->getOperand(3);
1987
1988 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1989 return;
1990
1991 SDLoc DL(N);
1992 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1993 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
1994 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1995
1996 EVT VT = N->getValueType(0);
1997 for (unsigned I = 0; I < NumVecs; ++I)
1998 ReplaceUses(SDValue(N, I),
1999 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2000 SDValue(Mov, 0)));
2001 // Copy chain
2002 unsigned ChainIdx = NumVecs;
2003 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2004 CurDAG->RemoveDeadNode(N);
2005}
2006
2007void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2008 unsigned Op, unsigned MaxIdx,
2009 unsigned Scale) {
2010
2011 SDValue SliceBase = N->getOperand(3);
2012 SDValue Base, Offset;
2013 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2014 return;
2015 // The correct ZA tile number is computed in the MachineInstr layer;
2016 // see EmitZAInstr.
2017 // The DAG cannot select a ZA tile as an output register with ZReg.
2018 SDLoc DL(N);
2019 SDValue Ops[] = {/*TileNum*/ N->getOperand(2), Base, Offset,
2020 /*Chain*/ N->getOperand(0)};
2021 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2022
2023 EVT VT = N->getValueType(0);
2024 for (unsigned I = 0; I < NumVecs; ++I)
2025 ReplaceUses(SDValue(N, I),
2026 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2027 SDValue(Mov, 0)));
2028
2029 // Copy chain
2030 unsigned ChainIdx = NumVecs;
2031 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2032 CurDAG->RemoveDeadNode(N);
2033}
2034
2035void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2036 unsigned NumOutVecs,
2037 bool IsTupleInput,
2038 unsigned Opc) {
2039 SDLoc DL(N);
2040 EVT VT = N->getValueType(0);
2041 unsigned NumInVecs = N->getNumOperands() - 1;
2042
2043 SmallVector<SDValue, 4> Ops;
2044 if (IsTupleInput) {
2045 assert((NumInVecs == 2 || NumInVecs == 4) &&
2046 "Don't know how to handle multi-register input!");
2047 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2048 N->op_begin() + 1 + NumInVecs);
2049 Ops.push_back(createZMulTuple(Regs));
2050 } else {
2051 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2052 for (unsigned I = 0; I < NumInVecs; I++)
2053 Ops.push_back(N->getOperand(1 + I));
2054 }
2055
2056 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2057 SDValue SuperReg = SDValue(Res, 0);
2058
2059 for (unsigned I = 0; I < NumOutVecs; I++)
2060 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2061 AArch64::zsub0 + I, DL, VT, SuperReg));
2062 CurDAG->RemoveDeadNode(N);
2063}
2064
2065void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2066 unsigned Opc) {
2067 SDLoc dl(N);
2068 EVT VT = N->getOperand(2)->getValueType(0);
2069
2070 // Form a REG_SEQUENCE to force register allocation.
2071 bool Is128Bit = VT.getSizeInBits() == 128;
2072 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2073 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2074
2075 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2076 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2077
2078 // Transfer memoperands.
2079 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2080 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2081
2082 ReplaceNode(N, St);
2083}
2084
2085void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2086 unsigned Scale, unsigned Opc_rr,
2087 unsigned Opc_ri) {
2088 SDLoc dl(N);
2089
2090 // Form a REG_SEQUENCE to force register allocation.
2091 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2092 SDValue RegSeq = createZTuple(Regs);
2093
2094 // Optimize addressing mode.
2095 unsigned Opc;
2096 SDValue Base, Offset;
2097 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2098 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2099 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2100
2101 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2102 Base, // address
2103 Offset, // offset
2104 N->getOperand(0)}; // chain
2105 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2106
2107 ReplaceNode(N, St);
2108}
2109
2110bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2111 SDValue &OffImm) {
2112 SDLoc dl(N);
2113 const DataLayout &DL = CurDAG->getDataLayout();
2114 const TargetLowering *TLI = getTargetLowering();
2115
2116 // Try to match it for the frame address
2117 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2118 int FI = FINode->getIndex();
2119 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2120 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2121 return true;
2122 }
2123
2124 return false;
2125}
2126
2127void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2128 unsigned Opc) {
2129 SDLoc dl(N);
2130 EVT VT = N->getOperand(2)->getValueType(0);
2131 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2132 MVT::Other}; // Type for the Chain
2133
2134 // Form a REG_SEQUENCE to force register allocation.
2135 bool Is128Bit = VT.getSizeInBits() == 128;
2136 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2137 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2138
2139 SDValue Ops[] = {RegSeq,
2140 N->getOperand(NumVecs + 1), // base register
2141 N->getOperand(NumVecs + 2), // Incremental
2142 N->getOperand(0)}; // Chain
2143 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2144
2145 ReplaceNode(N, St);
2146}
2147
2148namespace {
2149/// WidenVector - Given a value in the V64 register class, produce the
2150/// equivalent value in the V128 register class.
2151class WidenVector {
2152 SelectionDAG &DAG;
2153
2154public:
2155 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2156
2157 SDValue operator()(SDValue V64Reg) {
2158 EVT VT = V64Reg.getValueType();
2159 unsigned NarrowSize = VT.getVectorNumElements();
2160 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2161 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2162 SDLoc DL(V64Reg);
2163
2164 SDValue Undef =
2165 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2166 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2167 }
2168};
2169} // namespace
2170
2171/// NarrowVector - Given a value in the V128 register class, produce the
2172/// equivalent value in the V64 register class.
2173 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2174 EVT VT = V128Reg.getValueType();
2175 unsigned WideSize = VT.getVectorNumElements();
2176 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2177 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2178
2179 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2180 V128Reg);
2181}
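// Example: WidenVector inserts a v2f32 value into the dsub lane of an
// IMPLICIT_DEF v4f32, and NarrowVector extracts that dsub lane back out; the
// lane load/store selectors below use this pair so that 64-bit vectors can
// reuse the Q-register tuple instructions.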
2182
2183void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2184 unsigned Opc) {
2185 SDLoc dl(N);
2186 EVT VT = N->getValueType(0);
2187 bool Narrow = VT.getSizeInBits() == 64;
2188
2189 // Form a REG_SEQUENCE to force register allocation.
2190 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2191
2192 if (Narrow)
2193 transform(Regs, Regs.begin(),
2194 WidenVector(*CurDAG));
2195
2196 SDValue RegSeq = createQTuple(Regs);
2197
2198 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2199
2200 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2201
2202 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2203 N->getOperand(NumVecs + 3), N->getOperand(0)};
2204 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2205 SDValue SuperReg = SDValue(Ld, 0);
2206
2207 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2208 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2209 AArch64::qsub2, AArch64::qsub3 };
2210 for (unsigned i = 0; i < NumVecs; ++i) {
2211 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2212 if (Narrow)
2213 NV = NarrowVector(NV, *CurDAG);
2214 ReplaceUses(SDValue(N, i), NV);
2215 }
2216
2217 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2218 CurDAG->RemoveDeadNode(N);
2219}
2220
2221void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2222 unsigned Opc) {
2223 SDLoc dl(N);
2224 EVT VT = N->getValueType(0);
2225 bool Narrow = VT.getSizeInBits() == 64;
2226
2227 // Form a REG_SEQUENCE to force register allocation.
2228 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2229
2230 if (Narrow)
2231 transform(Regs, Regs.begin(),
2232 WidenVector(*CurDAG));
2233
2234 SDValue RegSeq = createQTuple(Regs);
2235
2236 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2237 RegSeq->getValueType(0), MVT::Other};
2238
2239 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2240
2241 SDValue Ops[] = {RegSeq,
2242 CurDAG->getTargetConstant(LaneNo, dl,
2243 MVT::i64), // Lane Number
2244 N->getOperand(NumVecs + 2), // Base register
2245 N->getOperand(NumVecs + 3), // Incremental
2246 N->getOperand(0)};
2247 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2248
2249 // Update uses of the write back register
2250 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2251
2252 // Update uses of the vector list
2253 SDValue SuperReg = SDValue(Ld, 1);
2254 if (NumVecs == 1) {
2255 ReplaceUses(SDValue(N, 0),
2256 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2257 } else {
2258 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2259 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2260 AArch64::qsub2, AArch64::qsub3 };
2261 for (unsigned i = 0; i < NumVecs; ++i) {
2262 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2263 SuperReg);
2264 if (Narrow)
2265 NV = NarrowVector(NV, *CurDAG);
2266 ReplaceUses(SDValue(N, i), NV);
2267 }
2268 }
2269
2270 // Update the Chain
2271 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2272 CurDAG->RemoveDeadNode(N);
2273}
2274
2275void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2276 unsigned Opc) {
2277 SDLoc dl(N);
2278 EVT VT = N->getOperand(2)->getValueType(0);
2279 bool Narrow = VT.getSizeInBits() == 64;
2280
2281 // Form a REG_SEQUENCE to force register allocation.
2282 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2283
2284 if (Narrow)
2285 transform(Regs, Regs.begin(),
2286 WidenVector(*CurDAG));
2287
2288 SDValue RegSeq = createQTuple(Regs);
2289
2290 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2291
2292 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2293 N->getOperand(NumVecs + 3), N->getOperand(0)};
2294 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2295
2296 // Transfer memoperands.
2297 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2298 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2299
2300 ReplaceNode(N, St);
2301}
2302
2303void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2304 unsigned Opc) {
2305 SDLoc dl(N);
2306 EVT VT = N->getOperand(2)->getValueType(0);
2307 bool Narrow = VT.getSizeInBits() == 64;
2308
2309 // Form a REG_SEQUENCE to force register allocation.
2310 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2311
2312 if (Narrow)
2313 transform(Regs, Regs.begin(),
2314 WidenVector(*CurDAG));
2315
2316 SDValue RegSeq = createQTuple(Regs);
2317
2318 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2319 MVT::Other};
2320
2321 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2322
2323 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2324 N->getOperand(NumVecs + 2), // Base Register
2325 N->getOperand(NumVecs + 3), // Incremental
2326 N->getOperand(0)};
2327 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2328
2329 // Transfer memoperands.
2330 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2331 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2332
2333 ReplaceNode(N, St);
2334}
2335
2336 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2337 unsigned &Opc, SDValue &Opd0,
2338 unsigned &LSB, unsigned &MSB,
2339 unsigned NumberOfIgnoredLowBits,
2340 bool BiggerPattern) {
2341 assert(N->getOpcode() == ISD::AND &&
2342 "N must be a AND operation to call this function");
2343
2344 EVT VT = N->getValueType(0);
2345
2346 // Here we can test the type of VT and return false when the type does not
2347 // match, but since it is done prior to that call in the current context
2348 // we turned that into an assert to avoid redundant code.
2349 assert((VT == MVT::i32 || VT == MVT::i64) &&
2350 "Type checking must have been done before calling this function");
2351
2352 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2353 // changed the AND node to a 32-bit mask operation. We'll have to
2354 // undo that as part of the transform here if we want to catch all
2355 // the opportunities.
2356 // Currently the NumberOfIgnoredLowBits argument helps to recover
2357 // from these situations when matching bigger pattern (bitfield insert).
2358
2359 // For unsigned extracts, check for a shift right and mask
2360 uint64_t AndImm = 0;
2361 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2362 return false;
2363
2364 const SDNode *Op0 = N->getOperand(0).getNode();
2365
2366 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2367 // simplified. Try to undo that
2368 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2369
2370 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2371 if (AndImm & (AndImm + 1))
2372 return false;
2373
2374 bool ClampMSB = false;
2375 uint64_t SrlImm = 0;
2376 // Handle the SRL + ANY_EXTEND case.
2377 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2378 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2379 // Extend the incoming operand of the SRL to 64-bit.
2380 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2381 // Make sure to clamp the MSB so that we preserve the semantics of the
2382 // original operations.
2383 ClampMSB = true;
2384 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2385 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2386 SrlImm)) {
2387 // If the shift result was truncated, we can still combine them.
2388 Opd0 = Op0->getOperand(0).getOperand(0);
2389
2390 // Use the type of SRL node.
2391 VT = Opd0->getValueType(0);
2392 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2393 Opd0 = Op0->getOperand(0);
2394 ClampMSB = (VT == MVT::i32);
2395 } else if (BiggerPattern) {
2396 // Let's pretend a 0 shift right has been performed.
2397 // The resulting code will be at least as good as the original one
2398 // plus it may expose more opportunities for bitfield insert pattern.
2399 // FIXME: Currently we limit this to the bigger pattern, because
2400 // some optimizations expect AND and not UBFM.
2401 Opd0 = N->getOperand(0);
2402 } else
2403 return false;
2404
2405 // Bail out on large immediates. This happens when no proper
2406 // combining/constant folding was performed.
2407 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2408 LLVM_DEBUG(
2409 (dbgs() << N
2410 << ": Found large shift immediate, this should not happen\n"));
2411 return false;
2412 }
2413
2414 LSB = SrlImm;
2415 MSB = SrlImm +
2416 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2417 : llvm::countr_one<uint64_t>(AndImm)) -
2418 1;
2419 if (ClampMSB)
2420 // Since we're moving the extend before the right shift operation, we need
2421 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2422 // the zeros which would get shifted in with the original right shift
2423 // operation.
2424 MSB = MSB > 31 ? 31 : MSB;
2425
2426 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2427 return true;
2428}
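// Worked example of the immediates computed above (constants chosen for
// illustration): for i32 (and (srl X, 5), 0x3f), AndImm = 0x3f and
// SrlImm = 5, so LSB = 5 and MSB = 5 + countr_one(0x3f) - 1 = 10, selecting
// UBFMWri X, #5, #10, i.e. ubfx w, w, #5, #6.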
2429
2430static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2431 SDValue &Opd0, unsigned &Immr,
2432 unsigned &Imms) {
2433 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2434
2435 EVT VT = N->getValueType(0);
2436 unsigned BitWidth = VT.getSizeInBits();
2437 assert((VT == MVT::i32 || VT == MVT::i64) &&
2438 "Type checking must have been done before calling this function");
2439
2440 SDValue Op = N->getOperand(0);
2441 if (Op->getOpcode() == ISD::TRUNCATE) {
2442 Op = Op->getOperand(0);
2443 VT = Op->getValueType(0);
2444 BitWidth = VT.getSizeInBits();
2445 }
2446
2447 uint64_t ShiftImm;
2448 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2449 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2450 return false;
2451
2452 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2453 if (ShiftImm + Width > BitWidth)
2454 return false;
2455
2456 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2457 Opd0 = Op.getOperand(0);
2458 Immr = ShiftImm;
2459 Imms = ShiftImm + Width - 1;
2460 return true;
2461}
2462
2463static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2464 SDValue &Opd0, unsigned &LSB,
2465 unsigned &MSB) {
2466 // We are looking for the following pattern which basically extracts several
2467 // contiguous bits from the source value and places them at the LSB of the
2468 // destination value; all other bits of the destination value are set to zero:
2469 //
2470 // Value2 = AND Value, MaskImm
2471 // SRL Value2, ShiftImm
2472 //
2473 // with MaskImm >> ShiftImm to search for the bit width.
2474 //
2475 // This gets selected into a single UBFM:
2476 //
2477 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2478 //
2479
2480 if (N->getOpcode() != ISD::SRL)
2481 return false;
2482
2483 uint64_t AndMask = 0;
2484 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2485 return false;
2486
2487 Opd0 = N->getOperand(0).getOperand(0);
2488
2489 uint64_t SrlImm = 0;
2490 if (!isIntImmediate(N->getOperand(1), SrlImm))
2491 return false;
2492
2493 // Check whether we really have several bits extract here.
2494 if (!isMask_64(AndMask >> SrlImm))
2495 return false;
2496
2497 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2498 LSB = SrlImm;
2499 MSB = llvm::Log2_64(AndMask);
2500 return true;
2501}
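// Worked example (illustrative constants): for (srl (and X, 0xff0), 4),
// AndMask = 0xff0 and SrlImm = 4; 0xff0 >> 4 == 0xff is a mask, so LSB = 4
// and MSB = Log2_64(0xff0) = 11, i.e. a single UBFM extracting the 8-bit
// field that starts at bit 4.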
2502
2503static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2504 unsigned &Immr, unsigned &Imms,
2505 bool BiggerPattern) {
2506 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2507 "N must be a SHR/SRA operation to call this function");
2508
2509 EVT VT = N->getValueType(0);
2510
2511 // Here we can test the type of VT and return false when the type does not
2512 // match, but since it is done prior to that call in the current context
2513 // we turned that into an assert to avoid redundant code.
2514 assert((VT == MVT::i32 || VT == MVT::i64) &&
2515 "Type checking must have been done before calling this function");
2516
2517 // Check for AND + SRL doing several bits extract.
2518 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2519 return true;
2520
2521 // We're looking for a shift of a shift.
2522 uint64_t ShlImm = 0;
2523 uint64_t TruncBits = 0;
2524 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2525 Opd0 = N->getOperand(0).getOperand(0);
2526 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2527 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2528 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2529 // be considered as setting high 32 bits as zero. Our strategy here is to
2530 // always generate 64bit UBFM. This consistency will help the CSE pass
2531 // later find more redundancy.
2532 Opd0 = N->getOperand(0).getOperand(0);
2533 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2534 VT = Opd0.getValueType();
2535 assert(VT == MVT::i64 && "the promoted type should be i64");
2536 } else if (BiggerPattern) {
2537 // Let's pretend a 0 shift left has been performed.
2538 // FIXME: Currently we limit this to the bigger pattern case,
2539 // because some optimizations expect AND and not UBFM
2540 Opd0 = N->getOperand(0);
2541 } else
2542 return false;
2543
2544 // Missing combines/constant folding may have left us with strange
2545 // constants.
2546 if (ShlImm >= VT.getSizeInBits()) {
2547 LLVM_DEBUG(
2548 (dbgs() << N
2549 << ": Found large shift immediate, this should not happen\n"));
2550 return false;
2551 }
2552
2553 uint64_t SrlImm = 0;
2554 if (!isIntImmediate(N->getOperand(1), SrlImm))
2555 return false;
2556
2557 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2558 "bad amount in shift node!");
2559 int immr = SrlImm - ShlImm;
2560 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2561 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2562 // SRA requires a signed extraction
2563 if (VT == MVT::i32)
2564 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2565 else
2566 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2567 return true;
2568}
2569
2570bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2571 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2572
2573 EVT VT = N->getValueType(0);
2574 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2575 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2576 return false;
2577
2578 uint64_t ShiftImm;
2579 SDValue Op = N->getOperand(0);
2580 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2581 return false;
2582
2583 SDLoc dl(N);
2584 // Extend the incoming operand of the shift to 64-bits.
2585 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2586 unsigned Immr = ShiftImm;
2587 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2588 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2589 CurDAG->getTargetConstant(Imms, dl, VT)};
2590 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2591 return true;
2592}
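// Worked example (illustrative): for an i64 sign_extend of (sra X:i32, 7),
// the shift operand is widened to 64 bits and the node becomes
// SBFMXri Opd0, #7, #31, which is the sbfx alias with lsb 7 and width 25.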
2593
2594static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2595 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2596 unsigned NumberOfIgnoredLowBits = 0,
2597 bool BiggerPattern = false) {
2598 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2599 return false;
2600
2601 switch (N->getOpcode()) {
2602 default:
2603 if (!N->isMachineOpcode())
2604 return false;
2605 break;
2606 case ISD::AND:
2607 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2608 NumberOfIgnoredLowBits, BiggerPattern);
2609 case ISD::SRL:
2610 case ISD::SRA:
2611 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2612
2613 case ISD::SIGN_EXTEND_INREG:
2614 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2615 }
2616
2617 unsigned NOpc = N->getMachineOpcode();
2618 switch (NOpc) {
2619 default:
2620 return false;
2621 case AArch64::SBFMWri:
2622 case AArch64::UBFMWri:
2623 case AArch64::SBFMXri:
2624 case AArch64::UBFMXri:
2625 Opc = NOpc;
2626 Opd0 = N->getOperand(0);
2627 Immr = N->getConstantOperandVal(1);
2628 Imms = N->getConstantOperandVal(2);
2629 return true;
2630 }
2631 // Unreachable
2632 return false;
2633}
2634
2635bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2636 unsigned Opc, Immr, Imms;
2637 SDValue Opd0;
2638 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2639 return false;
2640
2641 EVT VT = N->getValueType(0);
2642 SDLoc dl(N);
2643
2644 // If the bit extract operation is 64bit but the original type is 32bit, we
2645 // need to add one EXTRACT_SUBREG.
2646 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2647 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2648 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2649
2650 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2651 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2652 MVT::i32, SDValue(BFM, 0));
2653 ReplaceNode(N, Inner.getNode());
2654 return true;
2655 }
2656
2657 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2658 CurDAG->getTargetConstant(Imms, dl, VT)};
2659 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2660 return true;
2661}
2662
2663/// Does DstMask form a complementary pair with the mask provided by
2664/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2665/// this asks whether DstMask zeroes precisely those bits that will be set by
2666/// the other half.
2667static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2668 unsigned NumberOfIgnoredHighBits, EVT VT) {
2669 assert((VT == MVT::i32 || VT == MVT::i64) &&
2670 "i32 or i64 mask type expected!");
2671 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2672
2673 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2674 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2675
2676 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2677 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2678}
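// Example: on i32 with no ignored high bits, DstMask = 0xffff0000 paired
// with inserted bits 0x0000ffff is accepted (the AND of the two masks is zero
// and their OR is all ones), while DstMask = 0xffff00ff is rejected because
// bits [7:0] would be both kept and overwritten.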
2679
2680// Look for bits that will be useful for later uses.
2681// A bit is consider useless as soon as it is dropped and never used
2682// before it as been dropped.
2683// E.g., looking for useful bit of x
2684// 1. y = x & 0x7
2685// 2. z = y >> 2
2686// After #1, x useful bits are 0x7, then the useful bits of x, live through
2687// y.
2688// After #2, the useful bits of x are 0x4.
2689// However, if x is used on an unpredicatable instruction, then all its bits
2690// are useful.
2691// E.g.
2692// 1. y = x & 0x7
2693// 2. z = y >> 2
2694// 3. str x, [@x]
2695static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2696
2697 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2698 unsigned Depth) {
2699 uint64_t Imm =
2700 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2701 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2702 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2703 getUsefulBits(Op, UsefulBits, Depth + 1);
2704}
2705
2706 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2707 uint64_t Imm, uint64_t MSB,
2708 unsigned Depth) {
2709 // inherit the bitwidth value
2710 APInt OpUsefulBits(UsefulBits);
2711 OpUsefulBits = 1;
2712
2713 if (MSB >= Imm) {
2714 OpUsefulBits <<= MSB - Imm + 1;
2715 --OpUsefulBits;
2716 // The interesting part will be in the lower part of the result
2717 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2718 // The interesting part was starting at Imm in the argument
2719 OpUsefulBits <<= Imm;
2720 } else {
2721 OpUsefulBits <<= MSB + 1;
2722 --OpUsefulBits;
2723 // The interesting part will be shifted in the result
2724 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2725 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2726 // The interesting part was at zero in the argument
2727 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2728 }
2729
2730 UsefulBits &= OpUsefulBits;
2731}
2732
2733static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2734 unsigned Depth) {
2735 uint64_t Imm =
2736 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2737 uint64_t MSB =
2738 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2739
2740 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2741}
2742
2743 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2744 unsigned Depth) {
2745 uint64_t ShiftTypeAndValue =
2746 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2747 APInt Mask(UsefulBits);
2748 Mask.clearAllBits();
2749 Mask.flipAllBits();
2750
2751 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2752 // Shift Left
2753 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2754 Mask <<= ShiftAmt;
2755 getUsefulBits(Op, Mask, Depth + 1);
2756 Mask.lshrInPlace(ShiftAmt);
2757 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2758 // Shift Right
2759 // We do not handle AArch64_AM::ASR, because the sign will change the
2760 // number of useful bits
2761 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2762 Mask.lshrInPlace(ShiftAmt);
2763 getUsefulBits(Op, Mask, Depth + 1);
2764 Mask <<= ShiftAmt;
2765 } else
2766 return;
2767
2768 UsefulBits &= Mask;
2769}
2770
2771static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2772 unsigned Depth) {
2773 uint64_t Imm =
2774 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2775 uint64_t MSB =
2776 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2777
2778 APInt OpUsefulBits(UsefulBits);
2779 OpUsefulBits = 1;
2780
2781 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2782 ResultUsefulBits.flipAllBits();
2783 APInt Mask(UsefulBits.getBitWidth(), 0);
2784
2785 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2786
2787 if (MSB >= Imm) {
2788 // The instruction is a BFXIL.
2789 uint64_t Width = MSB - Imm + 1;
2790 uint64_t LSB = Imm;
2791
2792 OpUsefulBits <<= Width;
2793 --OpUsefulBits;
2794
2795 if (Op.getOperand(1) == Orig) {
2796 // Copy the low bits from the result to bits starting from LSB.
2797 Mask = ResultUsefulBits & OpUsefulBits;
2798 Mask <<= LSB;
2799 }
2800
2801 if (Op.getOperand(0) == Orig)
2802 // Bits starting from LSB in the input contribute to the result.
2803 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2804 } else {
2805 // The instruction is a BFI.
2806 uint64_t Width = MSB + 1;
2807 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2808
2809 OpUsefulBits <<= Width;
2810 --OpUsefulBits;
2811 OpUsefulBits <<= LSB;
2812
2813 if (Op.getOperand(1) == Orig) {
2814 // Copy the bits from the result to the zero bits.
2815 Mask = ResultUsefulBits & OpUsefulBits;
2816 Mask.lshrInPlace(LSB);
2817 }
2818
2819 if (Op.getOperand(0) == Orig)
2820 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2821 }
2822
2823 UsefulBits &= Mask;
2824}
2825
2826static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2827 SDValue Orig, unsigned Depth) {
2828
2829 // Users of this node should have already been instruction selected
2830 // FIXME: Can we turn that into an assert?
2831 if (!UserNode->isMachineOpcode())
2832 return;
2833
2834 switch (UserNode->getMachineOpcode()) {
2835 default:
2836 return;
2837 case AArch64::ANDSWri:
2838 case AArch64::ANDSXri:
2839 case AArch64::ANDWri:
2840 case AArch64::ANDXri:
2841 // We increment Depth only when we call the getUsefulBits
2842 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2843 Depth);
2844 case AArch64::UBFMWri:
2845 case AArch64::UBFMXri:
2846 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2847
2848 case AArch64::ORRWrs:
2849 case AArch64::ORRXrs:
2850 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2851 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2852 Depth);
2853 return;
2854 case AArch64::BFMWri:
2855 case AArch64::BFMXri:
2856 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2857
2858 case AArch64::STRBBui:
2859 case AArch64::STURBBi:
2860 if (UserNode->getOperand(0) != Orig)
2861 return;
2862 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2863 return;
2864
2865 case AArch64::STRHHui:
2866 case AArch64::STURHHi:
2867 if (UserNode->getOperand(0) != Orig)
2868 return;
2869 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2870 return;
2871 }
2872}
2873
2874static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2875 if (Depth >= SelectionDAG::MaxRecursionDepth)
2876 return;
2877 // Initialize UsefulBits
2878 if (!Depth) {
2879 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2880 // At the beginning, assume every produced bits is useful
2881 UsefulBits = APInt(Bitwidth, 0);
2882 UsefulBits.flipAllBits();
2883 }
2884 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2885
2886 for (SDNode *Node : Op.getNode()->uses()) {
2887 // A use cannot produce useful bits
2888 APInt UsefulBitsForUse = APInt(UsefulBits);
2889 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2890 UsersUsefulBits |= UsefulBitsForUse;
2891 }
2892 // UsefulBits contains the produced bits that are meaningful for the
2893 // current definition, thus a user cannot make a bit meaningful at
2894 // this point
2895 UsefulBits &= UsersUsefulBits;
2896}
2897
2898/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2899/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2900/// 0, return Op unchanged.
2901static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2902 if (ShlAmount == 0)
2903 return Op;
2904
2905 EVT VT = Op.getValueType();
2906 SDLoc dl(Op);
2907 unsigned BitWidth = VT.getSizeInBits();
2908 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2909
2910 SDNode *ShiftNode;
2911 if (ShlAmount > 0) {
2912 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2913 ShiftNode = CurDAG->getMachineNode(
2914 UBFMOpc, dl, VT, Op,
2915 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2916 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2917 } else {
2918 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2919 assert(ShlAmount < 0 && "expected right shift");
2920 int ShrAmount = -ShlAmount;
2921 ShiftNode = CurDAG->getMachineNode(
2922 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2923 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2924 }
2925
2926 return SDValue(ShiftNode, 0);
2927}
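// Worked example: on i32, getLeftShift with ShlAmount = 3 emits
// UBFMWri Op, #29, #28 (the lsl #3 alias), ShlAmount = -3 emits
// UBFMWri Op, #3, #31 (the lsr #3 alias), and ShlAmount = 0 returns Op
// unchanged.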
2928
2929// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2930 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2931 bool BiggerPattern,
2932 const uint64_t NonZeroBits,
2933 SDValue &Src, int &DstLSB,
2934 int &Width);
2935
2936// For bit-field-positioning pattern "shl VAL, N)".
2938 bool BiggerPattern,
2939 const uint64_t NonZeroBits,
2940 SDValue &Src, int &DstLSB,
2941 int &Width);
2942
2943/// Does this tree qualify as an attempt to move a bitfield into position,
2944/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2945 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2946 bool BiggerPattern, SDValue &Src,
2947 int &DstLSB, int &Width) {
2948 EVT VT = Op.getValueType();
2949 unsigned BitWidth = VT.getSizeInBits();
2950 (void)BitWidth;
2951 assert(BitWidth == 32 || BitWidth == 64);
2952
2953 KnownBits Known = CurDAG->computeKnownBits(Op);
2954
2955 // Non-zero in the sense that they're not provably zero, which is the key
2956 // point if we want to use this value
2957 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2958 if (!isShiftedMask_64(NonZeroBits))
2959 return false;
2960
2961 switch (Op.getOpcode()) {
2962 default:
2963 break;
2964 case ISD::AND:
2965 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2966 NonZeroBits, Src, DstLSB, Width);
2967 case ISD::SHL:
2968 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2969 NonZeroBits, Src, DstLSB, Width);
2970 }
2971
2972 return false;
2973}
2974
2975 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2976 bool BiggerPattern,
2977 const uint64_t NonZeroBits,
2978 SDValue &Src, int &DstLSB,
2979 int &Width) {
2980 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2981
2982 EVT VT = Op.getValueType();
2983 assert((VT == MVT::i32 || VT == MVT::i64) &&
2984 "Caller guarantees VT is one of i32 or i64");
2985 (void)VT;
2986
2987 uint64_t AndImm;
2988 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2989 return false;
2990
2991 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
2992 // 1) (AndImm & (1 << POS) == 0)
2993 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
2994 //
2995 // 1) and 2) don't agree so something must be wrong (e.g., in
2996 // 'SelectionDAG::computeKnownBits')
2997 assert((~AndImm & NonZeroBits) == 0 &&
2998 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2999
3000 SDValue AndOp0 = Op.getOperand(0);
3001
3002 uint64_t ShlImm;
3003 SDValue ShlOp0;
3004 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3005 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3006 ShlOp0 = AndOp0.getOperand(0);
3007 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3008 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3009 ShlImm)) {
3010 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3011
3012 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3013 SDValue ShlVal = AndOp0.getOperand(0);
3014
3015 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3016 // expect VT to be MVT::i32.
3017 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3018
3019 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3020 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3021 } else
3022 return false;
3023
3024 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3025 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3026 // AndOp0+AND.
3027 if (!BiggerPattern && !AndOp0.hasOneUse())
3028 return false;
3029
3030 DstLSB = llvm::countr_zero(NonZeroBits);
3031 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3032
3033 // Bail out on large Width. This happens when no proper combining / constant
3034 // folding was performed.
3035 if (Width >= (int)VT.getSizeInBits()) {
3036 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3037 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3038 // "val".
3039 // If VT is i32, what Width >= 32 means:
3040 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3041 // demands at least 'Width' bits (after dag-combiner). This together with
3042 // the `any_extend` Op (undefined higher bits) indicates a missed combination
3043 // when lowering the 'and' IR instruction to a machine IR instruction.
3044 LLVM_DEBUG(
3045 dbgs()
3046 << "Found large Width in bit-field-positioning -- this indicates no "
3047 "proper combining / constant folding was performed\n");
3048 return false;
3049 }
3050
3051 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3052 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3053 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3054 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3055 // which case it is not profitable to insert an extra shift.
3056 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3057 return false;
3058
3059 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3060 return true;
3061}
3062
3063// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3064// UBFIZ.
3065 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3066 SDValue &Src, int &DstLSB,
3067 int &Width) {
3068 // Caller should have verified that N is a left shift with constant shift
3069 // amount; asserts that.
3070 assert(Op.getOpcode() == ISD::SHL &&
3071 "Op.getNode() should be a SHL node to call this function");
3072 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3073 "Op.getNode() should shift ShlImm to call this function");
3074
3075 uint64_t AndImm = 0;
3076 SDValue Op0 = Op.getOperand(0);
3077 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3078 return false;
3079
3080 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3081 if (isMask_64(ShiftedAndImm)) {
3082 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3083 // should end with Mask, and could be prefixed with random bits if those
3084 // bits are shifted out.
3085 //
3086 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3087 // the AND result corresponding to those bits are shifted out, so it's fine
3088 // to not extract them.
3089 Width = llvm::countr_one(ShiftedAndImm);
3090 DstLSB = ShlImm;
3091 Src = Op0.getOperand(0);
3092 return true;
3093 }
3094 return false;
3095}
3096
3097 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3098 bool BiggerPattern,
3099 const uint64_t NonZeroBits,
3100 SDValue &Src, int &DstLSB,
3101 int &Width) {
3102 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3103
3104 EVT VT = Op.getValueType();
3105 assert((VT == MVT::i32 || VT == MVT::i64) &&
3106 "Caller guarantees that type is i32 or i64");
3107 (void)VT;
3108
3109 uint64_t ShlImm;
3110 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3111 return false;
3112
3113 if (!BiggerPattern && !Op.hasOneUse())
3114 return false;
3115
3116 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3117 return true;
3118
3119 DstLSB = llvm::countr_zero(NonZeroBits);
3120 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3121
3122 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3123 return false;
3124
3125 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3126 return true;
3127}
3128
3129static bool isShiftedMask(uint64_t Mask, EVT VT) {
3130 assert(VT == MVT::i32 || VT == MVT::i64);
3131 if (VT == MVT::i32)
3132 return isShiftedMask_32(Mask);
3133 return isShiftedMask_64(Mask);
3134}
3135
3136// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3137// inserted only sets known zero bits.
3138 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3139 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3140
3141 EVT VT = N->getValueType(0);
3142 if (VT != MVT::i32 && VT != MVT::i64)
3143 return false;
3144
3145 unsigned BitWidth = VT.getSizeInBits();
3146
3147 uint64_t OrImm;
3148 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3149 return false;
3150
3151 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3152 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3153 // performance neutral.
3154 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3155 return false;
3156
3157 uint64_t MaskImm;
3158 SDValue And = N->getOperand(0);
3159 // Must be a single use AND with an immediate operand.
3160 if (!And.hasOneUse() ||
3161 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3162 return false;
3163
3164 // Compute the Known Zero for the AND as this allows us to catch more general
3165 // cases than just looking for AND with imm.
3166 KnownBits Known = CurDAG->computeKnownBits(And);
3167
3168 // Non-zero in the sense that they're not provably zero, which is the key
3169 // point if we want to use this value.
3170 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3171
3172 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3173 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3174 return false;
3175
3176 // The bits being inserted must only set those bits that are known to be zero.
3177 if ((OrImm & NotKnownZero) != 0) {
3178 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3179 // currently handle this case.
3180 return false;
3181 }
3182
3183 // BFI/BFXIL dst, src, #lsb, #width.
3184 int LSB = llvm::countr_one(NotKnownZero);
3185 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3186
3187 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3188 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3189 unsigned ImmS = Width - 1;
3190
3191 // If we're creating a BFI instruction avoid cases where we need more
3192 // instructions to materialize the BFI constant as compared to the original
3193 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3194 // should be no worse in this case.
3195 bool IsBFI = LSB != 0;
3196 uint64_t BFIImm = OrImm >> LSB;
3197 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3198 // We have a BFI instruction and we know the constant can't be materialized
3199 // with a ORR-immediate with the zero register.
3200 unsigned OrChunks = 0, BFIChunks = 0;
3201 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3202 if (((OrImm >> Shift) & 0xFFFF) != 0)
3203 ++OrChunks;
3204 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3205 ++BFIChunks;
3206 }
3207 if (BFIChunks > OrChunks)
3208 return false;
3209 }
3210
3211 // Materialize the constant to be inserted.
3212 SDLoc DL(N);
3213 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3214 SDNode *MOVI = CurDAG->getMachineNode(
3215 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3216
3217 // Create the BFI/BFXIL instruction.
3218 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3219 CurDAG->getTargetConstant(ImmR, DL, VT),
3220 CurDAG->getTargetConstant(ImmS, DL, VT)};
3221 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3222 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3223 return true;
3224}
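// Worked example (illustrative, i32): for 'or (and X, 0xFFFFFF00), 0x55',
// Known.Zero is 0x000000FF (a shifted mask) and NotKnownZero is 0xFFFFFF00, so
// LSB = 0 and Width = 8, giving ImmR = 0 and ImmS = 7. LSB == 0 means the
// BFXIL form, and 0x55 (not a valid logical immediate) is first materialized
// with MOVi32imm, so the selection is roughly:
//   mov   w9, #0x55
//   bfxil w0, w9, #0, #8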
3225
3226static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3227                                           SDValue &ShiftedOperand,
3228 uint64_t &EncodedShiftImm) {
3229 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3230 if (!Dst.hasOneUse())
3231 return false;
3232
3233 EVT VT = Dst.getValueType();
3234 assert((VT == MVT::i32 || VT == MVT::i64) &&
3235 "Caller should guarantee that VT is one of i32 or i64");
3236 const unsigned SizeInBits = VT.getSizeInBits();
3237
3238 SDLoc DL(Dst.getNode());
3239 uint64_t AndImm, ShlImm;
3240 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3241 isShiftedMask_64(AndImm)) {
3242 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3243 SDValue DstOp0 = Dst.getOperand(0);
3244 if (!DstOp0.hasOneUse())
3245 return false;
3246
3247 // An example to illustrate the transformation
3248 // From:
3249 // lsr x8, x1, #1
3250 // and x8, x8, #0x3f80
3251 // bfxil x8, x1, #0, #7
3252 // To:
3253    //   and  x8, x1, #0x7f
3254    //   ubfx x9, x1, #8, #7
3255    //   orr  x8, x8, x9, lsl #7
3256 //
3257 // The number of instructions remains the same, but ORR is faster than BFXIL
3258 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3259 // the dependency chain is improved after the transformation.
3260 uint64_t SrlImm;
3261 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3262 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3263 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3264 unsigned MaskWidth =
3265 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3266 unsigned UBFMOpc =
3267 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3268 SDNode *UBFMNode = CurDAG->getMachineNode(
3269 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3270 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3271 VT),
3272 CurDAG->getTargetConstant(
3273 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3274 ShiftedOperand = SDValue(UBFMNode, 0);
3275 EncodedShiftImm = AArch64_AM::getShifterImm(
3276 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3277 return true;
3278 }
3279 }
3280 return false;
3281 }
3282
3283 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3284 ShiftedOperand = Dst.getOperand(0);
3285 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3286 return true;
3287 }
3288
3289 uint64_t SrlImm;
3290 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3291 ShiftedOperand = Dst.getOperand(0);
3292 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3293 return true;
3294 }
3295 return false;
3296}
3297
3298// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3299// the operands and select it to AArch64::ORR with shifted registers if
3300// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3301static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3302 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3303 const bool BiggerPattern) {
3304 EVT VT = N->getValueType(0);
3305 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3306 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3307 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3308 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3309 assert((VT == MVT::i32 || VT == MVT::i64) &&
3310 "Expect result type to be i32 or i64 since N is combinable to BFM");
3311 SDLoc DL(N);
3312
3313 // Bail out if BFM simplifies away one node in BFM Dst.
3314 if (OrOpd1 != Dst)
3315 return false;
3316
3317 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3318  // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3319  // nodes from Rn (or inserts an additional shift node) if BiggerPattern is true.
3320 if (BiggerPattern) {
3321 uint64_t SrcAndImm;
3322 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3323 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3324 // OrOpd0 = AND Src, #Mask
3325 // So BFM simplifies away one AND node from Src and doesn't simplify away
3326 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3327 // one node (from Rd), ORR is better since it has higher throughput and
3328 // smaller latency than BFM on many AArch64 processors (and for the rest
3329 // ORR is at least as good as BFM).
3330 SDValue ShiftedOperand;
3331 uint64_t EncodedShiftImm;
3332 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3333 EncodedShiftImm)) {
3334 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3335 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3336 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3337 return true;
3338 }
3339 }
3340 return false;
3341 }
3342
3343 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3344
3345 uint64_t ShlImm;
3346 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3347 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3348 SDValue Ops[] = {
3349 Dst, Src,
3350 CurDAG->getTargetConstant(
3351              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3352      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3353 return true;
3354 }
3355
3356 // Select the following pattern to left-shifted operand rather than BFI.
3357 // %val1 = op ..
3358 // %val2 = shl %val1, #imm
3359 // %res = or %val1, %val2
3360 //
3361 // If N is selected to be BFI, we know that
3362    // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3363    //    into the BFI), and 2) OrOpd1 would be the destination operand (preserved)
3364 //
3365 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3366 if (OrOpd0.getOperand(0) == OrOpd1) {
3367 SDValue Ops[] = {
3368 OrOpd1, OrOpd1,
3369 CurDAG->getTargetConstant(
3370              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3371      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3372 return true;
3373 }
3374 }
3375
3376 uint64_t SrlImm;
3377 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3378 // Select the following pattern to right-shifted operand rather than BFXIL.
3379 // %val1 = op ..
3380 // %val2 = lshr %val1, #imm
3381 // %res = or %val1, %val2
3382 //
3383 // If N is selected to be BFXIL, we know that
3384    // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3385    //    into the BFXIL), and 2) OrOpd1 would be the destination operand (preserved)
3386 //
3387 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3388 if (OrOpd0.getOperand(0) == OrOpd1) {
3389 SDValue Ops[] = {
3390 OrOpd1, OrOpd1,
3391 CurDAG->getTargetConstant(
3392              AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3393      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3394 return true;
3395 }
3396 }
3397
3398 return false;
3399}
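// Illustrative example of the non-BiggerPattern SHL case above: for
//   %val2 = shl %val1, #3
//   %res  = or  %val1, %val2
// both OR operands derive from %val1, so instead of a BFI the node is selected
// directly to 'orr w0, w1, w1, lsl #3'.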
3400
3401static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3402 SelectionDAG *CurDAG) {
3403 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3404
3405 EVT VT = N->getValueType(0);
3406 if (VT != MVT::i32 && VT != MVT::i64)
3407 return false;
3408
3409 unsigned BitWidth = VT.getSizeInBits();
3410
3411 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3412 // have the expected shape. Try to undo that.
3413
3414 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3415 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3416
3417  // Given an OR operation, check if we have the following pattern
3418  // ubfm c, b, imm, imm2 (or something that does the same job, see
3419  // isBitfieldExtractOp)
3420  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3421 // countTrailingZeros(mask2) == imm2 - imm + 1
3422 // f = d | c
3423 // if yes, replace the OR instruction with:
3424 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3425
3426 // OR is commutative, check all combinations of operand order and values of
3427 // BiggerPattern, i.e.
3428 // Opd0, Opd1, BiggerPattern=false
3429 // Opd1, Opd0, BiggerPattern=false
3430 // Opd0, Opd1, BiggerPattern=true
3431 // Opd1, Opd0, BiggerPattern=true
3432 // Several of these combinations may match, so check with BiggerPattern=false
3433 // first since that will produce better results by matching more instructions
3434 // and/or inserting fewer extra instructions.
3435 for (int I = 0; I < 4; ++I) {
3436
3437 SDValue Dst, Src;
3438 unsigned ImmR, ImmS;
3439 bool BiggerPattern = I / 2;
3440 SDValue OrOpd0Val = N->getOperand(I % 2);
3441 SDNode *OrOpd0 = OrOpd0Val.getNode();
3442 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3443 SDNode *OrOpd1 = OrOpd1Val.getNode();
3444
3445 unsigned BFXOpc;
3446 int DstLSB, Width;
3447 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3448 NumberOfIgnoredLowBits, BiggerPattern)) {
3449 // Check that the returned opcode is compatible with the pattern,
3450 // i.e., same type and zero extended (U and not S)
3451 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3452 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3453 continue;
3454
3455 // Compute the width of the bitfield insertion
3456 DstLSB = 0;
3457 Width = ImmS - ImmR + 1;
3458      // FIXME: This constraint is to catch bitfield insertion; we may
3459      // want to widen the pattern if we want to handle the general bitfield
3460      // move case.
3461 if (Width <= 0)
3462 continue;
3463
3464 // If the mask on the insertee is correct, we have a BFXIL operation. We
3465 // can share the ImmR and ImmS values from the already-computed UBFM.
3466 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3467 BiggerPattern,
3468 Src, DstLSB, Width)) {
3469 ImmR = (BitWidth - DstLSB) % BitWidth;
3470 ImmS = Width - 1;
3471 } else
3472 continue;
3473
3474 // Check the second part of the pattern
3475 EVT VT = OrOpd1Val.getValueType();
3476 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3477
3478 // Compute the Known Zero for the candidate of the first operand.
3479    // This allows us to catch more general cases than just looking for
3480    // an AND with an immediate. Indeed, simplify-demanded-bits may have removed
3481    // the AND instruction because it proved it was useless.
3482 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3483
3484 // Check if there is enough room for the second operand to appear
3485 // in the first one
3486 APInt BitsToBeInserted =
3487 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3488
3489 if ((BitsToBeInserted & ~Known.Zero) != 0)
3490 continue;
3491
3492 // Set the first operand
3493 uint64_t Imm;
3494 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3495 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3496 // In that case, we can eliminate the AND
3497 Dst = OrOpd1->getOperand(0);
3498 else
3499 // Maybe the AND has been removed by simplify-demanded-bits
3500 // or is useful because it discards more bits
3501 Dst = OrOpd1Val;
3502
3503 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3504 // with shifted operand is more efficient.
3505 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3506 BiggerPattern))
3507 return true;
3508
3509 // both parts match
3510 SDLoc DL(N);
3511 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3512 CurDAG->getTargetConstant(ImmS, DL, VT)};
3513 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3514 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3515 return true;
3516 }
3517
3518 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3519 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3520 // mask (e.g., 0x000ffff0).
3521 uint64_t Mask0Imm, Mask1Imm;
3522 SDValue And0 = N->getOperand(0);
3523 SDValue And1 = N->getOperand(1);
3524 if (And0.hasOneUse() && And1.hasOneUse() &&
3525 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3526 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3527 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3528 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3529
3530 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3531 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3532 // bits to be inserted.
3533 if (isShiftedMask(Mask0Imm, VT)) {
3534 std::swap(And0, And1);
3535 std::swap(Mask0Imm, Mask1Imm);
3536 }
3537
3538 SDValue Src = And1->getOperand(0);
3539 SDValue Dst = And0->getOperand(0);
3540 unsigned LSB = llvm::countr_zero(Mask1Imm);
3541 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3542
3543 // The BFXIL inserts the low-order bits from a source register, so right
3544 // shift the needed bits into place.
3545 SDLoc DL(N);
3546 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3547 uint64_t LsrImm = LSB;
3548 if (Src->hasOneUse() &&
3549 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3550 (LsrImm + LSB) < BitWidth) {
3551 Src = Src->getOperand(0);
3552 LsrImm += LSB;
3553 }
3554
3555 SDNode *LSR = CurDAG->getMachineNode(
3556 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3557 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3558
3559 // BFXIL is an alias of BFM, so translate to BFM operands.
3560 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3561 unsigned ImmS = Width - 1;
3562
3563 // Create the BFXIL instruction.
3564 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3565 CurDAG->getTargetConstant(ImmR, DL, VT),
3566 CurDAG->getTargetConstant(ImmS, DL, VT)};
3567 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3568 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3569 return true;
3570 }
3571
3572 return false;
3573}
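// Worked example (illustrative, i32) of the two-AND case above: for
// 'or (and X, 0xFFFFF00F), (and Y, 0x00000FF0)', Mask1Imm = 0x00000FF0 is the
// shifted mask, so LSB = 4 and Width = 32 - popcount(0xFFFFF00F) = 8. The
// needed bits of Y are shifted down with UBFM and then inserted with
// BFM ImmR = 28, ImmS = 7, i.e. roughly:
//   lsr w9, w1, #4
//   bfi w0, w9, #4, #8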
3574
3575bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3576 if (N->getOpcode() != ISD::OR)
3577 return false;
3578
3579 APInt NUsefulBits;
3580 getUsefulBits(SDValue(N, 0), NUsefulBits);
3581
3582  // If none of the bits are useful, just return UNDEF.
3583 if (!NUsefulBits) {
3584 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3585 return true;
3586 }
3587
3588 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3589 return true;
3590
3591 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3592}
3593
3594/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3595/// equivalent of a left shift by a constant amount followed by an and masking
3596/// out a contiguous set of bits.
3597bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3598 if (N->getOpcode() != ISD::AND)
3599 return false;
3600
3601 EVT VT = N->getValueType(0);
3602 if (VT != MVT::i32 && VT != MVT::i64)
3603 return false;
3604
3605 SDValue Op0;
3606 int DstLSB, Width;
3607 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3608 Op0, DstLSB, Width))
3609 return false;
3610
3611 // ImmR is the rotate right amount.
3612 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3613 // ImmS is the most significant bit of the source to be moved.
3614 unsigned ImmS = Width - 1;
3615
3616 SDLoc DL(N);
3617 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3618 CurDAG->getTargetConstant(ImmS, DL, VT)};
3619 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3620 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3621 return true;
3622}
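// Illustrative example (i32): 'and (shl X, 8), 0x00FFFF00' positions 16 bits of
// X at bit 8, so DstLSB = 8 and Width = 16, giving ImmR = 24 and ImmS = 15,
// which is roughly:
//   ubfiz w0, w1, #8, #16    // i.e. UBFMWri with immr=24, imms=15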
3623
3624/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3625/// variable shift/rotate instructions.
3626bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3627 EVT VT = N->getValueType(0);
3628
3629 unsigned Opc;
3630 switch (N->getOpcode()) {
3631 case ISD::ROTR:
3632 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3633 break;
3634 case ISD::SHL:
3635 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3636 break;
3637 case ISD::SRL:
3638 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3639 break;
3640 case ISD::SRA:
3641 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3642 break;
3643 default:
3644 return false;
3645 }
3646
3647 uint64_t Size;
3648 uint64_t Bits;
3649 if (VT == MVT::i32) {
3650 Bits = 5;
3651 Size = 32;
3652 } else if (VT == MVT::i64) {
3653 Bits = 6;
3654 Size = 64;
3655 } else
3656 return false;
3657
3658 SDValue ShiftAmt = N->getOperand(1);
3659 SDLoc DL(N);
3660 SDValue NewShiftAmt;
3661
3662 // Skip over an extend of the shift amount.
3663 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3664 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3665 ShiftAmt = ShiftAmt->getOperand(0);
3666
3667 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3668 SDValue Add0 = ShiftAmt->getOperand(0);
3669 SDValue Add1 = ShiftAmt->getOperand(1);
3670 uint64_t Add0Imm;
3671 uint64_t Add1Imm;
3672 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3673 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3674 // to avoid the ADD/SUB.
3675 NewShiftAmt = Add0;
3676 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3677 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3678 (Add0Imm % Size == 0)) {
3679 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3680 // to generate a NEG instead of a SUB from a constant.
3681 unsigned NegOpc;
3682 unsigned ZeroReg;
3683 EVT SubVT = ShiftAmt->getValueType(0);
3684 if (SubVT == MVT::i32) {
3685 NegOpc = AArch64::SUBWrr;
3686 ZeroReg = AArch64::WZR;
3687 } else {
3688 assert(SubVT == MVT::i64);
3689 NegOpc = AArch64::SUBXrr;
3690 ZeroReg = AArch64::XZR;
3691 }
3692 SDValue Zero =
3693 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3694 MachineSDNode *Neg =
3695 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3696 NewShiftAmt = SDValue(Neg, 0);
3697 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3698 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3699 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3700 // to generate a NOT instead of a SUB from a constant.
3701 unsigned NotOpc;
3702 unsigned ZeroReg;
3703 EVT SubVT = ShiftAmt->getValueType(0);
3704 if (SubVT == MVT::i32) {
3705 NotOpc = AArch64::ORNWrr;
3706 ZeroReg = AArch64::WZR;
3707 } else {
3708 assert(SubVT == MVT::i64);
3709 NotOpc = AArch64::ORNXrr;
3710 ZeroReg = AArch64::XZR;
3711 }
3712 SDValue Zero =
3713 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3714      MachineSDNode *Not =
3715          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3716 NewShiftAmt = SDValue(Not, 0);
3717 } else
3718 return false;
3719 } else {
3720 // If the shift amount is masked with an AND, check that the mask covers the
3721 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3722 // the AND.
3723 uint64_t MaskImm;
3724 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3725 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3726 return false;
3727
3728 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3729 return false;
3730
3731 NewShiftAmt = ShiftAmt->getOperand(0);
3732 }
3733
3734 // Narrow/widen the shift amount to match the size of the shift operation.
3735 if (VT == MVT::i32)
3736 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3737 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3738 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3739 MachineSDNode *Ext = CurDAG->getMachineNode(
3740 AArch64::SUBREG_TO_REG, DL, VT,
3741 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3742 NewShiftAmt = SDValue(Ext, 0);
3743 }
3744
3745 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3746 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3747 return true;
3748}
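// Illustrative examples (i64, Size = 64, Bits = 6):
//   srl X, (and Y, 63)  -> the AND covers the 6 bits LSRV uses implicitly, so
//                          it is dropped:                 lsrv x0, x1, x2
//   shl X, (sub 64, Y)  -> 64 % 64 == 0, so a NEG of Y feeds LSLV instead of
//                          a SUB from a constant:         neg x9, x2 ; lslv x0, x1, x9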
3749
3750static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3751                                               SDValue &FixedPos,
3752 unsigned RegWidth,
3753 bool isReciprocal) {
3754 APFloat FVal(0.0);
3755 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3756 FVal = CN->getValueAPF();
3757 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3758 // Some otherwise illegal constants are allowed in this case.
3759 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3760 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3761 return false;
3762
3763 ConstantPoolSDNode *CN =
3764 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3765 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3766 } else
3767 return false;
3768
3769 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3770 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3771 // x-register.
3772 //
3773 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3774 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3775 // integers.
3776 bool IsExact;
3777
3778 if (isReciprocal)
3779 if (!FVal.getExactInverse(&FVal))
3780 return false;
3781
3782 // fbits is between 1 and 64 in the worst-case, which means the fmul
3783 // could have 2^64 as an actual operand. Need 65 bits of precision.
3784 APSInt IntVal(65, true);
3785 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3786
3787 // N.b. isPowerOf2 also checks for > 0.
3788 if (!IsExact || !IntVal.isPowerOf2())
3789 return false;
3790 unsigned FBits = IntVal.logBase2();
3791
3792 // Checks above should have guaranteed that we haven't lost information in
3793 // finding FBits, but it must still be in range.
3794 if (FBits == 0 || FBits > RegWidth) return false;
3795
3796 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3797 return true;
3798}
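// Illustrative example: for 'fp_to_sint (fmul %f, 65536.0)' with a 32-bit
// result, the multiplier is exactly 2^16, so FixedPos is set to 16 and the
// fixed-point conversion patterns can use it as the fbits operand, e.g.
// 'fcvtzs w0, s0, #16'.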
3799
3800bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3801 unsigned RegWidth) {
3802 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3803 false);
3804}
3805
3806bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3807 SDValue &FixedPos,
3808 unsigned RegWidth) {
3809 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3810 true);
3811}
3812
3813// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
3814// fields of the string, converts them to integers, and combines these into a
3815// single value to be used in the MRS/MSR instruction.
3816static int getIntOperandFromRegisterString(StringRef RegString) {
3817  SmallVector<StringRef, 5> Fields;
3818  RegString.split(Fields, ':');
3819
3820 if (Fields.size() == 1)
3821 return -1;
3822
3823 assert(Fields.size() == 5
3824 && "Invalid number of fields in read register string");
3825
3826  SmallVector<int, 5> Ops;
3827  bool AllIntFields = true;
3828
3829 for (StringRef Field : Fields) {
3830 unsigned IntField;
3831 AllIntFields &= !Field.getAsInteger(10, IntField);
3832 Ops.push_back(IntField);
3833 }
3834
3835 assert(AllIntFields &&
3836 "Unexpected non-integer value in special register string.");
3837 (void)AllIntFields;
3838
3839 // Need to combine the integer fields of the string into a single value
3840 // based on the bit encoding of MRS/MSR instruction.
3841 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3842 (Ops[3] << 3) | (Ops[4]);
3843}
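// Illustrative example: the string "1:2:3:4:5" yields
//   (1 << 14) | (2 << 11) | (3 << 7) | (4 << 3) | 5 = 0x51A5 (20901),
// which becomes the system-register immediate of the MRS/MSR instruction.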
3844
3845// Lower the read_register intrinsic to an MRS instruction node if the special
3846// register string argument is either of the form detailed in the ACLE (the
3847// form described in getIntOperandFromRegisterString) or is a named register
3848// known by the MRS SysReg mapper.
3849bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3850 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3851 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3852 SDLoc DL(N);
3853
3854 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3855
3856 unsigned Opcode64Bit = AArch64::MRS;
3857 int Imm = getIntOperandFromRegisterString(RegString->getString());
3858 if (Imm == -1) {
3859    // No match, so use the sysreg mapper to map the remaining possible strings
3860    // to the value for the register to be used for the instruction operand.
3861 const auto *TheReg =
3862 AArch64SysReg::lookupSysRegByName(RegString->getString());
3863 if (TheReg && TheReg->Readable &&
3864 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3865 Imm = TheReg->Encoding;
3866 else
3867 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3868
3869 if (Imm == -1) {
3870 // Still no match, see if this is "pc" or give up.
3871 if (!ReadIs128Bit && RegString->getString() == "pc") {
3872 Opcode64Bit = AArch64::ADR;
3873 Imm = 0;
3874 } else {
3875 return false;
3876 }
3877 }
3878 }
3879
3880 SDValue InChain = N->getOperand(0);
3881 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3882 if (!ReadIs128Bit) {
3883 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3884 {SysRegImm, InChain});
3885 } else {
3886 SDNode *MRRS = CurDAG->getMachineNode(
3887 AArch64::MRRS, DL,
3888 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3889 {SysRegImm, InChain});
3890
3891    // Sysregs have no endianness: the even register of the pair always contains
3892    // the low half of the value.
3893 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3894 SDValue(MRRS, 0));
3895 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3896 SDValue(MRRS, 0));
3897 SDValue OutChain = SDValue(MRRS, 1);
3898
3899 ReplaceUses(SDValue(N, 0), Lo);
3900 ReplaceUses(SDValue(N, 1), Hi);
3901 ReplaceUses(SDValue(N, 2), OutChain);
3902 };
3903 return true;
3904}
3905
3906// Lower the write_register intrinsic to an MSR instruction node if the special
3907// register string argument is either of the form detailed in the ACLE (the
3908// form described in getIntOperandFromRegisterString) or is a named register
3909// known by the MSR SysReg mapper.
3910bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3911 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3912 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3913 SDLoc DL(N);
3914
3915 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3916
3917 if (!WriteIs128Bit) {
3918 // Check if the register was one of those allowed as the pstatefield value
3919 // in the MSR (immediate) instruction. To accept the values allowed in the
3920    // pstatefield for the MSR (immediate) instruction, we also require that an
3921    // immediate value has been provided as an argument; we know this is the
3922    // case because it has been ensured by semantic checking.
3923 auto trySelectPState = [&](auto PMapper, unsigned State) {
3924 if (PMapper) {
3925 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3926 "Expected a constant integer expression.");
3927 unsigned Reg = PMapper->Encoding;
3928 uint64_t Immed = N->getConstantOperandVal(2);
3929 CurDAG->SelectNodeTo(
3930 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3931 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3932 return true;
3933 }
3934 return false;
3935 };
3936
3937 if (trySelectPState(
3938 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3939 AArch64::MSRpstateImm4))
3940 return true;
3941 if (trySelectPState(
3942 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3943 AArch64::MSRpstateImm1))
3944 return true;
3945 }
3946
3947 int Imm = getIntOperandFromRegisterString(RegString->getString());
3948 if (Imm == -1) {
3949 // Use the sysreg mapper to attempt to map the remaining possible strings
3950 // to the value for the register to be used for the MSR (register)
3951 // instruction operand.
3952 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3953 if (TheReg && TheReg->Writeable &&
3954 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3955 Imm = TheReg->Encoding;
3956 else
3957 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3958
3959 if (Imm == -1)
3960 return false;
3961 }
3962
3963 SDValue InChain = N->getOperand(0);
3964 if (!WriteIs128Bit) {
3965 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3966 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3967 N->getOperand(2), InChain);
3968 } else {
3969 // No endian swap. The lower half always goes into the even subreg, and the
3970    // higher half always into the odd subreg.
3971 SDNode *Pair = CurDAG->getMachineNode(
3972 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3973 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3974 MVT::i32),
3975 N->getOperand(2),
3976 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3977 N->getOperand(3),
3978 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3979
3980 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3981 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3982 SDValue(Pair, 0), InChain);
3983 }
3984
3985 return true;
3986}
3987
3988/// We've got special pseudo-instructions for these
3989bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3990 unsigned Opcode;
3991 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3992
3993  // Leave the node for the LSE instructions if the subtarget supports them.
3994 if (Subtarget->hasLSE()) return false;
3995
3996 if (MemTy == MVT::i8)
3997 Opcode = AArch64::CMP_SWAP_8;
3998 else if (MemTy == MVT::i16)
3999 Opcode = AArch64::CMP_SWAP_16;
4000 else if (MemTy == MVT::i32)
4001 Opcode = AArch64::CMP_SWAP_32;
4002 else if (MemTy == MVT::i64)
4003 Opcode = AArch64::CMP_SWAP_64;
4004 else
4005 llvm_unreachable("Unknown AtomicCmpSwap type");
4006
4007 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4008 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4009 N->getOperand(0)};
4010 SDNode *CmpSwap = CurDAG->getMachineNode(
4011 Opcode, SDLoc(N),
4012 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4013
4014 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4015 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4016
4017 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4018 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4019 CurDAG->RemoveDeadNode(N);
4020
4021 return true;
4022}
4023
4024bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4025 SDValue &Shift) {
4026 if (!isa<ConstantSDNode>(N))
4027 return false;
4028
4029 SDLoc DL(N);
4030 uint64_t Val = cast<ConstantSDNode>(N)
4031 ->getAPIntValue()
4032 .trunc(VT.getFixedSizeInBits())
4033 .getZExtValue();
4034
4035 switch (VT.SimpleTy) {
4036 case MVT::i8:
4037 // All immediates are supported.
4038 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4039 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4040 return true;
4041 case MVT::i16:
4042 case MVT::i32:
4043 case MVT::i64:
4044 // Support 8bit unsigned immediates.
4045 if (Val <= 255) {
4046 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4047 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4048 return true;
4049 }
4050 // Support 16bit unsigned immediates that are a multiple of 256.
4051 if (Val <= 65280 && Val % 256 == 0) {
4052 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4053 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4054 return true;
4055 }
4056 break;
4057 default:
4058 break;
4059 }
4060
4061 return false;
4062}
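// Illustrative examples for the i16/i32/i64 element cases above:
//   Val = 200    -> Imm = 200,  Shift = 0   (fits in 8 bits)
//   Val = 0x1200 -> Imm = 0x12, Shift = 8   (multiple of 256, <= 65280)
//   Val = 300    -> rejected (neither form applies)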
4063
4064bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4065 SDValue &Imm, SDValue &Shift,
4066 bool Negate) {
4067 if (!isa<ConstantSDNode>(N))
4068 return false;
4069
4070 SDLoc DL(N);
4071 int64_t Val = cast<ConstantSDNode>(N)
4072 ->getAPIntValue()
4073 .trunc(VT.getFixedSizeInBits())
4074 .getSExtValue();
4075
4076 if (Negate)
4077 Val = -Val;
4078
4079 // Signed saturating instructions treat their immediate operand as unsigned,
4080 // whereas the related intrinsics define their operands to be signed. This
4081 // means we can only use the immediate form when the operand is non-negative.
4082 if (Val < 0)
4083 return false;
4084
4085 switch (VT.SimpleTy) {
4086 case MVT::i8:
4087 // All positive immediates are supported.
4088 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4089 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4090 return true;
4091 case MVT::i16:
4092 case MVT::i32:
4093 case MVT::i64:
4094 // Support 8bit positive immediates.
4095 if (Val <= 255) {
4096 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4097 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4098 return true;
4099 }
4100 // Support 16bit positive immediates that are a multiple of 256.
4101 if (Val <= 65280 && Val % 256 == 0) {
4102 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4103 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4104 return true;
4105 }
4106 break;
4107 default:
4108 break;
4109 }
4110
4111 return false;
4112}
4113
4114bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4115 SDValue &Shift) {
4116 if (!isa<ConstantSDNode>(N))
4117 return false;
4118
4119 SDLoc DL(N);
4120 int64_t Val = cast<ConstantSDNode>(N)
4121 ->getAPIntValue()
4122 .trunc(VT.getFixedSizeInBits())
4123 .getSExtValue();
4124
4125 switch (VT.SimpleTy) {
4126 case MVT::i8:
4127 // All immediates are supported.
4128 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4129 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4130 return true;
4131 case MVT::i16:
4132 case MVT::i32:
4133 case MVT::i64:
4134 // Support 8bit signed immediates.
4135 if (Val >= -128 && Val <= 127) {
4136 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4137 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4138 return true;
4139 }
4140 // Support 16bit signed immediates that are a multiple of 256.
4141 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4142 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4143 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4144 return true;
4145 }
4146 break;
4147 default:
4148 break;
4149 }
4150
4151 return false;
4152}
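// Illustrative examples for the signed i16/i32/i64 element cases above:
//   Val = -3    -> Imm = 0xFD, Shift = 0   (within the 8-bit signed range)
//   Val = -1280 -> Imm = 0xFB, Shift = 8   (-1280 >> 8 == -5, masked to 8 bits)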
4153
4154bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4155 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4156 int64_t ImmVal = CNode->getSExtValue();
4157 SDLoc DL(N);
4158 if (ImmVal >= -128 && ImmVal < 128) {
4159 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4160 return true;
4161 }
4162 }
4163 return false;
4164}
4165
4166bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4167 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4168 uint64_t ImmVal = CNode->getZExtValue();
4169
4170 switch (VT.SimpleTy) {
4171 case MVT::i8:
4172 ImmVal &= 0xFF;
4173 break;
4174 case MVT::i16:
4175 ImmVal &= 0xFFFF;
4176 break;
4177 case MVT::i32:
4178 ImmVal &= 0xFFFFFFFF;
4179 break;
4180 case MVT::i64:
4181 break;
4182 default:
4183 llvm_unreachable("Unexpected type");
4184 }
4185
4186 if (ImmVal < 256) {
4187 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4188 return true;
4189 }
4190 }
4191 return false;
4192}
4193
4194bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4195 bool Invert) {
4196 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4197 uint64_t ImmVal = CNode->getZExtValue();
4198 SDLoc DL(N);
4199
4200 if (Invert)
4201 ImmVal = ~ImmVal;
4202
4203 // Shift mask depending on type size.
4204 switch (VT.SimpleTy) {
4205 case MVT::i8:
4206 ImmVal &= 0xFF;
4207 ImmVal |= ImmVal << 8;
4208 ImmVal |= ImmVal << 16;
4209 ImmVal |= ImmVal << 32;
4210 break;
4211 case MVT::i16:
4212 ImmVal &= 0xFFFF;
4213 ImmVal |= ImmVal << 16;
4214 ImmVal |= ImmVal << 32;
4215 break;
4216 case MVT::i32:
4217 ImmVal &= 0xFFFFFFFF;
4218 ImmVal |= ImmVal << 32;
4219 break;
4220 case MVT::i64:
4221 break;
4222 default:
4223 llvm_unreachable("Unexpected type");
4224 }
4225
4226 uint64_t encoding;
4227 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4228 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4229 return true;
4230 }
4231 }
4232 return false;
4233}
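// Illustrative example: for an i16 element value of 0x00FF, the value is
// replicated to the 64-bit pattern 0x00FF00FF00FF00FF, which is a valid
// logical immediate and is therefore emitted in its encoded form.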
4234
4235// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4236// Rather than attempt to normalise everything we can sometimes saturate the
4237// shift amount during selection. This function also allows for consistent
4238// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4239// required by the instructions.
4240bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4241 uint64_t High, bool AllowSaturation,
4242 SDValue &Imm) {
4243 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4244 uint64_t ImmVal = CN->getZExtValue();
4245
4246 // Reject shift amounts that are too small.
4247 if (ImmVal < Low)
4248 return false;
4249
4250 // Reject or saturate shift amounts that are too big.
4251 if (ImmVal > High) {
4252 if (!AllowSaturation)
4253 return false;
4254 ImmVal = High;
4255 }
4256
4257 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4258 return true;
4259 }
4260
4261 return false;
4262}
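// Illustrative example: a .h element right shift typically uses Low = 1 and
// High = 16. An immediate of 20 is rejected unless AllowSaturation is set, in
// which case it is clamped to 16; an immediate of 0 is always rejected.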
4263
4264bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4265 // tagp(FrameIndex, IRGstack, tag_offset):
4266 // since the offset between FrameIndex and IRGstack is a compile-time
4267 // constant, this can be lowered to a single ADDG instruction.
4268 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4269 return false;
4270 }
4271
4272 SDValue IRG_SP = N->getOperand(2);
4273 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4274 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4275 return false;
4276 }
4277
4278 const TargetLowering *TLI = getTargetLowering();
4279 SDLoc DL(N);
4280 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4281 SDValue FiOp = CurDAG->getTargetFrameIndex(
4282 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4283 int TagOffset = N->getConstantOperandVal(3);
4284
4285 SDNode *Out = CurDAG->getMachineNode(
4286 AArch64::TAGPstack, DL, MVT::i64,
4287 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4288 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4289 ReplaceNode(N, Out);
4290 return true;
4291}
4292
4293void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4294 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4295 "llvm.aarch64.tagp third argument must be an immediate");
4296 if (trySelectStackSlotTagP(N))
4297 return;
4298  // FIXME: the above applies whenever the offset between Op1 and Op2 is a
4299 // compile-time constant, not just for stack allocations.
4300
4301 // General case for unrelated pointers in Op1 and Op2.
4302 SDLoc DL(N);
4303 int TagOffset = N->getConstantOperandVal(3);
4304 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4305 {N->getOperand(1), N->getOperand(2)});
4306 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4307 {SDValue(N1, 0), N->getOperand(2)});
4308 SDNode *N3 = CurDAG->getMachineNode(
4309 AArch64::ADDG, DL, MVT::i64,
4310 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4311 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4312 ReplaceNode(N, N3);
4313}
4314
4315bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4316 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4317
4318 // Bail when not a "cast" like insert_subvector.
4319 if (N->getConstantOperandVal(2) != 0)
4320 return false;
4321 if (!N->getOperand(0).isUndef())
4322 return false;
4323
4324 // Bail when normal isel should do the job.
4325 EVT VT = N->getValueType(0);
4326 EVT InVT = N->getOperand(1).getValueType();
4327 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4328 return false;
4329 if (InVT.getSizeInBits() <= 128)
4330 return false;
4331
4332 // NOTE: We can only get here when doing fixed length SVE code generation.
4333 // We do manual selection because the types involved are not linked to real
4334 // registers (despite being legal) and must be coerced into SVE registers.
4335
4337 "Expected to insert into a packed scalable vector!");
4338
4339 SDLoc DL(N);
4340 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4341 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4342 N->getOperand(1), RC));
4343 return true;
4344}
4345
4346bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4347 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4348
4349 // Bail when not a "cast" like extract_subvector.
4350 if (N->getConstantOperandVal(1) != 0)
4351 return false;
4352
4353 // Bail when normal isel can do the job.
4354 EVT VT = N->getValueType(0);
4355 EVT InVT = N->getOperand(0).getValueType();
4356 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4357 return false;
4358 if (VT.getSizeInBits() <= 128)
4359 return false;
4360
4361 // NOTE: We can only get here when doing fixed length SVE code generation.
4362 // We do manual selection because the types involved are not linked to real
4363 // registers (despite being legal) and must be coerced into SVE registers.
4364
4366 "Expected to extract from a packed scalable vector!");
4367
4368 SDLoc DL(N);
4369 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4370 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4371 N->getOperand(0), RC));
4372 return true;
4373}
4374
4375bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4376 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4377
4378 SDValue N0 = N->getOperand(0);
4379 SDValue N1 = N->getOperand(1);
4380 EVT VT = N->getValueType(0);
4381
4382 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4383  // A rotate by a constant is a funnel shift in IR, which is expanded to
4384 // an OR with shifted operands.
4385 // We do the following transform:
4386 // OR N0, N1 -> xar (x, y, imm)
4387 // Where:
4388 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4389 // N0 = SHL_PRED true, V, splat(bits-imm)
4390 // V = (xor x, y)
4391 if (VT.isScalableVector() &&
4392 (Subtarget->hasSVE2() ||
4393 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4394 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4395        N1.getOpcode() != AArch64ISD::SRL_PRED)
4396      std::swap(N0, N1);
4397 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4398        N1.getOpcode() != AArch64ISD::SRL_PRED)
4399      return false;
4400
4401 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4402 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4403 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4404 return false;
4405
4406 SDValue XOR = N0.getOperand(1);
4407 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4408 return false;
4409
4410 APInt ShlAmt, ShrAmt;
4411 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4412        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4413      return false;
4414
4415 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4416 return false;
4417
4418 SDLoc DL(N);
4419 SDValue Imm =
4420 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4421
4422 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4423 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4424 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4425 AArch64::XAR_ZZZI_D})) {
4426 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4427 return true;
4428 }
4429 return false;
4430 }
4431
4432 if (!Subtarget->hasSHA3())
4433 return false;
4434
4435 if (N0->getOpcode() != AArch64ISD::VSHL ||
4436      N1->getOpcode() != AArch64ISD::VLSHR)
4437    return false;
4438
4439 if (N0->getOperand(0) != N1->getOperand(0) ||
4440 N1->getOperand(0)->getOpcode() != ISD::XOR)
4441 return false;
4442
4443 SDValue XOR = N0.getOperand(0);
4444 SDValue R1 = XOR.getOperand(0);
4445 SDValue R2 = XOR.getOperand(1);
4446
4447 unsigned HsAmt = N0.getConstantOperandVal(1);
4448 unsigned ShAmt = N1.getConstantOperandVal(1);
4449
4450 SDLoc DL = SDLoc(N0.getOperand(1));
4451 SDValue Imm = CurDAG->getTargetConstant(
4452 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4453
4454 if (ShAmt + HsAmt != 64)
4455 return false;
4456
4457 SDValue Ops[] = {R1, R2, Imm};
4458 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4459
4460 return true;
4461}
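// Illustrative example of the NEON/SHA3 path above: 'rotr (xor x, y), #9' on
// v2i64 reaches here as an OR of VSHL by 55 and VLSHR by 9 (55 + 9 == 64), so
// the node is selected to 'xar v0.2d, v1.2d, v2.2d, #9'.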
4462
4463void AArch64DAGToDAGISel::Select(SDNode *Node) {
4464 // If we have a custom node, we already have selected!
4465 if (Node->isMachineOpcode()) {
4466 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4467 Node->setNodeId(-1);
4468 return;
4469 }
4470
4471  // A few custom selection cases.
4472 EVT VT = Node->getValueType(0);
4473
4474 switch (Node->getOpcode()) {
4475 default:
4476 break;
4477
4478  case ISD::ATOMIC_CMP_SWAP:
4479    if (SelectCMP_SWAP(Node))
4480 return;
4481 break;
4482
4483 case ISD::READ_REGISTER:
4484 case AArch64ISD::MRRS:
4485 if (tryReadRegister(Node))
4486 return;
4487 break;
4488
4489  case ISD::WRITE_REGISTER:
4490  case AArch64ISD::MSRR:
4491 if (tryWriteRegister(Node))
4492 return;
4493 break;
4494
4495 case ISD::LOAD: {
4496 // Try to select as an indexed load. Fall through to normal processing
4497 // if we can't.
4498 if (tryIndexedLoad(Node))
4499 return;
4500 break;
4501 }
4502
4503 case ISD::SRL:
4504 case ISD::AND:
4505 case ISD::SRA:
4506  case ISD::SIGN_EXTEND_INREG:
4507    if (tryBitfieldExtractOp(Node))
4508 return;
4509 if (tryBitfieldInsertInZeroOp(Node))
4510 return;
4511 [[fallthrough]];
4512 case ISD::ROTR:
4513 case ISD::SHL:
4514 if (tryShiftAmountMod(Node))
4515 return;
4516 break;
4517
4518 case ISD::SIGN_EXTEND:
4519 if (tryBitfieldExtractOpFromSExt(Node))
4520 return;
4521 break;
4522
4523 case ISD::OR:
4524 if (tryBitfieldInsertOp(Node))
4525 return;
4526 if (trySelectXAR(Node))
4527 return;
4528 break;
4529
4530  case ISD::EXTRACT_SUBVECTOR: {
4531    if (trySelectCastScalableToFixedLengthVector(Node))
4532 return;
4533 break;
4534 }
4535
4536 case ISD::INSERT_SUBVECTOR: {
4537 if (trySelectCastFixedLengthToScalableVector(Node))
4538 return;
4539 break;
4540 }
4541
4542 case ISD::Constant: {
4543 // Materialize zero constants as copies from WZR/XZR. This allows
4544 // the coalescer to propagate these into other instructions.
4545 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4546 if (ConstNode->isZero()) {
4547 if (VT == MVT::i32) {
4548 SDValue New = CurDAG->getCopyFromReg(
4549 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4550 ReplaceNode(Node, New.getNode());
4551 return;
4552 } else if (VT == MVT::i64) {
4553 SDValue New = CurDAG->getCopyFromReg(
4554 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4555 ReplaceNode(Node, New.getNode());
4556 return;
4557 }
4558 }
4559 break;
4560 }
4561
4562 case ISD::FrameIndex: {
4563 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4564 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4565 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4566 const TargetLowering *TLI = getTargetLowering();
4567 SDValue TFI = CurDAG->getTargetFrameIndex(
4568 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4569 SDLoc DL(Node);
4570 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4571 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4572 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4573 return;
4574 }
4575  case ISD::INTRINSIC_W_CHAIN: {
4576    unsigned IntNo = Node->getConstantOperandVal(1);
4577 switch (IntNo) {
4578 default:
4579 break;
4580 case Intrinsic::aarch64_ldaxp:
4581 case Intrinsic::aarch64_ldxp: {
4582 unsigned Op =
4583 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4584 SDValue MemAddr = Node->getOperand(2);
4585 SDLoc DL(Node);
4586 SDValue Chain = Node->getOperand(0);
4587
4588 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4589 MVT::Other, MemAddr, Chain);
4590
4591 // Transfer memoperands.
4592      MachineMemOperand *MemOp =
4593          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4594 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4595 ReplaceNode(Node, Ld);
4596 return;
4597 }
4598 case Intrinsic::aarch64_stlxp:
4599 case Intrinsic::aarch64_stxp: {
4600 unsigned Op =
4601 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4602 SDLoc DL(Node);
4603 SDValue Chain = Node->getOperand(0);
4604 SDValue ValLo = Node->getOperand(2);
4605 SDValue ValHi = Node->getOperand(3);
4606 SDValue MemAddr = Node->getOperand(4);
4607
4608 // Place arguments in the right order.
4609 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4610
4611 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4612 // Transfer memoperands.
4613      MachineMemOperand *MemOp =
4614          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4615 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4616
4617 ReplaceNode(Node, St);
4618 return;
4619 }
4620 case Intrinsic::aarch64_neon_ld1x2:
4621 if (VT == MVT::v8i8) {
4622 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4623 return;
4624 } else if (VT == MVT::v16i8) {
4625 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4626 return;
4627 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4628 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4629 return;
4630 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4631 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4632 return;
4633 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4634 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4635 return;
4636 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4637 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4638 return;
4639 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4640 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4641 return;
4642 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4643 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4644 return;
4645 }
4646 break;
4647 case Intrinsic::aarch64_neon_ld1x3:
4648 if (VT == MVT::v8i8) {
4649 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4650 return;
4651 } else if (VT == MVT::v16i8) {
4652 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4653 return;
4654 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4655 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4656 return;
4657 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4658 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4659 return;
4660 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4661 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4662 return;
4663 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4664 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4665 return;
4666 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4667 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4668 return;
4669 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4670 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4671 return;
4672 }
4673 break;
4674 case Intrinsic::aarch64_neon_ld1x4:
4675 if (VT == MVT::v8i8) {
4676 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4677 return;
4678 } else if (VT == MVT::v16i8) {
4679 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4680 return;
4681 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4682 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4683 return;
4684 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4685 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4686 return;
4687 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4688 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4689 return;
4690 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4691 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4692 return;
4693 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4694 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4695 return;
4696 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4697 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4698 return;
4699 }
4700 break;
4701 case Intrinsic::aarch64_neon_ld2:
4702 if (VT == MVT::v8i8) {
4703 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4704 return;
4705 } else if (VT == MVT::v16i8) {
4706 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4707 return;
4708 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4709 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4710 return;
4711 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4712 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4713 return;
4714 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4715 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4716 return;
4717 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4718 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4719 return;
4720 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4721 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4722 return;
4723 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4724 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4725 return;
4726 }
4727 break;
4728 case Intrinsic::aarch64_neon_ld3:
4729 if (VT == MVT::v8i8) {
4730 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4731 return;
4732 } else if (VT == MVT::v16i8) {
4733 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4734 return;
4735 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4736 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4737 return;
4738 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4739 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4740 return;
4741 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4742 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4743 return;
4744 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4745 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4746 return;
4747 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4748 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4749 return;
4750 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4751 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4752 return;
4753 }
4754 break;
4755 case Intrinsic::aarch64_neon_ld4:
4756 if (VT == MVT::v8i8) {
4757 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4758 return;
4759 } else if (VT == MVT::v16i8) {
4760 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4761 return;
4762 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4763 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4764 return;
4765 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4766 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4767 return;
4768 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4769 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4770 return;
4771 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4772 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4773 return;
4774 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4775 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4776 return;
4777 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4778 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4779 return;
4780 }
4781 break;
4782 case Intrinsic::aarch64_neon_ld2r:
4783 if (VT == MVT::v8i8) {
4784 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4785 return;
4786 } else if (VT == MVT::v16i8) {
4787 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4788 return;
4789 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4790 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4791 return;
4792 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4793 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4794 return;
4795 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4796 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4797 return;
4798 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4799 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4800 return;
4801 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4802 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4803 return;
4804 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4805 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4806 return;
4807 }
4808 break;
4809 case Intrinsic::aarch64_neon_ld3r:
4810 if (VT == MVT::v8i8) {
4811 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4812 return;
4813 } else if (VT == MVT::v16i8) {
4814 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4815 return;
4816 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4817 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4818 return;
4819 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4820 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4821 return;
4822 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4823 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4824 return;
4825 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4826 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4827 return;
4828 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4829 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4830 return;
4831 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4832 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4833 return;
4834 }
4835 break;
4836 case Intrinsic::aarch64_neon_ld4r:
4837 if (VT == MVT::v8i8) {
4838 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4839 return;
4840 } else if (VT == MVT::v16i8) {
4841 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4842 return;
4843 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4844 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4845 return;
4846 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4847 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4848 return;
4849 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4850 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4851 return;
4852 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4853 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4854 return;
4855 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4856 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4857 return;
4858 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4859 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4860 return;
4861 }
4862 break;
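    // Single-lane LDn loads encode only the element width, which is why the
    // 64-bit and 128-bit vector types of the same element share one opcode.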
4863 case Intrinsic::aarch64_neon_ld2lane:
4864 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4865 SelectLoadLane(Node, 2, AArch64::LD2i8);
4866 return;
4867 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4868 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4869 SelectLoadLane(Node, 2, AArch64::LD2i16);
4870 return;
4871 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4872 VT == MVT::v2f32) {
4873 SelectLoadLane(Node, 2, AArch64::LD2i32);
4874 return;
4875 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4876 VT == MVT::v1f64) {
4877 SelectLoadLane(Node, 2, AArch64::LD2i64);
4878 return;
4879 }
4880 break;
4881 case Intrinsic::aarch64_neon_ld3lane:
4882 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4883 SelectLoadLane(Node, 3, AArch64::LD3i8);
4884 return;
4885 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4886 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4887 SelectLoadLane(Node, 3, AArch64::LD3i16);
4888 return;
4889 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4890 VT == MVT::v2f32) {
4891 SelectLoadLane(Node, 3, AArch64::LD3i32);
4892 return;
4893 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4894 VT == MVT::v1f64) {
4895 SelectLoadLane(Node, 3, AArch64::LD3i64);
4896 return;
4897 }
4898 break;
4899 case Intrinsic::aarch64_neon_ld4lane:
4900 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4901 SelectLoadLane(Node, 4, AArch64::LD4i8);
4902 return;
4903 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4904 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4905 SelectLoadLane(Node, 4, AArch64::LD4i16);
4906 return;
4907 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4908 VT == MVT::v2f32) {
4909 SelectLoadLane(Node, 4, AArch64::LD4i32);
4910 return;
4911 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4912 VT == MVT::v1f64) {
4913 SelectLoadLane(Node, 4, AArch64::LD4i64);
4914 return;
4915 }
4916 break;
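    // LD64B (FEAT_LS64) loads 64 bytes into a tuple of eight consecutive X
    // registers; x8sub_0 names the first sub-register of that tuple.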
4917 case Intrinsic::aarch64_ld64b:
4918 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4919 return;
4920 case Intrinsic::aarch64_sve_ld2q_sret: {
4921 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
4922 return;
4923 }
4924 case Intrinsic::aarch64_sve_ld3q_sret: {
4925 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
4926 return;
4927 }
4928 case Intrinsic::aarch64_sve_ld4q_sret: {
4929 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
4930 return;
4931 }
4932 case Intrinsic::aarch64_sve_ld2_sret: {
4933 if (VT == MVT::nxv16i8) {
4934 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4935 true);
4936 return;
4937 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4938 VT == MVT::nxv8bf16) {
4939 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4940 true);
4941 return;
4942 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4943 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4944 true);
4945 return;
4946 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4947 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4948 true);
4949 return;
4950 }
4951 break;
4952 }
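    // The ld1/ldnt1 '_pn' intrinsics take a predicate-as-counter operand and
    // load two or four Z registers at once: SME2 selects pseudos, SVE2p1 the
    // real multi-vector encodings, and anything else is left unselected.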
4953 case Intrinsic::aarch64_sve_ld1_pn_x2: {
4954 if (VT == MVT::nxv16i8) {
4955 if (Subtarget->hasSME2())
4956 SelectContiguousMultiVectorLoad(
4957 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
4958 else if (Subtarget->hasSVE2p1())
4959 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
4960 AArch64::LD1B_2Z);
4961 else
4962 break;
4963 return;
4964 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4965 VT == MVT::nxv8bf16) {
4966 if (Subtarget->hasSME2())
4967 SelectContiguousMultiVectorLoad(
4968 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
4969 else if (Subtarget->hasSVE2p1())
4970 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
4971 AArch64::LD1H_2Z);
4972 else
4973 break;
4974 return;
4975 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4976 if (Subtarget->hasSME2())
4977 SelectContiguousMultiVectorLoad(
4978 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
4979 else if (Subtarget->hasSVE2p1())
4980 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
4981 AArch64::LD1W_2Z);
4982 else
4983 break;
4984 return;
4985 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4986 if (Subtarget->hasSME2())
4987 SelectContiguousMultiVectorLoad(
4988 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
4989 else if (Subtarget->hasSVE2p1())
4990 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
4991 AArch64::LD1D_2Z);
4992 else
4993 break;
4994 return;
4995 }
4996 break;
4997 }
4998 case Intrinsic::aarch64_sve_ld1_pn_x4: {
4999 if (VT == MVT::nxv16i8) {
5000 if (Subtarget->hasSME2())
5001 SelectContiguousMultiVectorLoad(
5002 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5003 else if (Subtarget->hasSVE2p1())
5004 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5005 AArch64::LD1B_4Z);
5006 else
5007 break;
5008 return;
5009 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5010 VT == MVT::nxv8bf16) {
5011 if (Subtarget->hasSME2())
5012 SelectContiguousMultiVectorLoad(
5013 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5014 else if (Subtarget->hasSVE2p1())
5015 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5016 AArch64::LD1H_4Z);
5017 else
5018 break;
5019 return;
5020 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5021 if (Subtarget->hasSME2())
5022 SelectContiguousMultiVectorLoad(
5023 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5024 else if (Subtarget->hasSVE2p1())
5025 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5026 AArch64::LD1W_4Z);
5027 else
5028 break;
5029 return;
5030 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5031 if (Subtarget->hasSME2())
5032 SelectContiguousMultiVectorLoad(
5033 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5034 else if (Subtarget->hasSVE2p1())
5035 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5036 AArch64::LD1D_4Z);
5037 else
5038 break;
5039 return;
5040 }
5041 break;
5042 }
5043 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5044 if (VT == MVT::nxv16i8) {
5045 if (Subtarget->hasSME2())
5046 SelectContiguousMultiVectorLoad(Node, 2, 0,
5047 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5048 AArch64::LDNT1B_2Z_PSEUDO);
5049 else if (Subtarget->hasSVE2p1())
5050 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5051 AArch64::LDNT1B_2Z);
5052 else
5053 break;
5054 return;
5055 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5056 VT == MVT::nxv8bf16) {
5057 if (Subtarget->hasSME2())
5058 SelectContiguousMultiVectorLoad(Node, 2, 1,
5059 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5060 AArch64::LDNT1H_2Z_PSEUDO);
5061 else if (Subtarget->hasSVE2p1())
5062 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5063 AArch64::LDNT1H_2Z);
5064 else
5065 break;
5066 return;
5067 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5068 if (Subtarget->hasSME2())
5069 SelectContiguousMultiVectorLoad(Node, 2, 2,
5070 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5071 AArch64::LDNT1W_2Z_PSEUDO);
5072 else if (Subtarget->hasSVE2p1())
5073 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5074 AArch64::LDNT1W_2Z);
5075 else
5076 break;
5077 return;
5078 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5079 if (Subtarget->hasSME2())
5080 SelectContiguousMultiVectorLoad(Node, 2, 3,
5081 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5082 AArch64::LDNT1D_2Z_PSEUDO);
5083 else if (Subtarget->hasSVE2p1())
5084 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5085 AArch64::LDNT1D_2Z);
5086 else
5087 break;
5088 return;
5089 }
5090 break;
5091 }
5092 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5093 if (VT == MVT::nxv16i8) {
5094 if (Subtarget->hasSME2())
5095 SelectContiguousMultiVectorLoad(Node, 4, 0,
5096 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5097 AArch64::LDNT1B_4Z_PSEUDO);
5098 else if (Subtarget->hasSVE2p1())
5099 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5100 AArch64::LDNT1B_4Z);
5101 else
5102 break;
5103 return;
5104 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5105 VT == MVT::nxv8bf16) {
5106 if (Subtarget->hasSME2())
5107 SelectContiguousMultiVectorLoad(Node, 4, 1,
5108 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5109 AArch64::LDNT1H_4Z_PSEUDO);
5110 else if (Subtarget->hasSVE2p1())
5111 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5112 AArch64::LDNT1H_4Z);
5113 else
5114 break;
5115 return;
5116 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5117 if (Subtarget->hasSME2())
5118 SelectContiguousMultiVectorLoad(Node, 4, 2,
5119 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5120 AArch64::LDNT1W_4Z_PSEUDO);
5121 else if (Subtarget->hasSVE2p1())
5122 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5123 AArch64::LDNT1W_4Z);
5124 else
5125 break;
5126 return;
5127 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5128 if (Subtarget->hasSME2())
5129 SelectContiguousMultiVectorLoad(Node, 4, 3,
5130 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5131 AArch64::LDNT1D_4Z_PSEUDO);
5132 else if (Subtarget->hasSVE2p1())
5133 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5134 AArch64::LDNT1D_4Z);
5135 else
5136 break;
5137 return;
5138 }
5139 break;
5140 }
5141 case Intrinsic::aarch64_sve_ld3_sret: {
5142 if (VT == MVT::nxv16i8) {
5143 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5144 true);
5145 return;
5146 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5147 VT == MVT::nxv8bf16) {
5148 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5149 true);
5150 return;
5151 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5152 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5153 true);
5154 return;
5155 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5156 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5157 true);
5158 return;
5159 }
5160 break;
5161 }
5162 case Intrinsic::aarch64_sve_ld4_sret: {
5163 if (VT == MVT::nxv16i8) {
5164 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5165 true);
5166 return;
5167 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5168 VT == MVT::nxv8bf16) {
5169 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5170 true);
5171 return;
5172 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5173 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5174 true);
5175 return;
5176 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5177 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5178 true);
5179 return;
5180 }
5181 break;
5182 }
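    // SME multi-vector ZA reads (MOVA). The SelectMultiVectorMove template
    // arguments are the largest legal slice-offset immediate (which shrinks as
    // the element size grows) and its scale.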
5183 case Intrinsic::aarch64_sme_read_hor_vg2: {
5184 if (VT == MVT::nxv16i8) {
5185 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5186 AArch64::MOVA_2ZMXI_H_B);
5187 return;
5188 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5189 VT == MVT::nxv8bf16) {
5190 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5191 AArch64::MOVA_2ZMXI_H_H);
5192 return;
5193 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5194 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5195 AArch64::MOVA_2ZMXI_H_S);
5196 return;
5197 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5198 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5199 AArch64::MOVA_2ZMXI_H_D);
5200 return;
5201 }
5202 break;
5203 }
5204 case Intrinsic::aarch64_sme_read_ver_vg2: {
5205 if (VT == MVT::nxv16i8) {
5206 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5207 AArch64::MOVA_2ZMXI_V_B);
5208 return;
5209 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5210 VT == MVT::nxv8bf16) {
5211 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5212 AArch64::MOVA_2ZMXI_V_H);
5213 return;
5214 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5215 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5216 AArch64::MOVA_2ZMXI_V_S);
5217 return;
5218 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5219 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5220 AArch64::MOVA_2ZMXI_V_D);
5221 return;
5222 }
5223 break;
5224 }
5225 case Intrinsic::aarch64_sme_read_hor_vg4: {
5226 if (VT == MVT::nxv16i8) {
5227 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5228 AArch64::MOVA_4ZMXI_H_B);
5229 return;
5230 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5231 VT == MVT::nxv8bf16) {
5232 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5233 AArch64::MOVA_4ZMXI_H_H);
5234 return;
5235 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5236 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5237 AArch64::MOVA_4ZMXI_H_S);
5238 return;
5239 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5240 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5241 AArch64::MOVA_4ZMXI_H_D);
5242 return;
5243 }
5244 break;
5245 }
5246 case Intrinsic::aarch64_sme_read_ver_vg4: {
5247 if (VT == MVT::nxv16i8) {
5248 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5249 AArch64::MOVA_4ZMXI_V_B);
5250 return;
5251 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5252 VT == MVT::nxv8bf16) {
5253 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5254 AArch64::MOVA_4ZMXI_V_H);
5255 return;
5256 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5257 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5258 AArch64::MOVA_4ZMXI_V_S);
5259 return;
5260 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5261 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5262 AArch64::MOVA_4ZMXI_V_D);
5263 return;
5264 }
5265 break;
5266 }
5267 case Intrinsic::aarch64_sme_read_vg1x2: {
5268 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5269 AArch64::MOVA_VG2_2ZMXI);
5270 return;
5271 }
5272 case Intrinsic::aarch64_sme_read_vg1x4: {
5273 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5274 AArch64::MOVA_VG4_4ZMXI);
5275 return;
5276 }
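    // The 'readz' forms (MOVAZ pseudos) also zero the ZA slice after reading
    // it; the trailing arguments bound and scale the slice immediate.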
5277 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5278 if (VT == MVT::nxv16i8) {
5279 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5280 return;
5281 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5282 VT == MVT::nxv8bf16) {
5283 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5284 return;
5285 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5286 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5287 return;
5288 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5289 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5290 return;
5291 }
5292 break;
5293 }
5294 case Intrinsic::aarch64_sme_readz_vert_x2: {
5295 if (VT == MVT::nxv16i8) {
5296 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5297 return;
5298 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5299 VT == MVT::nxv8bf16) {
5300 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5301 return;
5302 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5303 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5304 return;
5305 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5306 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5307 return;
5308 }
5309 break;
5310 }
5311 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5312 if (VT == MVT::nxv16i8) {
5313 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5314 return;
5315 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5316 VT == MVT::nxv8bf16) {
5317 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5318 return;
5319 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5320 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5321 return;
5322 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5323 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5324 return;
5325 }
5326 break;
5327 }
5328 case Intrinsic::aarch64_sme_readz_vert_x4: {
5329 if (VT == MVT::nxv16i8) {
5330 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5331 return;
5332 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5333 VT == MVT::nxv8bf16) {
5334 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5335 return;
5336 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5337 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5338 return;
5339 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5340 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5341 return;
5342 }
5343 break;
5344 }
5345 case Intrinsic::swift_async_context_addr: {
5346 SDLoc DL(Node);
5347 SDValue Chain = Node->getOperand(0);
5348 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
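      // The Swift async context slot sits immediately below the frame pointer,
      // so its address is FP - 8.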
5349 SDValue Res = SDValue(
5350 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5351 CurDAG->getTargetConstant(8, DL, MVT::i32),
5352 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5353 0);
5354 ReplaceUses(SDValue(Node, 0), Res);
5355 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5356 CurDAG->RemoveDeadNode(Node);
5357
5358 auto &MF = CurDAG->getMachineFunction();
5359 MF.getFrameInfo().setFrameAddressIsTaken(true);
5360 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5361 return;
5362 }
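    // SME2 table lookups (LUTI2/LUTI4) expand entries from the ZT0 register;
    // the last SelectMultiVectorLuti argument is the largest legal lane index,
    // as the per-case comments below note.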
5363 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5364 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5365 Node->getValueType(0),
5366 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5367 AArch64::LUTI2_4ZTZI_S}))
 5368         // The second immediate must be <= 3.
5369 SelectMultiVectorLuti(Node, 4, Opc, 3);
5370 return;
5371 }
5372 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5373 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5374 Node->getValueType(0),
5375 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
 5376         // The second immediate must be <= 1.
5377 SelectMultiVectorLuti(Node, 4, Opc, 1);
5378 return;
5379 }
5380 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5381 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5382 Node->getValueType(0),
5383 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5384 AArch64::LUTI2_2ZTZI_S}))
 5386         // The second immediate must be <= 7.
5386 SelectMultiVectorLuti(Node, 2, Opc, 7);
5387 return;
5388 }
5389 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5390 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5391 Node->getValueType(0),
5392 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5393 AArch64::LUTI4_2ZTZI_S}))
 5394         // The second immediate must be <= 3.
5395 SelectMultiVectorLuti(Node, 2, Opc, 3);
5396 return;
5397 }
5398 }
 5399   } break;
 5400   case ISD::INTRINSIC_WO_CHAIN: {
5401 unsigned IntNo = Node->getConstantOperandVal(0);
5402 switch (IntNo) {
5403 default:
5404 break;
5405 case Intrinsic::aarch64_tagp:
5406 SelectTagP(Node);
5407 return;
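    // NEON table lookups: TBL zeroes lanes whose index is out of range, while
    // TBX (the 'true' flag below) leaves those destination lanes unchanged.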
5408 case Intrinsic::aarch64_neon_tbl2:
5409 SelectTable(Node, 2,
5410 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5411 false);
5412 return;
5413 case Intrinsic::aarch64_neon_tbl3:
5414 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5415 : AArch64::TBLv16i8Three,
5416 false);
5417 return;
5418 case Intrinsic::aarch64_neon_tbl4:
5419 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5420 : AArch64::TBLv16i8Four,
5421 false);
5422 return;
5423 case Intrinsic::aarch64_neon_tbx2:
5424 SelectTable(Node, 2,
5425 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5426 true);
5427 return;
5428 case Intrinsic::aarch64_neon_tbx3:
5429 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5430 : AArch64::TBXv16i8Three,
5431 true);
5432 return;
5433 case Intrinsic::aarch64_neon_tbx4:
5434 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5435 : AArch64::TBXv16i8Four,
5436 true);
5437 return;
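    // For the SVE2p1/SME2 groups below, SelectOpcodeFromVT indexes an opcode
    // table by element type ({B, H, S, D}, or {bf16, f16, f32, f64} for FP);
    // a zero entry means that element type has no instruction to select.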
5438 case Intrinsic::aarch64_sve_srshl_single_x2:
5439 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5440 Node->getValueType(0),
5441 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5442 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5443 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5444 return;
5445 case Intrinsic::aarch64_sve_srshl_single_x4:
5446 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5447 Node->getValueType(0),
5448 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5449 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5450 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5451 return;
5452 case Intrinsic::aarch64_sve_urshl_single_x2:
5453 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5454 Node->getValueType(0),
5455 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5456 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5457 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5458 return;
5459 case Intrinsic::aarch64_sve_urshl_single_x4:
5460 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5461 Node->getValueType(0),
5462 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5463 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5464 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5465 return;
5466 case Intrinsic::aarch64_sve_srshl_x2:
5467 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5468 Node->getValueType(0),
5469 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5470 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5471 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5472 return;
5473 case Intrinsic::aarch64_sve_srshl_x4:
5474 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5475 Node->getValueType(0),
5476 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5477 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5478 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5479 return;
5480 case Intrinsic::aarch64_sve_urshl_x2:
5481 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5482 Node->getValueType(0),
5483 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5484 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5485 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5486 return;
5487 case Intrinsic::aarch64_sve_urshl_x4:
5488 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5489 Node->getValueType(0),
5490 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5491 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5492 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5493 return;
5494 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5495 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5496 Node->getValueType(0),
5497 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5498 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5499 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5500 return;
5501 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5502 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5503 Node->getValueType(0),
5504 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5505 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5506 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5507 return;
5508 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5509 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5510 Node->getValueType(0),
5511 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5512 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5513 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5514 return;
5515 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5516 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5517 Node->getValueType(0),
5518 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5519 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5520 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5521 return;
5522 case Intrinsic::aarch64_sve_whilege_x2:
5523 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5524 Node->getValueType(0),
5525 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5526 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5527 SelectWhilePair(Node, Op);
5528 return;
5529 case Intrinsic::aarch64_sve_whilegt_x2:
5530 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5531 Node->getValueType(0),
5532 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5533 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5534 SelectWhilePair(Node, Op);
5535 return;
5536 case Intrinsic::aarch64_sve_whilehi_x2:
5537 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5538 Node->getValueType(0),
5539 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5540 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5541 SelectWhilePair(Node, Op);
5542 return;
5543 case Intrinsic::aarch64_sve_whilehs_x2:
5544 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5545 Node->getValueType(0),
5546 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5547 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5548 SelectWhilePair(Node, Op);
5549 return;
5550 case Intrinsic::aarch64_sve_whilele_x2:
5551 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5552 Node->getValueType(0),
5553 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5554 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5555 SelectWhilePair(Node, Op);
5556 return;
5557 case Intrinsic::aarch64_sve_whilelo_x2:
5558 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5559 Node->getValueType(0),
5560 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5561 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5562 SelectWhilePair(Node, Op);
5563 return;
5564 case Intrinsic::aarch64_sve_whilels_x2:
5565 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5566 Node->getValueType(0),
5567 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5568 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5569 SelectWhilePair(Node, Op);
5570 return;
5571 case Intrinsic::aarch64_sve_whilelt_x2:
5572 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5573 Node->getValueType(0),
5574 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5575 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5576 SelectWhilePair(Node, Op);
5577 return;
5578 case Intrinsic::aarch64_sve_smax_single_x2:
5579 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5580 Node->getValueType(0),
5581 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5582 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5583 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5584 return;
5585 case Intrinsic::aarch64_sve_umax_single_x2:
5586 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5587 Node->getValueType(0),
5588 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5589 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5590 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5591 return;
5592 case Intrinsic::aarch64_sve_fmax_single_x2:
5593 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5594 Node->getValueType(0),
5595 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5596 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5597 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5598 return;
5599 case Intrinsic::aarch64_sve_smax_single_x4:
5600 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5601 Node->getValueType(0),
5602 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5603 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5604 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5605 return;
5606 case Intrinsic::aarch64_sve_umax_single_x4:
5607 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5608 Node->getValueType(0),
5609 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5610 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5611 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5612 return;
5613 case Intrinsic::aarch64_sve_fmax_single_x4:
5614 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5615 Node->getValueType(0),
5616 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5617 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5618 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5619 return;
5620 case Intrinsic::aarch64_sve_smin_single_x2:
5621 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5622 Node->getValueType(0),
5623 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5624 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5625 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5626 return;
5627 case Intrinsic::aarch64_sve_umin_single_x2:
5628 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5629 Node->getValueType(0),
5630 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5631 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5632 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5633 return;
5634 case Intrinsic::aarch64_sve_fmin_single_x2:
5635 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5636 Node->getValueType(0),
5637 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5638 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5639 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5640 return;
5641 case Intrinsic::aarch64_sve_smin_single_x4:
5642 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5643 Node->getValueType(0),
5644 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5645 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5646 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5647 return;
5648 case Intrinsic::aarch64_sve_umin_single_x4:
5649 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5650 Node->getValueType(0),
5651 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5652 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5653 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5654 return;
5655 case Intrinsic::aarch64_sve_fmin_single_x4:
5656 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5657 Node->getValueType(0),
5658 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5659 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5660 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5661 return;
5662 case Intrinsic::aarch64_sve_smax_x2:
5663 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5664 Node->getValueType(0),
5665 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5666 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5667 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5668 return;
5669 case Intrinsic::aarch64_sve_umax_x2:
5670 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5671 Node->getValueType(0),
5672 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5673 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5674 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5675 return;
5676 case Intrinsic::aarch64_sve_fmax_x2:
5677 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5678 Node->getValueType(0),
5679 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5680 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5681 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5682 return;
5683 case Intrinsic::aarch64_sve_smax_x4:
5684 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5685 Node->getValueType(0),
5686 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5687 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5688 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5689 return;
5690 case Intrinsic::aarch64_sve_umax_x4:
5691 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5692 Node->getValueType(0),
5693 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5694 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5695 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5696 return;
5697 case Intrinsic::aarch64_sve_fmax_x4:
5698 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5699 Node->getValueType(0),
5700 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5701 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5702 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5703 return;
5704 case Intrinsic::aarch64_sve_smin_x2:
5705 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5706 Node->getValueType(0),
5707 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5708 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5709 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5710 return;
5711 case Intrinsic::aarch64_sve_umin_x2:
5712 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5713 Node->getValueType(0),
5714 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5715 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5716 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5717 return;
5718 case Intrinsic::aarch64_sve_fmin_x2:
5719 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5720 Node->getValueType(0),
5721 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5722 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5723 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5724 return;
5725 case Intrinsic::aarch64_sve_smin_x4:
5726 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5727 Node->getValueType(0),
5728 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5729 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5730 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5731 return;
5732 case Intrinsic::aarch64_sve_umin_x4:
5733 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5734 Node->getValueType(0),
5735 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5736 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5737 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5738 return;
5739 case Intrinsic::aarch64_sve_fmin_x4:
5740 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5741 Node->getValueType(0),
5742 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5743 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5744 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5745 return;
 5746     case Intrinsic::aarch64_sve_fmaxnm_single_x2:
5747 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5748 Node->getValueType(0),
5749 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5750 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5751 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5752 return;
 5753     case Intrinsic::aarch64_sve_fmaxnm_single_x4:
5754 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5755 Node->getValueType(0),
5756 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5757 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5758 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5759 return;
5760 case Intrinsic::aarch64_sve_fminnm_single_x2:
5761 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5762 Node->getValueType(0),
5763 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5764 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5765 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5766 return;
5767 case Intrinsic::aarch64_sve_fminnm_single_x4:
5768 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5769 Node->getValueType(0),
5770 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5771 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5772 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5773 return;
5774 case Intrinsic::aarch64_sve_fmaxnm_x2:
5775 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5776 Node->getValueType(0),
5777 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5778 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5779 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5780 return;
5781 case Intrinsic::aarch64_sve_fmaxnm_x4:
5782 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5783 Node->getValueType(0),
5784 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5785 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5786 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5787 return;
5788 case Intrinsic::aarch64_sve_fminnm_x2:
5789 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5790 Node->getValueType(0),
5791 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5792 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5793 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5794 return;
5795 case Intrinsic::aarch64_sve_fminnm_x4:
5796 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5797 Node->getValueType(0),
5798 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5799 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5800 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5801 return;
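    // Multi-vector FP<->integer conversions: only the single-precision
    // (.S to .S) forms are selected here.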
5802 case Intrinsic::aarch64_sve_fcvtzs_x2:
5803 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5804 return;
5805 case Intrinsic::aarch64_sve_scvtf_x2:
5806 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5807 return;
5808 case Intrinsic::aarch64_sve_fcvtzu_x2:
5809 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5810 return;
5811 case Intrinsic::aarch64_sve_ucvtf_x2:
5812 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5813 return;
5814 case Intrinsic::aarch64_sve_fcvtzs_x4:
5815 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5816 return;
5817 case Intrinsic::aarch64_sve_scvtf_x4:
5818 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5819 return;
5820 case Intrinsic::aarch64_sve_fcvtzu_x4:
5821 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5822 return;
5823 case Intrinsic::aarch64_sve_ucvtf_x4:
5824 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5825 return;
5826 case Intrinsic::aarch64_sve_fcvt_widen_x2:
5827 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
5828 return;
5829 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
5830 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
5831 return;
5832 case Intrinsic::aarch64_sve_sclamp_single_x2:
5833 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5834 Node->getValueType(0),
5835 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5836 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5837 SelectClamp(Node, 2, Op);
5838 return;
5839 case Intrinsic::aarch64_sve_uclamp_single_x2:
5840 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5841 Node->getValueType(0),
5842 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5843 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5844 SelectClamp(Node, 2, Op);
5845 return;
5846 case Intrinsic::aarch64_sve_fclamp_single_x2:
5847 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5848 Node->getValueType(0),
5849 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5850 AArch64::FCLAMP_VG2_2Z2Z_D}))
5851 SelectClamp(Node, 2, Op);
5852 return;
5853 case Intrinsic::aarch64_sve_bfclamp_single_x2:
5854 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
5855 return;
5856 case Intrinsic::aarch64_sve_sclamp_single_x4:
5857 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5858 Node->getValueType(0),
5859 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5860 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5861 SelectClamp(Node, 4, Op);
5862 return;
5863 case Intrinsic::aarch64_sve_uclamp_single_x4:
5864 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5865 Node->getValueType(0),
5866 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5867 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5868 SelectClamp(Node, 4, Op);
5869 return;
5870 case Intrinsic::aarch64_sve_fclamp_single_x4:
5871 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5872 Node->getValueType(0),
5873 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5874 AArch64::FCLAMP_VG4_4Z4Z_D}))
5875 SelectClamp(Node, 4, Op);
5876 return;
5877 case Intrinsic::aarch64_sve_bfclamp_single_x4:
5878 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
5879 return;
5880 case Intrinsic::aarch64_sve_add_single_x2:
5881 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5882 Node->getValueType(0),
5883 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
5884 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
5885 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5886 return;
5887 case Intrinsic::aarch64_sve_add_single_x4:
5888 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5889 Node->getValueType(0),
5890 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
5891 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
5892 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5893 return;
5894 case Intrinsic::aarch64_sve_zip_x2:
5895 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5896 Node->getValueType(0),
5897 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
5898 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
5899 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5900 return;
5901 case Intrinsic::aarch64_sve_zipq_x2:
5902 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5903 AArch64::ZIP_VG2_2ZZZ_Q);
5904 return;
5905 case Intrinsic::aarch64_sve_zip_x4:
5906 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5907 Node->getValueType(0),
5908 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
5909 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
5910 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5911 return;
5912 case Intrinsic::aarch64_sve_zipq_x4:
5913 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5914 AArch64::ZIP_VG4_4Z4Z_Q);
5915 return;
5916 case Intrinsic::aarch64_sve_uzp_x2:
5917 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5918 Node->getValueType(0),
5919 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
5920 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
5921 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5922 return;
5923 case Intrinsic::aarch64_sve_uzpq_x2:
5924 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5925 AArch64::UZP_VG2_2ZZZ_Q);
5926 return;
5927 case Intrinsic::aarch64_sve_uzp_x4:
5928 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5929 Node->getValueType(0),
5930 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
5931 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
5932 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5933 return;
5934 case Intrinsic::aarch64_sve_uzpq_x4:
5935 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5936 AArch64::UZP_VG4_4Z4Z_Q);
5937 return;
5938 case Intrinsic::aarch64_sve_sel_x2:
5939 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5940 Node->getValueType(0),
5941 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
5942 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
5943 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
5944 return;
5945 case Intrinsic::aarch64_sve_sel_x4:
5946 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5947 Node->getValueType(0),
5948 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
5949 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
5950 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
5951 return;
5952 case Intrinsic::aarch64_sve_frinta_x2:
5953 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
5954 return;
5955 case Intrinsic::aarch64_sve_frinta_x4:
5956 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
5957 return;
5958 case Intrinsic::aarch64_sve_frintm_x2:
5959 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
5960 return;
5961 case Intrinsic::aarch64_sve_frintm_x4:
5962 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
5963 return;
5964 case Intrinsic::aarch64_sve_frintn_x2:
5965 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
5966 return;
5967 case Intrinsic::aarch64_sve_frintn_x4:
5968 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
5969 return;
5970 case Intrinsic::aarch64_sve_frintp_x2:
5971 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
5972 return;
5973 case Intrinsic::aarch64_sve_frintp_x4:
5974 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
5975 return;
5976 case Intrinsic::aarch64_sve_sunpk_x2:
5977 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5978 Node->getValueType(0),
5979 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
5980 AArch64::SUNPK_VG2_2ZZ_D}))
5981 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5982 return;
5983 case Intrinsic::aarch64_sve_uunpk_x2:
5984 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5985 Node->getValueType(0),
5986 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
5987 AArch64::UUNPK_VG2_2ZZ_D}))
5988 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5989 return;
5990 case Intrinsic::aarch64_sve_sunpk_x4:
5991 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5992 Node->getValueType(0),
5993 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
5994 AArch64::SUNPK_VG4_4Z2Z_D}))
5995 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5996 return;
5997 case Intrinsic::aarch64_sve_uunpk_x4:
5998 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5999 Node->getValueType(0),
6000 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6001 AArch64::UUNPK_VG4_4Z2Z_D}))
6002 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6003 return;
6004 case Intrinsic::aarch64_sve_pext_x2: {
6005 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6006 Node->getValueType(0),
6007 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6008 AArch64::PEXT_2PCI_D}))
6009 SelectPExtPair(Node, Op);
6010 return;
6011 }
6012 }
6013 break;
6014 }
6015 case ISD::INTRINSIC_VOID: {
6016 unsigned IntNo = Node->getConstantOperandVal(1);
6017 if (Node->getNumOperands() >= 3)
6018 VT = Node->getOperand(2)->getValueType(0);
6019 switch (IntNo) {
6020 default:
6021 break;
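    // NEON structured stores mirror the structured-load selection earlier in
    // this function: the opcode is chosen purely from the stored vector type.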
6022 case Intrinsic::aarch64_neon_st1x2: {
6023 if (VT == MVT::v8i8) {
6024 SelectStore(Node, 2, AArch64::ST1Twov8b);
6025 return;
6026 } else if (VT == MVT::v16i8) {
6027 SelectStore(Node, 2, AArch64::ST1Twov16b);
6028 return;
6029 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6030 VT == MVT::v4bf16) {
6031 SelectStore(Node, 2, AArch64::ST1Twov4h);
6032 return;
6033 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6034 VT == MVT::v8bf16) {
6035 SelectStore(Node, 2, AArch64::ST1Twov8h);
6036 return;
6037 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6038 SelectStore(Node, 2, AArch64::ST1Twov2s);
6039 return;
6040 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6041 SelectStore(Node, 2, AArch64::ST1Twov4s);
6042 return;
6043 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6044 SelectStore(Node, 2, AArch64::ST1Twov2d);
6045 return;
6046 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6047 SelectStore(Node, 2, AArch64::ST1Twov1d);
6048 return;
6049 }
6050 break;
6051 }
6052 case Intrinsic::aarch64_neon_st1x3: {
6053 if (VT == MVT::v8i8) {
6054 SelectStore(Node, 3, AArch64::ST1Threev8b);
6055 return;
6056 } else if (VT == MVT::v16i8) {
6057 SelectStore(Node, 3, AArch64::ST1Threev16b);
6058 return;
6059 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6060 VT == MVT::v4bf16) {
6061 SelectStore(Node, 3, AArch64::ST1Threev4h);
6062 return;
6063 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6064 VT == MVT::v8bf16) {
6065 SelectStore(Node, 3, AArch64::ST1Threev8h);
6066 return;
6067 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6068 SelectStore(Node, 3, AArch64::ST1Threev2s);
6069 return;
6070 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6071 SelectStore(Node, 3, AArch64::ST1Threev4s);
6072 return;
6073 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6074 SelectStore(Node, 3, AArch64::ST1Threev2d);
6075 return;
6076 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6077 SelectStore(Node, 3, AArch64::ST1Threev1d);
6078 return;
6079 }
6080 break;
6081 }
6082 case Intrinsic::aarch64_neon_st1x4: {
6083 if (VT == MVT::v8i8) {
6084 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6085 return;
6086 } else if (VT == MVT::v16i8) {
6087 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6088 return;
6089 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6090 VT == MVT::v4bf16) {
6091 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6092 return;
6093 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6094 VT == MVT::v8bf16) {
6095 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6096 return;
6097 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6098 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6099 return;
6100 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6101 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6102 return;
6103 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6104 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6105 return;
6106 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6107 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6108 return;
6109 }
6110 break;
6111 }
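    // A v1i64/v1f64 store has nothing to interleave, so the st2/st3/st4
    // intrinsics below fall back to the multi-register ST1 forms for it.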
6112 case Intrinsic::aarch64_neon_st2: {
6113 if (VT == MVT::v8i8) {
6114 SelectStore(Node, 2, AArch64::ST2Twov8b);
6115 return;
6116 } else if (VT == MVT::v16i8) {
6117 SelectStore(Node, 2, AArch64::ST2Twov16b);
6118 return;
6119 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6120 VT == MVT::v4bf16) {
6121 SelectStore(Node, 2, AArch64::ST2Twov4h);
6122 return;
6123 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6124 VT == MVT::v8bf16) {
6125 SelectStore(Node, 2, AArch64::ST2Twov8h);
6126 return;
6127 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6128 SelectStore(Node, 2, AArch64::ST2Twov2s);
6129 return;
6130 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6131 SelectStore(Node, 2, AArch64::ST2Twov4s);
6132 return;
6133 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6134 SelectStore(Node, 2, AArch64::ST2Twov2d);
6135 return;
6136 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6137 SelectStore(Node, 2, AArch64::ST1Twov1d);
6138 return;
6139 }
6140 break;
6141 }
6142 case Intrinsic::aarch64_neon_st3: {
6143 if (VT == MVT::v8i8) {
6144 SelectStore(Node, 3, AArch64::ST3Threev8b);
6145 return;
6146 } else if (VT == MVT::v16i8) {
6147 SelectStore(Node, 3, AArch64::ST3Threev16b);
6148 return;
6149 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6150 VT == MVT::v4bf16) {
6151 SelectStore(Node, 3, AArch64::ST3Threev4h);
6152 return;
6153 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6154 VT == MVT::v8bf16) {
6155 SelectStore(Node, 3, AArch64::ST3Threev8h);
6156 return;
6157 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6158 SelectStore(Node, 3, AArch64::ST3Threev2s);
6159 return;
6160 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6161 SelectStore(Node, 3, AArch64::ST3Threev4s);
6162 return;
6163 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6164 SelectStore(Node, 3, AArch64::ST3Threev2d);
6165 return;
6166 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6167 SelectStore(Node, 3, AArch64::ST1Threev1d);
6168 return;
6169 }
6170 break;
6171 }
6172 case Intrinsic::aarch64_neon_st4: {
6173 if (VT == MVT::v8i8) {
6174 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6175 return;
6176 } else if (VT == MVT::v16i8) {
6177 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6178 return;
6179 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6180 VT == MVT::v4bf16) {
6181 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6182 return;
6183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6184 VT == MVT::v8bf16) {
6185 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6186 return;
6187 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6188 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6189 return;
6190 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6191 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6192 return;
6193 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6194 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6195 return;
6196 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6197 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6198 return;
6199 }
6200 break;
6201 }
6202 case Intrinsic::aarch64_neon_st2lane: {
6203 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6204 SelectStoreLane(Node, 2, AArch64::ST2i8);
6205 return;
6206 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6207 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6208 SelectStoreLane(Node, 2, AArch64::ST2i16);
6209 return;
6210 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6211 VT == MVT::v2f32) {
6212 SelectStoreLane(Node, 2, AArch64::ST2i32);
6213 return;
6214 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6215 VT == MVT::v1f64) {
6216 SelectStoreLane(Node, 2, AArch64::ST2i64);
6217 return;
6218 }
6219 break;
6220 }
6221 case Intrinsic::aarch64_neon_st3lane: {
6222 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6223 SelectStoreLane(Node, 3, AArch64::ST3i8);
6224 return;
6225 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6226 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6227 SelectStoreLane(Node, 3, AArch64::ST3i16);
6228 return;
6229 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6230 VT == MVT::v2f32) {
6231 SelectStoreLane(Node, 3, AArch64::ST3i32);
6232 return;
6233 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6234 VT == MVT::v1f64) {
6235 SelectStoreLane(Node, 3, AArch64::ST3i64);
6236 return;
6237 }
6238 break;
6239 }
6240 case Intrinsic::aarch64_neon_st4lane: {
6241 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6242 SelectStoreLane(Node, 4, AArch64::ST4i8);
6243 return;
6244 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6245 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6246 SelectStoreLane(Node, 4, AArch64::ST4i16);
6247 return;
6248 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6249 VT == MVT::v2f32) {
6250 SelectStoreLane(Node, 4, AArch64::ST4i32);
6251 return;
6252 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6253 VT == MVT::v1f64) {
6254 SelectStoreLane(Node, 4, AArch64::ST4i64);
6255 return;
6256 }
6257 break;
6258 }
6259 case Intrinsic::aarch64_sve_st2q: {
6260 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6261 return;
6262 }
6263 case Intrinsic::aarch64_sve_st3q: {
6264 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6265 return;
6266 }
6267 case Intrinsic::aarch64_sve_st4q: {
6268 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6269 return;
6270 }
6271 case Intrinsic::aarch64_sve_st2: {
6272 if (VT == MVT::nxv16i8) {
6273 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6274 return;
6275 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6276 VT == MVT::nxv8bf16) {
6277 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6278 return;
6279 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6280 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6281 return;
6282 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6283 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6284 return;
6285 }
6286 break;
6287 }
6288 case Intrinsic::aarch64_sve_st3: {
6289 if (VT == MVT::nxv16i8) {
6290 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6291 return;
6292 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6293 VT == MVT::nxv8bf16) {
6294 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6295 return;
6296 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6297 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6298 return;
6299 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6300 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6301 return;
6302 }
6303 break;
6304 }
6305 case Intrinsic::aarch64_sve_st4: {
6306 if (VT == MVT::nxv16i8) {
6307 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6308 return;
6309 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6310 VT == MVT::nxv8bf16) {
6311 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6312 return;
6313 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6314 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6315 return;
6316 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6317 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6318 return;
6319 }
6320 break;
6321 }
6322 }
6323 break;
6324 }
6325 case AArch64ISD::LD2post: {
6326 if (VT == MVT::v8i8) {
6327 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6328 return;
6329 } else if (VT == MVT::v16i8) {
6330 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6331 return;
6332 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6333 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6334 return;
6335 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6336 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6337 return;
6338 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6339 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6340 return;
6341 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6342 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6343 return;
6344 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6345 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6346 return;
6347 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6348 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6349 return;
6350 }
6351 break;
6352 }
6353 case AArch64ISD::LD3post: {
6354 if (VT == MVT::v8i8) {
6355 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6356 return;
6357 } else if (VT == MVT::v16i8) {
6358 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6359 return;
6360 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6361 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6362 return;
6363 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6364 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6365 return;
6366 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6367 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6368 return;
6369 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6370 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6371 return;
6372 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6373 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6374 return;
6375 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6376 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6377 return;
6378 }
6379 break;
6380 }
6381 case AArch64ISD::LD4post: {
6382 if (VT == MVT::v8i8) {
6383 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6384 return;
6385 } else if (VT == MVT::v16i8) {
6386 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6387 return;
6388 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6389 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6390 return;
6391 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6392 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6393 return;
6394 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6395 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6396 return;
6397 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6398 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6399 return;
6400 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6401 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6402 return;
6403 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6404 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6405 return;
6406 }
6407 break;
6408 }
6409 case AArch64ISD::LD1x2post: {
6410 if (VT == MVT::v8i8) {
6411 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6412 return;
6413 } else if (VT == MVT::v16i8) {
6414 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6415 return;
6416 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6417 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6418 return;
6419 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6420 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6421 return;
6422 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6423 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6424 return;
6425 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6426 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6427 return;
6428 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6429 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6430 return;
6431 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6432 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6433 return;
6434 }
6435 break;
6436 }
6437 case AArch64ISD::LD1x3post: {
6438 if (VT == MVT::v8i8) {
6439 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6440 return;
6441 } else if (VT == MVT::v16i8) {
6442 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6443 return;
6444 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6445 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6446 return;
6447 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6448 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6449 return;
6450 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6451 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6452 return;
6453 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6454 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6455 return;
6456 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6457 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6458 return;
6459 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6460 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6461 return;
6462 }
6463 break;
6464 }
6465 case AArch64ISD::LD1x4post: {
6466 if (VT == MVT::v8i8) {
6467 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6468 return;
6469 } else if (VT == MVT::v16i8) {
6470 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6471 return;
6472 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6473 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6474 return;
6475 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6476 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6477 return;
6478 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6479 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6480 return;
6481 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6482 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6483 return;
6484 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6485 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6486 return;
6487 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6488 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6489 return;
6490 }
6491 break;
6492 }
6493 case AArch64ISD::LD1DUPpost: {
6494 if (VT == MVT::v8i8) {
6495 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6496 return;
6497 } else if (VT == MVT::v16i8) {
6498 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6499 return;
6500 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6501 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6502 return;
6503 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6504 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6505 return;
6506 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6507 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6508 return;
6509 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6510 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6511 return;
6512 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6513 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6514 return;
6515 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6516 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6517 return;
6518 }
6519 break;
6520 }
6521 case AArch64ISD::LD2DUPpost: {
6522 if (VT == MVT::v8i8) {
6523 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6524 return;
6525 } else if (VT == MVT::v16i8) {
6526 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6527 return;
6528 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6529 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6530 return;
6531 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6532 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6533 return;
6534 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6535 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6536 return;
6537 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6538 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6539 return;
6540 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6541 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6542 return;
6543 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6544 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6545 return;
6546 }
6547 break;
6548 }
6549 case AArch64ISD::LD3DUPpost: {
6550 if (VT == MVT::v8i8) {
6551 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6552 return;
6553 } else if (VT == MVT::v16i8) {
6554 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6555 return;
6556 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6557 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6558 return;
6559 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6560 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6561 return;
6562 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6563 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6564 return;
6565 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6566 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6567 return;
6568 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6569 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6570 return;
6571 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6572 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6573 return;
6574 }
6575 break;
6576 }
6577 case AArch64ISD::LD4DUPpost: {
6578 if (VT == MVT::v8i8) {
6579 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6580 return;
6581 } else if (VT == MVT::v16i8) {
6582 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6583 return;
6584 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6585 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6586 return;
6587 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6588 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6589 return;
6590 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6591 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6592 return;
6593 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6594 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6595 return;
6596 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6597 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6598 return;
6599 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6600 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6601 return;
6602 }
6603 break;
6604 }
6605 case AArch64ISD::LD1LANEpost: {
6606 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6607 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6608 return;
6609 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6610 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6611 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6612 return;
6613 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6614 VT == MVT::v2f32) {
6615 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6616 return;
6617 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6618 VT == MVT::v1f64) {
6619 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6620 return;
6621 }
6622 break;
6623 }
6624 case AArch64ISD::LD2LANEpost: {
6625 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6626 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6627 return;
6628 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6629 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6630 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6631 return;
6632 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6633 VT == MVT::v2f32) {
6634 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6635 return;
6636 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6637 VT == MVT::v1f64) {
6638 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6639 return;
6640 }
6641 break;
6642 }
6643 case AArch64ISD::LD3LANEpost: {
6644 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6645 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6646 return;
6647 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6648 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6649 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6650 return;
6651 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6652 VT == MVT::v2f32) {
6653 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6654 return;
6655 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6656 VT == MVT::v1f64) {
6657 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6658 return;
6659 }
6660 break;
6661 }
6662 case AArch64ISD::LD4LANEpost: {
6663 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6664 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6665 return;
6666 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6667 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6668 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6669 return;
6670 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6671 VT == MVT::v2f32) {
6672 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6673 return;
6674 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6675 VT == MVT::v1f64) {
6676 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6677 return;
6678 }
6679 break;
6680 }
6681 case AArch64ISD::ST2post: {
6682 VT = Node->getOperand(1).getValueType();
6683 if (VT == MVT::v8i8) {
6684 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6685 return;
6686 } else if (VT == MVT::v16i8) {
6687 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6688 return;
6689 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6690 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6691 return;
6692 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6693 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6694 return;
6695 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6696 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6697 return;
6698 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6699 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6700 return;
6701 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6702 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6703 return;
6704 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6705 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6706 return;
6707 }
6708 break;
6709 }
6710 case AArch64ISD::ST3post: {
6711 VT = Node->getOperand(1).getValueType();
6712 if (VT == MVT::v8i8) {
6713 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6714 return;
6715 } else if (VT == MVT::v16i8) {
6716 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6717 return;
6718 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6719 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6720 return;
6721 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6722 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6723 return;
6724 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6725 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6726 return;
6727 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6728 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6729 return;
6730 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6731 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6732 return;
6733 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6734 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6735 return;
6736 }
6737 break;
6738 }
6739 case AArch64ISD::ST4post: {
6740 VT = Node->getOperand(1).getValueType();
6741 if (VT == MVT::v8i8) {
6742 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6743 return;
6744 } else if (VT == MVT::v16i8) {
6745 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6746 return;
6747 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6748 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6749 return;
6750 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6751 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6752 return;
6753 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6754 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6755 return;
6756 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6757 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6758 return;
6759 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6760 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6761 return;
6762 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6763 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6764 return;
6765 }
6766 break;
6767 }
6768 case AArch64ISD::ST1x2post: {
6769 VT = Node->getOperand(1).getValueType();
6770 if (VT == MVT::v8i8) {
6771 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6772 return;
6773 } else if (VT == MVT::v16i8) {
6774 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6775 return;
6776 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6777 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6778 return;
6779 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6780 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6781 return;
6782 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6783 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6784 return;
6785 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6786 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6787 return;
6788 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6789 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6790 return;
6791 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6792 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6793 return;
6794 }
6795 break;
6796 }
6797 case AArch64ISD::ST1x3post: {
6798 VT = Node->getOperand(1).getValueType();
6799 if (VT == MVT::v8i8) {
6800 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6801 return;
6802 } else if (VT == MVT::v16i8) {
6803 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6804 return;
6805 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6806 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6807 return;
6808 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6809 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6810 return;
6811 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6812 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6813 return;
6814 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6815 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6816 return;
6817 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6818 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6819 return;
6820 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6821 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6822 return;
6823 }
6824 break;
6825 }
6826 case AArch64ISD::ST1x4post: {
6827 VT = Node->getOperand(1).getValueType();
6828 if (VT == MVT::v8i8) {
6829 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6830 return;
6831 } else if (VT == MVT::v16i8) {
6832 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6833 return;
6834 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6835 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6836 return;
6837 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6838 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6839 return;
6840 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6841 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6842 return;
6843 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6844 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6845 return;
6846 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6847 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6848 return;
6849 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6850 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6851 return;
6852 }
6853 break;
6854 }
6855 case AArch64ISD::ST2LANEpost: {
6856 VT = Node->getOperand(1).getValueType();
6857 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6858 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6859 return;
6860 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6861 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6862 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6863 return;
6864 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6865 VT == MVT::v2f32) {
6866 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6867 return;
6868 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6869 VT == MVT::v1f64) {
6870 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6871 return;
6872 }
6873 break;
6874 }
6875 case AArch64ISD::ST3LANEpost: {
6876 VT = Node->getOperand(1).getValueType();
6877 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6878 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
6879 return;
6880 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6881 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6882 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
6883 return;
6884 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6885 VT == MVT::v2f32) {
6886 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
6887 return;
6888 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6889 VT == MVT::v1f64) {
6890 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
6891 return;
6892 }
6893 break;
6894 }
6895 case AArch64ISD::ST4LANEpost: {
6896 VT = Node->getOperand(1).getValueType();
6897 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6898 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
6899 return;
6900 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6901 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6902 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
6903 return;
6904 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6905 VT == MVT::v2f32) {
6906 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
6907 return;
6908 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6909 VT == MVT::v1f64) {
6910 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
6911 return;
6912 }
6913 break;
6914 }
6915 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
6916 if (VT == MVT::nxv16i8) {
6917 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
6918 return;
6919 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6920 VT == MVT::nxv8bf16) {
6921 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
6922 return;
6923 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6924 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
6925 return;
6926 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6927 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
6928 return;
6929 }
6930 break;
6931 }
6932 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
6933 if (VT == MVT::nxv16i8) {
6934 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
6935 return;
6936 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6937 VT == MVT::nxv8bf16) {
6938 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
6939 return;
6940 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6941 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
6942 return;
6943 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6944 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
6945 return;
6946 }
6947 break;
6948 }
6949 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
6950 if (VT == MVT::nxv16i8) {
6951 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
6952 return;
6953 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6954 VT == MVT::nxv8bf16) {
6955 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
6956 return;
6957 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6958 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
6959 return;
6960 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6961 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
6962 return;
6963 }
6964 break;
6965 }
6966 }
6967
6968 // Select the default instruction
6969 SelectCode(Node);
6970}
6971
6972 /// createAArch64ISelDag - This pass converts a legalized DAG into an
6973/// AArch64-specific DAG, ready for instruction scheduling.
6974 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
6975 CodeGenOptLevel OptLevel) {
6976 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
6977}
6978
6979/// When \p PredVT is a scalable vector predicate in the form
6980 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
6981 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
6982 /// structured vectors (NumVec > 1), the output data type is
6983/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
6984/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
6985/// EVT.
6986 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
6987 unsigned NumVec) {
6988 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
6989 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
6990 return EVT();
6991
6992 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
6993 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
6994 return EVT();
6995
6996 ElementCount EC = PredVT.getVectorElementCount();
6997 EVT ScalarVT =
6998 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
6999 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7000
7001 return MemVT;
7002}
7003
7004 /// Return the EVT of the data associated with a memory operation in \p
7005 /// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7006 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7007 if (isa<MemSDNode>(Root))
7008 return cast<MemSDNode>(Root)->getMemoryVT();
7009
7010 if (isa<MemIntrinsicSDNode>(Root))
7011 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7012
7013 const unsigned Opcode = Root->getOpcode();
7014 // For custom ISD nodes, we have to look at them individually to extract the
7015 // type of the data moved to/from memory.
7016 switch (Opcode) {
7017 case AArch64ISD::LD1_MERGE_ZERO:
7018 case AArch64ISD::LD1S_MERGE_ZERO:
7019 case AArch64ISD::LDNF1_MERGE_ZERO:
7020 case AArch64ISD::LDNF1S_MERGE_ZERO:
7021 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7022 case AArch64ISD::ST1_PRED:
7023 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7024 case AArch64ISD::SVE_LD2_MERGE_ZERO:
7025 return getPackedVectorTypeFromPredicateType(
7026 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7027 case AArch64ISD::SVE_LD3_MERGE_ZERO:
7028 return getPackedVectorTypeFromPredicateType(
7029 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7030 case AArch64ISD::SVE_LD4_MERGE_ZERO:
7031 return getPackedVectorTypeFromPredicateType(
7032 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7033 default:
7034 break;
7035 }
7036
7037 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7038 return EVT();
7039
7040 switch (Root->getConstantOperandVal(1)) {
7041 default:
7042 return EVT();
7043 case Intrinsic::aarch64_sme_ldr:
7044 case Intrinsic::aarch64_sme_str:
7045 return MVT::nxv16i8;
7046 case Intrinsic::aarch64_sve_prf:
7047 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7048 // width of the predicate.
7049 return getPackedVectorTypeFromPredicateType(
7050 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7051 case Intrinsic::aarch64_sve_ld2_sret:
7052 case Intrinsic::aarch64_sve_ld2q_sret:
7053 return getPackedVectorTypeFromPredicateType(
7054 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7055 case Intrinsic::aarch64_sve_st2q:
7056 return getPackedVectorTypeFromPredicateType(
7057 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7058 case Intrinsic::aarch64_sve_ld3_sret:
7059 case Intrinsic::aarch64_sve_ld3q_sret:
7060 return getPackedVectorTypeFromPredicateType(
7061 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7062 case Intrinsic::aarch64_sve_st3q:
7063 return getPackedVectorTypeFromPredicateType(
7064 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7065 case Intrinsic::aarch64_sve_ld4_sret:
7066 case Intrinsic::aarch64_sve_ld4q_sret:
7067 return getPackedVectorTypeFromPredicateType(
7068 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7069 case Intrinsic::aarch64_sve_st4q:
7070 return getPackedVectorTypeFromPredicateType(
7071 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7072 case Intrinsic::aarch64_sve_ld1udq:
7073 case Intrinsic::aarch64_sve_st1dq:
7074 return EVT(MVT::nxv1i64);
7075 case Intrinsic::aarch64_sve_ld1uwq:
7076 case Intrinsic::aarch64_sve_st1wq:
7077 return EVT(MVT::nxv1i32);
7078 }
7079}
7080
7081/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7082 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7083/// where Root is the memory access using N for its address.
7084template <int64_t Min, int64_t Max>
7085bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7086 SDValue &Base,
7087 SDValue &OffImm) {
7088 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7089 const DataLayout &DL = CurDAG->getDataLayout();
7090 const MachineFrameInfo &MFI = MF->getFrameInfo();
7091
7092 if (N.getOpcode() == ISD::FrameIndex) {
7093 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7094 // We can only encode VL scaled offsets, so only fold in frame indexes
7095 // referencing SVE objects.
7096 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7097 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7098 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7099 return true;
7100 }
7101
7102 return false;
7103 }
7104
7105 if (MemVT == EVT())
7106 return false;
7107
7108 if (N.getOpcode() != ISD::ADD)
7109 return false;
7110
7111 SDValue VScale = N.getOperand(1);
7112 if (VScale.getOpcode() != ISD::VSCALE)
7113 return false;
7114
7115 TypeSize TS = MemVT.getSizeInBits();
7116 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7117 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7118
7119 if ((MulImm % MemWidthBytes) != 0)
7120 return false;
7121
7122 int64_t Offset = MulImm / MemWidthBytes;
7123 if (Offset < Min || Offset > Max)
7124 return false;
7125
7126 Base = N.getOperand(0);
7127 if (Base.getOpcode() == ISD::FrameIndex) {
7128 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7129 // We can only encode VL scaled offsets, so only fold in frame indexes
7130 // referencing SVE objects.
7131 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7132 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7133 }
7134
7135 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7136 return true;
7137}
7138
7139/// Select register plus register addressing mode for SVE, with scaled
7140/// offset.
7141bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7142 SDValue &Base,
7143 SDValue &Offset) {
7144 if (N.getOpcode() != ISD::ADD)
7145 return false;
7146
7147 // Process an ADD node.
7148 const SDValue LHS = N.getOperand(0);
7149 const SDValue RHS = N.getOperand(1);
7150
7151 // 8-bit data does not come with the SHL node, so it is treated
7152 // separately.
7153 if (Scale == 0) {
7154 Base = LHS;
7155 Offset = RHS;
7156 return true;
7157 }
7158
7159 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7160 int64_t ImmOff = C->getSExtValue();
7161 unsigned Size = 1 << Scale;
7162
7163 // To use the reg+reg addressing mode, the immediate must be a multiple of
7164 // the vector element's byte size.
7165 if (ImmOff % Size)
7166 return false;
7167
7168 SDLoc DL(N);
7169 Base = LHS;
7170 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7171 SDValue Ops[] = {Offset};
7172 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7173 Offset = SDValue(MI, 0);
7174 return true;
7175 }
7176
7177 // Check if the RHS is a shift node with a constant.
7178 if (RHS.getOpcode() != ISD::SHL)
7179 return false;
7180
7181 const SDValue ShiftRHS = RHS.getOperand(1);
7182 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7183 if (C->getZExtValue() == Scale) {
7184 Base = LHS;
7185 Offset = RHS.getOperand(0);
7186 return true;
7187 }
7188
7189 return false;
7190}
7191
7192bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7193 const AArch64TargetLowering *TLI =
7194 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7195
7196 return TLI->isAllActivePredicate(*CurDAG, N);
7197}
7198
7199bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7200 EVT VT = N.getValueType();
7201 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7202}
7203
7204bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7205 SDValue &Base, SDValue &Offset,
7206 unsigned Scale) {
7207 // Try to untangle an ADD node into a 'reg + offset'
7208 if (N.getOpcode() == ISD::ADD)
7209 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7210 int64_t ImmOff = C->getSExtValue();
7211 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7212 Base = N.getOperand(0);
7213 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7214 return true;
7215 }
7216 }
7217
7218 // By default, just match reg + 0.
7219 Base = N;
7220 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7221 return true;
7222}