LLVM 17.0.0git
AArch64ISelDAGToDAG.cpp
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 static char ID;
48
49 AArch64DAGToDAGISel() = delete;
50
51 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
54
55 bool runOnMachineFunction(MachineFunction &MF) override {
56 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57 return SelectionDAGISel::runOnMachineFunction(MF);
58 }
59
60 void Select(SDNode *Node) override;
61
62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63 /// inline asm expressions.
64 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65 unsigned ConstraintID,
66 std::vector<SDValue> &OutOps) override;
67
68 template <signed Low, signed High, signed Scale>
69 bool SelectRDVLImm(SDValue N, SDValue &Imm);
70
71 bool tryMLAV64LaneV128(SDNode *N);
72 bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
73 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
74 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
76 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78 return SelectShiftedRegister(N, false, Reg, Shift);
79 }
80 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
81 return SelectShiftedRegister(N, true, Reg, Shift);
82 }
83 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
84 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
85 }
86 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
87 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
88 }
89 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
90 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
91 }
92 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
93 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
94 }
95 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
96 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
97 }
98 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
99 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
100 }
101 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
102 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
103 }
104 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
105 return SelectAddrModeIndexed(N, 1, Base, OffImm);
106 }
107 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
108 return SelectAddrModeIndexed(N, 2, Base, OffImm);
109 }
110 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
111 return SelectAddrModeIndexed(N, 4, Base, OffImm);
112 }
113 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
114 return SelectAddrModeIndexed(N, 8, Base, OffImm);
115 }
116 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
117 return SelectAddrModeIndexed(N, 16, Base, OffImm);
118 }
119 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
120 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
121 }
122 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
123 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
124 }
125 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
126 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
127 }
128 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
129 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
130 }
131 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
132 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
133 }
134 template <unsigned Size, unsigned Max>
135 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
136 // Test if there is an appropriate addressing mode and check if the
137 // immediate fits.
138 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
139 if (Found) {
140 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
141 int64_t C = CI->getSExtValue();
142 if (C <= Max)
143 return true;
144 }
145 }
146
147 // Otherwise, base only, materialize address in register.
148 Base = N;
149 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
150 return true;
151 }
152
153 template<int Width>
154 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
155 SDValue &SignExtend, SDValue &DoShift) {
156 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
157 }
158
159 template<int Width>
160 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
161 SDValue &SignExtend, SDValue &DoShift) {
162 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
163 }
164
165 bool SelectExtractHigh(SDValue N, SDValue &Res) {
166 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
167 N = N->getOperand(0);
168 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
169 !isa<ConstantSDNode>(N->getOperand(1)))
170 return false;
171 EVT VT = N->getValueType(0);
172 EVT LVT = N->getOperand(0).getValueType();
173 unsigned Index = N->getConstantOperandVal(1);
174 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
175 Index != VT.getVectorNumElements())
176 return false;
177 Res = N->getOperand(0);
178 return true;
179 }
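  // Worked example (illustrative): (extract_subvector (v8i16 V), (i64 4))
  // extracts the high half of V; Index equals the result's element count (4),
  // so Res is set to the full 128-bit V and a "2" (high-half) instruction
  // variant can consume it directly.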
180
181 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
182 if (N.getOpcode() != AArch64ISD::VLSHR)
183 return false;
184 SDValue Op = N->getOperand(0);
185 EVT VT = Op.getValueType();
186 unsigned ShtAmt = N->getConstantOperandVal(1);
187 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
188 return false;
189
190 APInt Imm;
191 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
192 Imm = APInt(VT.getScalarSizeInBits(),
193 Op.getOperand(1).getConstantOperandVal(0)
194 << Op.getOperand(1).getConstantOperandVal(1));
195 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
196 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0));
199 else
200 return false;
201
202 if (Imm != 1ULL << (ShtAmt - 1))
203 return false;
204
205 Res1 = Op.getOperand(0);
206 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
207 return true;
208 }
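  // Worked example (illustrative): (AArch64ISD::VLSHR (add X, splat(8)), 4)
  // is a rounding shift, since 8 == 1 << (4 - 1); Res1 = X and Res2 = 4, so a
  // rounding-shift instruction can replace the explicit add plus shift.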
209
210 bool SelectDupZeroOrUndef(SDValue N) {
211 switch(N->getOpcode()) {
212 case ISD::UNDEF:
213 return true;
214 case AArch64ISD::DUP:
215 case ISD::SPLAT_VECTOR: {
216 auto Opnd0 = N->getOperand(0);
217 if (isNullConstant(Opnd0))
218 return true;
219 if (isNullFPConstant(Opnd0))
220 return true;
221 break;
222 }
223 default:
224 break;
225 }
226
227 return false;
228 }
229
230 bool SelectDupZero(SDValue N) {
231 switch(N->getOpcode()) {
232 case AArch64ISD::DUP:
233 case ISD::SPLAT_VECTOR: {
234 auto Opnd0 = N->getOperand(0);
235 if (isNullConstant(Opnd0))
236 return true;
237 if (isNullFPConstant(Opnd0))
238 return true;
239 break;
240 }
241 }
242
243 return false;
244 }
245
246 template<MVT::SimpleValueType VT>
247 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
248 return SelectSVEAddSubImm(N, VT, Imm, Shift);
249 }
250
251 template <MVT::SimpleValueType VT>
252 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
253 return SelectSVECpyDupImm(N, VT, Imm, Shift);
254 }
255
256 template <MVT::SimpleValueType VT, bool Invert = false>
257 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
258 return SelectSVELogicalImm(N, VT, Imm, Invert);
259 }
260
261 template <MVT::SimpleValueType VT>
262 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
263 return SelectSVEArithImm(N, VT, Imm);
264 }
265
266 template <unsigned Low, unsigned High, bool AllowSaturation = false>
267 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
268 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
269 }
270
271 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
272 if (N->getOpcode() != ISD::SPLAT_VECTOR)
273 return false;
274
275 EVT EltVT = N->getValueType(0).getVectorElementType();
276 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
277 /* High */ EltVT.getFixedSizeInBits(),
278 /* AllowSaturation */ true, Imm);
279 }
280
281 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
282 template<signed Min, signed Max, signed Scale, bool Shift>
283 bool SelectCntImm(SDValue N, SDValue &Imm) {
284 if (!isa<ConstantSDNode>(N))
285 return false;
286
287 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
288 if (Shift)
289 MulImm = 1LL << MulImm;
290
291 if ((MulImm % std::abs(Scale)) != 0)
292 return false;
293
294 MulImm /= Scale;
295 if ((MulImm >= Min) && (MulImm <= Max)) {
296 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
297 return true;
298 }
299
300 return false;
301 }
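  // For example, with <Min=1, Max=16, Scale=2, Shift=false> a constant 8 is
  // accepted: 8 is a multiple of 2 and 8/2 = 4 lies in [1, 16], so Imm = 4.
  // With Shift=true the same constant is first interpreted as 1 << 8 = 256.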
302
303 template <signed Max, signed Scale>
304 bool SelectEXTImm(SDValue N, SDValue &Imm) {
305 if (!isa<ConstantSDNode>(N))
306 return false;
307
308 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
309
310 if (MulImm >= 0 && MulImm <= Max) {
311 MulImm *= Scale;
312 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
313 return true;
314 }
315
316 return false;
317 }
318
319 template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
320 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
321 uint64_t C = CI->getZExtValue();
322 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
323 return true;
324 }
325 return false;
326 }
327
328 /// Form sequences of consecutive 64/128-bit registers for use in NEON
329 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
330/// between 1 and 4 elements. If it contains a single element, that element
331/// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
332 SDValue createDTuple(ArrayRef<SDValue> Vecs);
333 SDValue createQTuple(ArrayRef<SDValue> Vecs);
334 // Form a sequence of SVE registers for instructions using list of vectors,
335 // e.g. structured loads and stores (ldN, stN).
336 SDValue createZTuple(ArrayRef<SDValue> Vecs);
337
338 // Similar to above, except the register must start at a multiple of the
339 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
340 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
341
342 /// Generic helper for the createDTuple/createQTuple
343 /// functions. Those should almost always be called instead.
344 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
345 const unsigned SubRegs[]);
346
347 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
348
349 bool tryIndexedLoad(SDNode *N);
350
351 bool trySelectStackSlotTagP(SDNode *N);
352 void SelectTagP(SDNode *N);
353
354 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
355 unsigned SubRegIdx);
356 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
357 unsigned SubRegIdx);
358 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
359 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
360 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
361 unsigned Opc_rr, unsigned Opc_ri,
362 bool IsIntr = false);
363 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
364 bool IsZmMulti, unsigned Opcode);
365 void SelectWhilePair(SDNode *N, unsigned Opc);
366 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
367 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
368 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
369 bool IsTupleInput, unsigned Opc);
370 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
371
372 template <unsigned MaxIdx, unsigned Scale>
373 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
374 unsigned Op);
375
376 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
377 /// SVE Reg+Imm addressing mode.
378 template <int64_t Min, int64_t Max>
379 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
380 SDValue &OffImm);
381 /// SVE Reg+Reg address mode.
382 template <unsigned Scale>
383 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
384 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
385 }
386
387 template <unsigned MaxIdx, unsigned Scale>
388 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
389 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
390 }
391
392 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
393 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
394 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
395 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
396 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
397 unsigned Opc_rr, unsigned Opc_ri);
398 std::tuple<unsigned, SDValue, SDValue>
399 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
400 const SDValue &OldBase, const SDValue &OldOffset,
401 unsigned Scale);
402
403 bool tryBitfieldExtractOp(SDNode *N);
404 bool tryBitfieldExtractOpFromSExt(SDNode *N);
405 bool tryBitfieldInsertOp(SDNode *N);
406 bool tryBitfieldInsertInZeroOp(SDNode *N);
407 bool tryShiftAmountMod(SDNode *N);
408 bool tryHighFPExt(SDNode *N);
409
410 bool tryReadRegister(SDNode *N);
411 bool tryWriteRegister(SDNode *N);
412
413 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
414 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
415
416// Include the pieces autogenerated from the target description.
417#include "AArch64GenDAGISel.inc"
418
419private:
420 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
421 SDValue &Shift);
422 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
423 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
424 SDValue &OffImm) {
425 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
426 }
427 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
428 unsigned Size, SDValue &Base,
429 SDValue &OffImm);
430 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
431 SDValue &OffImm);
432 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
433 SDValue &OffImm);
434 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
435 SDValue &Offset, SDValue &SignExtend,
436 SDValue &DoShift);
437 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
438 SDValue &Offset, SDValue &SignExtend,
439 SDValue &DoShift);
440 bool isWorthFolding(SDValue V) const;
441 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
442 SDValue &Offset, SDValue &SignExtend);
443
444 template<unsigned RegWidth>
445 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
446 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
447 }
448
449 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
450
451 bool SelectCMP_SWAP(SDNode *N);
452
453 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
454 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
455 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
456
457 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
458 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
459 bool AllowSaturation, SDValue &Imm);
460
461 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
462 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
463 SDValue &Offset);
464 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
465 SDValue &Offset, unsigned Scale = 1);
466
467 bool SelectAllActivePredicate(SDValue N);
468 bool SelectAnyPredicate(SDValue N);
469};
470} // end anonymous namespace
471
472char AArch64DAGToDAGISel::ID = 0;
473
474INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
475
476/// isIntImmediate - This method tests to see if the node is a constant
477/// operand. If so, Imm will receive the value.
478static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
479 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
480 Imm = C->getZExtValue();
481 return true;
482 }
483 return false;
484}
485
486// isIntImmediate - This method tests to see if the operand is a constant.
487// If so, Imm will receive the value.
488static bool isIntImmediate(SDValue N, uint64_t &Imm) {
489 return isIntImmediate(N.getNode(), Imm);
490}
491
492// isOpcWithIntImmediate - This method tests to see if the node has the
493// specified opcode and an immediate integer right operand. If so, Imm will
494// receive the immediate value.
495static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
496 uint64_t &Imm) {
497 return N->getOpcode() == Opc &&
498 isIntImmediate(N->getOperand(1).getNode(), Imm);
499}
500
501// isIntImmediateEq - This method tests to see if N is a constant operand that
502// is equivalent to 'ImmExpected'.
503#ifndef NDEBUG
504static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
505 uint64_t Imm;
506 if (!isIntImmediate(N.getNode(), Imm))
507 return false;
508 return Imm == ImmExpected;
509}
510#endif
511
512bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
513 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
514 switch(ConstraintID) {
515 default:
516 llvm_unreachable("Unexpected asm memory constraint");
517 case InlineAsm::Constraint_m:
518 case InlineAsm::Constraint_o:
519 case InlineAsm::Constraint_Q:
520 // We need to make sure that this one operand does not end up in XZR, thus
521 // require the address to be in a PointerRegClass register.
522 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
523 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
524 SDLoc dl(Op);
525 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
526 SDValue NewOp =
527 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
528 dl, Op.getValueType(),
529 Op, RC), 0);
530 OutOps.push_back(NewOp);
531 return false;
532 }
533 return true;
534}
535
536/// SelectArithImmed - Select an immediate value that can be represented as
537/// a 12-bit value shifted left by either 0 or 12. If so, return true with
538/// Val set to the 12-bit value and Shift set to the shifter operand.
539bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
540 SDValue &Shift) {
541 // This function is called from the addsub_shifted_imm ComplexPattern,
542 // which lists [imm] as the list of opcodes it's interested in; however,
543 // we still need to check whether the operand is actually an immediate
544 // here because the ComplexPattern opcode list is only used in
545 // root-level opcode matching.
546 if (!isa<ConstantSDNode>(N.getNode()))
547 return false;
548
549 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
550 unsigned ShiftAmt;
551
552 if (Immed >> 12 == 0) {
553 ShiftAmt = 0;
554 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
555 ShiftAmt = 12;
556 Immed = Immed >> 12;
557 } else
558 return false;
559
560 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
561 SDLoc dl(N);
562 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
563 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
564 return true;
565}
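// For example (illustrative):
//   0x345000  -->  Val = 0x345, Shift = "LSL #12"
//   0x345001  -->  rejected: it fits neither the plain imm12 form nor the
//                  imm12-shifted-by-12 form.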
566
567/// SelectNegArithImmed - As above, but negates the value before trying to
568/// select it.
569bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
570 SDValue &Shift) {
571 // This function is called from the addsub_shifted_imm ComplexPattern,
572 // which lists [imm] as the list of opcodes it's interested in; however,
573 // we still need to check whether the operand is actually an immediate
574 // here because the ComplexPattern opcode list is only used in
575 // root-level opcode matching.
576 if (!isa<ConstantSDNode>(N.getNode()))
577 return false;
578
579 // The immediate operand must be a 24-bit zero-extended immediate.
580 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
581
582 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
583 // have the opposite effect on the C flag, so this pattern mustn't match under
584 // those circumstances.
585 if (Immed == 0)
586 return false;
587
588 if (N.getValueType() == MVT::i32)
589 Immed = ~((uint32_t)Immed) + 1;
590 else
591 Immed = ~Immed + 1ULL;
592 if (Immed & 0xFFFFFFFFFF000000ULL)
593 return false;
594
595 Immed &= 0xFFFFFFULL;
596 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
597 Shift);
598}
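// For example (illustrative): an i32 operation with immediate -4096 is
// negated to 0x1000 and then encoded by SelectArithImmed as Val = 1 with
// "LSL #12", allowing e.g. an ADD of a negative immediate to be selected as
// a SUB.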
599
600/// getShiftTypeForNode - Translate a shift node to the corresponding
601/// ShiftType value.
602static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
603 switch (N.getOpcode()) {
604 default:
605 return AArch64_AM::InvalidShiftExtend;
606 case ISD::SHL:
607 return AArch64_AM::LSL;
608 case ISD::SRL:
609 return AArch64_AM::LSR;
610 case ISD::SRA:
611 return AArch64_AM::ASR;
612 case ISD::ROTR:
613 return AArch64_AM::ROR;
614 }
615}
616
617/// Determine whether it is worth it to fold SHL into the addressing
618/// mode.
619static bool isWorthFoldingSHL(SDValue V) {
620 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
621 // It is worth folding a logical shift of up to three places.
622 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
623 if (!CSD)
624 return false;
625 unsigned ShiftVal = CSD->getZExtValue();
626 if (ShiftVal > 3)
627 return false;
628
629 // Check if this particular node is reused in any non-memory related
630 // operation. If yes, do not try to fold this node into the address
631 // computation, since the computation will be kept.
632 const SDNode *Node = V.getNode();
633 for (SDNode *UI : Node->uses())
634 if (!isa<MemSDNode>(*UI))
635 for (SDNode *UII : UI->uses())
636 if (!isa<MemSDNode>(*UII))
637 return false;
638 return true;
639}
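// For example (illustrative): in (add x0, (shl x1, #3)) where the resulting
// address only feeds loads/stores, the shift is folded into the addressing
// mode (e.g. [x0, x1, lsl #3]); a shift amount of 4, or a result that also
// feeds non-memory arithmetic, is not folded.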
640
641/// Determine whether it is worth folding V into an extended register.
642bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
643 // Trivial if we are optimizing for code size or if there is only
644 // one use of the value.
645 if (CurDAG->shouldOptForSize() || V.hasOneUse())
646 return true;
647 // If a subtarget has a fastpath LSL we can fold a logical shift into
648 // the addressing mode and save a cycle.
649 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
650 isWorthFoldingSHL(V))
651 return true;
652 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
653 const SDValue LHS = V.getOperand(0);
654 const SDValue RHS = V.getOperand(1);
655 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
656 return true;
657 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
658 return true;
659 }
660
661 // It hurts otherwise, since the value will be reused.
662 return false;
663}
664
665/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
666/// to select more shifted-register operands.
667bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
668 SDValue &Shift) {
669 EVT VT = N.getValueType();
670 if (VT != MVT::i32 && VT != MVT::i64)
671 return false;
672
673 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
674 return false;
675 SDValue LHS = N.getOperand(0);
676 if (!LHS->hasOneUse())
677 return false;
678
679 unsigned LHSOpcode = LHS->getOpcode();
680 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
681 return false;
682
683 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
684 if (!ShiftAmtNode)
685 return false;
686
687 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
688 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
689 if (!RHSC)
690 return false;
691
692 APInt AndMask = RHSC->getAPIntValue();
693 unsigned LowZBits, MaskLen;
694 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
695 return false;
696
697 unsigned BitWidth = N.getValueSizeInBits();
698 SDLoc DL(LHS);
699 uint64_t NewShiftC;
700 unsigned NewShiftOp;
701 if (LHSOpcode == ISD::SHL) {
702 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
703 // BitWidth != LowZBits + MaskLen doesn't match the pattern
704 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
705 return false;
706
707 NewShiftC = LowZBits - ShiftAmtC;
708 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
709 } else {
710 if (LowZBits == 0)
711 return false;
712
713 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
714 NewShiftC = LowZBits + ShiftAmtC;
715 if (NewShiftC >= BitWidth)
716 return false;
717
718 // SRA needs all high bits
719 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
720 return false;
721
722 // SRL high bits can be 0 or 1
723 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
724 return false;
725
726 if (LHSOpcode == ISD::SRL)
727 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
728 else
729 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
730 }
731
732 assert(NewShiftC < BitWidth && "Invalid shift amount");
733 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
734 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
735 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
736 NewShiftAmt, BitWidthMinus1),
737 0);
738 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
739 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
740 return true;
741}
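// Worked example (illustrative, i64): (and (srl x, 8), 0x00ffffffffffff00)
// has a shifted mask with LowZBits = 8 and MaskLen = 48, so it is rewritten
// as (UBFMXri x, 16, 63), i.e. (lsr x, 16), reported with Shift = LSL #8,
// which computes the same value.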
742
743/// SelectShiftedRegister - Select a "shifted register" operand. If the value
744/// is not shifted, set the Shift operand to the default of "LSL #0". The logical
745/// instructions allow the shifted register to be rotated, but the arithmetic
746/// instructions do not. The AllowROR parameter specifies whether ROR is
747/// supported.
748bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
749 SDValue &Reg, SDValue &Shift) {
750 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
751 return true;
752
753 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
754 if (ShType == AArch64_AM::InvalidShiftExtend)
755 return false;
756 if (!AllowROR && ShType == AArch64_AM::ROR)
757 return false;
758
759 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
760 unsigned BitSize = N.getValueSizeInBits();
761 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
762 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
763
764 Reg = N.getOperand(0);
765 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
766 return isWorthFolding(N);
767 }
768
769 return false;
770}
771
772/// getExtendTypeForNode - Translate an extend node to the corresponding
773/// ExtendType value.
774static AArch64_AM::ShiftExtendType
775getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
776 if (N.getOpcode() == ISD::SIGN_EXTEND ||
777 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
778 EVT SrcVT;
779 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
780 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
781 else
782 SrcVT = N.getOperand(0).getValueType();
783
784 if (!IsLoadStore && SrcVT == MVT::i8)
785 return AArch64_AM::SXTB;
786 else if (!IsLoadStore && SrcVT == MVT::i16)
787 return AArch64_AM::SXTH;
788 else if (SrcVT == MVT::i32)
789 return AArch64_AM::SXTW;
790 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
791
793 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
794 N.getOpcode() == ISD::ANY_EXTEND) {
795 EVT SrcVT = N.getOperand(0).getValueType();
796 if (!IsLoadStore && SrcVT == MVT::i8)
797 return AArch64_AM::UXTB;
798 else if (!IsLoadStore && SrcVT == MVT::i16)
799 return AArch64_AM::UXTH;
800 else if (SrcVT == MVT::i32)
801 return AArch64_AM::UXTW;
802 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
803
804 return AArch64_AM::InvalidShiftExtend;
805 } else if (N.getOpcode() == ISD::AND) {
806 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
807 if (!CSD)
808 return AArch64_AM::InvalidShiftExtend;
809 uint64_t AndMask = CSD->getZExtValue();
810
811 switch (AndMask) {
812 default:
813 return AArch64_AM::InvalidShiftExtend;
814 case 0xFF:
815 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
816 case 0xFFFF:
817 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
818 case 0xFFFFFFFF:
819 return AArch64_AM::UXTW;
820 }
821 }
822
823 return AArch64_AM::InvalidShiftExtend;
824}
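// For example (illustrative): (sign_extend_inreg x, i8) maps to SXTB,
// (and x, 0xffff) to UXTH and (and x, 0xffffffff) to UXTW. With
// IsLoadStore=true the byte and halfword forms are rejected, since the
// register-offset addressing modes only provide word/doubleword extends.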
825
826// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
827static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
828 if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
829 DL->getOpcode() != AArch64ISD::DUPLANE32)
830 return false;
831
832 SDValue SV = DL->getOperand(0);
833 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
834 return false;
835
836 SDValue EV = SV.getOperand(1);
837 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
838 return false;
839
840 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
841 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
842 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
843 LaneOp = EV.getOperand(0);
844
845 return true;
846}
847
848// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
849// high lane extract.
850static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
851 SDValue &LaneOp, int &LaneIdx) {
852
853 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
854 std::swap(Op0, Op1);
855 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
856 return false;
857 }
858 StdOp = Op1;
859 return true;
860}
861
862/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
863/// is a lane in the upper half of a 128-bit vector. Recognize and select this
864/// so that we don't emit unnecessary lane extracts.
865bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
866 SDLoc dl(N);
867 SDValue Op0 = N->getOperand(0);
868 SDValue Op1 = N->getOperand(1);
869 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
870 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
871 int LaneIdx = -1; // Will hold the lane index.
872
873 if (Op1.getOpcode() != ISD::MUL ||
874 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
875 LaneIdx)) {
876 std::swap(Op0, Op1);
877 if (Op1.getOpcode() != ISD::MUL ||
878 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
879 LaneIdx))
880 return false;
881 }
882
883 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
884
885 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
886
887 unsigned MLAOpc = ~0U;
888
889 switch (N->getSimpleValueType(0).SimpleTy) {
890 default:
891 llvm_unreachable("Unrecognized MLA.");
892 case MVT::v4i16:
893 MLAOpc = AArch64::MLAv4i16_indexed;
894 break;
895 case MVT::v8i16:
896 MLAOpc = AArch64::MLAv8i16_indexed;
897 break;
898 case MVT::v2i32:
899 MLAOpc = AArch64::MLAv2i32_indexed;
900 break;
901 case MVT::v4i32:
902 MLAOpc = AArch64::MLAv4i32_indexed;
903 break;
904 }
905
906 ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
907 return true;
908}
909
910bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
911 SDLoc dl(N);
912 SDValue SMULLOp0;
913 SDValue SMULLOp1;
914 int LaneIdx;
915
916 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
917 LaneIdx))
918 return false;
919
920 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
921
922 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
923
924 unsigned SMULLOpc = ~0U;
925
926 if (IntNo == Intrinsic::aarch64_neon_smull) {
927 switch (N->getSimpleValueType(0).SimpleTy) {
928 default:
929 llvm_unreachable("Unrecognized SMULL.");
930 case MVT::v4i32:
931 SMULLOpc = AArch64::SMULLv4i16_indexed;
932 break;
933 case MVT::v2i64:
934 SMULLOpc = AArch64::SMULLv2i32_indexed;
935 break;
936 }
937 } else if (IntNo == Intrinsic::aarch64_neon_umull) {
938 switch (N->getSimpleValueType(0).SimpleTy) {
939 default:
940 llvm_unreachable("Unrecognized SMULL.");
941 case MVT::v4i32:
942 SMULLOpc = AArch64::UMULLv4i16_indexed;
943 break;
944 case MVT::v2i64:
945 SMULLOpc = AArch64::UMULLv2i32_indexed;
946 break;
947 }
948 } else
949 llvm_unreachable("Unrecognized intrinsic.");
950
951 ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
952 return true;
953}
954
955/// Instructions that accept extend modifiers like UXTW expect the register
956/// being extended to be a GPR32, but the incoming DAG might be acting on a
957/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
958/// this is the case.
959static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
960 if (N.getValueType() == MVT::i32)
961 return N;
962
963 SDLoc dl(N);
964 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
965}
966
967// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
968template<signed Low, signed High, signed Scale>
969bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
970 if (!isa<ConstantSDNode>(N))
971 return false;
972
973 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
974 if ((MulImm % std::abs(Scale)) == 0) {
975 int64_t RDVLImm = MulImm / Scale;
976 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
977 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
978 return true;
979 }
980 }
981
982 return false;
983}
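// For example (illustrative): with <Low=-32, High=31, Scale=16> a constant
// 32 gives RDVLImm = 32/16 = 2, which is in range, so Imm = 2 and VSCALE*32
// can be materialized with a single RDVL-style instruction.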
984
985/// SelectArithExtendedRegister - Select a "extended register" operand. This
986/// operand folds in an extend followed by an optional left shift.
987bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
988 SDValue &Shift) {
989 unsigned ShiftVal = 0;
990 AArch64_AM::ShiftExtendType Ext;
991
992 if (N.getOpcode() == ISD::SHL) {
993 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
994 if (!CSD)
995 return false;
996 ShiftVal = CSD->getZExtValue();
997 if (ShiftVal > 4)
998 return false;
999
1000 Ext = getExtendTypeForNode(N.getOperand(0));
1001 if (Ext == AArch64_AM::InvalidShiftExtend)
1002 return false;
1003
1004 Reg = N.getOperand(0).getOperand(0);
1005 } else {
1006 Ext = getExtendTypeForNode(N);
1007 if (Ext == AArch64_AM::InvalidShiftExtend)
1008 return false;
1009
1009
1010 Reg = N.getOperand(0);
1011
1012 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1013 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
1014 auto isDef32 = [](SDValue N) {
1015 unsigned Opc = N.getOpcode();
1016 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1017 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
1018 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
1019 Opc != ISD::FREEZE;
1020 };
1021 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1022 isDef32(Reg))
1023 return false;
1024 }
1025
1026 // AArch64 mandates that the RHS of the operation must use the smallest
1027 // register class that could contain the size being extended from. Thus,
1028 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1029 // there might not be an actual 32-bit value in the program. We can
1030 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1031 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1032 Reg = narrowIfNeeded(CurDAG, Reg);
1033 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1034 MVT::i32);
1035 return isWorthFolding(N);
1036}
1037
1038/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1039/// operand is used by instructions that have an SP operand.
1040bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1041 SDValue &Shift) {
1042 unsigned ShiftVal = 0;
1043 AArch64_AM::ShiftExtendType Ext;
1044
1045 if (N.getOpcode() != ISD::SHL)
1046 return false;
1047
1048 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1049 if (!CSD)
1050 return false;
1051 ShiftVal = CSD->getZExtValue();
1052 if (ShiftVal > 4)
1053 return false;
1054
1055 Ext = AArch64_AM::UXTX;
1056 Reg = N.getOperand(0);
1057 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1058 MVT::i32);
1059 return isWorthFolding(N);
1060}
1061
1062/// If there's a use of this ADDlow that's not itself a load/store then we'll
1063/// need to create a real ADD instruction from it anyway and there's no point in
1064/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1065/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1066/// leads to duplicated ADRP instructions.
1067static bool isWorthFoldingADDlow(SDValue N) {
1068 for (auto *Use : N->uses()) {
1069 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1070 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1071 Use->getOpcode() != ISD::ATOMIC_STORE)
1072 return false;
1073
1074 // ldar and stlr have much more restrictive addressing modes (just a
1075 // register).
1076 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1077 return false;
1078 }
1079
1080 return true;
1081}
1082
1083/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1084/// immediate" address. The "Size" argument is the size in bytes of the memory
1085/// reference, which determines the scale.
1086bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1087 unsigned BW, unsigned Size,
1088 SDValue &Base,
1089 SDValue &OffImm) {
1090 SDLoc dl(N);
1091 const DataLayout &DL = CurDAG->getDataLayout();
1092 const TargetLowering *TLI = getTargetLowering();
1093 if (N.getOpcode() == ISD::FrameIndex) {
1094 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1095 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1096 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1097 return true;
1098 }
1099
1100 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1101 // offsets selected here don't support labels/immediates, only base+offset.
1102 if (CurDAG->isBaseWithConstantOffset(N)) {
1103 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1104 if (IsSignedImm) {
1105 int64_t RHSC = RHS->getSExtValue();
1106 unsigned Scale = Log2_32(Size);
1107 int64_t Range = 0x1LL << (BW - 1);
1108
1109 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1110 RHSC < (Range << Scale)) {
1111 Base = N.getOperand(0);
1112 if (Base.getOpcode() == ISD::FrameIndex) {
1113 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1114 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1115 }
1116 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1117 return true;
1118 }
1119 } else {
1120 // unsigned Immediate
1121 uint64_t RHSC = RHS->getZExtValue();
1122 unsigned Scale = Log2_32(Size);
1123 uint64_t Range = 0x1ULL << BW;
1124
1125 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1126 Base = N.getOperand(0);
1127 if (Base.getOpcode() == ISD::FrameIndex) {
1128 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1129 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1130 }
1131 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1132 return true;
1133 }
1134 }
1135 }
1136 }
1137 // Base only. The address will be materialized into a register before
1138 // the memory is accessed.
1139 // add x0, Xbase, #offset
1140 // stp x1, x2, [x0]
1141 Base = N;
1142 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1143 return true;
1144}
1145
1146/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1147/// immediate" address. The "Size" argument is the size in bytes of the memory
1148/// reference, which determines the scale.
1149bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1150 SDValue &Base, SDValue &OffImm) {
1151 SDLoc dl(N);
1152 const DataLayout &DL = CurDAG->getDataLayout();
1153 const TargetLowering *TLI = getTargetLowering();
1154 if (N.getOpcode() == ISD::FrameIndex) {
1155 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1156 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1157 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1158 return true;
1159 }
1160
1161 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1162 GlobalAddressSDNode *GAN =
1163 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1164 Base = N.getOperand(0);
1165 OffImm = N.getOperand(1);
1166 if (!GAN)
1167 return true;
1168
1169 if (GAN->getOffset() % Size == 0 &&
1170 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1171 return true;
1172 }
1173
1174 if (CurDAG->isBaseWithConstantOffset(N)) {
1175 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1176 int64_t RHSC = (int64_t)RHS->getZExtValue();
1177 unsigned Scale = Log2_32(Size);
1178 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
1179 Base = N.getOperand(0);
1180 if (Base.getOpcode() == ISD::FrameIndex) {
1181 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1182 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1183 }
1184 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1185 return true;
1186 }
1187 }
1188 }
1189
1190 // Before falling back to our general case, check if the unscaled
1191 // instructions can handle this. If so, that's preferable.
1192 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1193 return false;
1194
1195 // Base only. The address will be materialized into a register before
1196 // the memory is accessed.
1197 // add x0, Xbase, #offset
1198 // ldr x0, [x0]
1199 Base = N;
1200 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1201 return true;
1202}
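// For example (illustrative), with Size = 8 (an 8-byte access):
//   (add x1, #32)  -->  Base = x1, OffImm = 32 >> 3 = 4
//   ldr x2, [x1, #32]
// since 32 is non-negative, a multiple of 8 and below (0x1000 << 3).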
1203
1204/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1205/// immediate" address. This should only match when there is an offset that
1206/// is not valid for a scaled immediate addressing mode. The "Size" argument
1207/// is the size in bytes of the memory reference, which is needed here to know
1208/// what is valid for a scaled immediate.
1209bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1210 SDValue &Base,
1211 SDValue &OffImm) {
1212 if (!CurDAG->isBaseWithConstantOffset(N))
1213 return false;
1214 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1215 int64_t RHSC = RHS->getSExtValue();
1216 // If the offset is valid as a scaled immediate, don't match here.
1217 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
1218 RHSC < (0x1000 << Log2_32(Size)))
1219 return false;
1220 if (RHSC >= -256 && RHSC < 256) {
1221 Base = N.getOperand(0);
1222 if (Base.getOpcode() == ISD::FrameIndex) {
1223 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1224 const TargetLowering *TLI = getTargetLowering();
1225 Base = CurDAG->getTargetFrameIndex(
1226 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1227 }
1228 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1229 return true;
1230 }
1231 }
1232 return false;
1233}
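// For example (illustrative), with Size = 8 an offset of 17 is not a multiple
// of 8, so the scaled form cannot encode it, but it lies in [-256, 256) and is
// selected here (e.g. "ldur x2, [x1, #17]"); an offset of 32 is rejected
// because the scaled form above is preferable.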
1234
1235static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1236 SDLoc dl(N);
1237 SDValue ImpDef = SDValue(
1238 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1239 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1240 N);
1241}
1242
1243/// Check if the given SHL node (\p N), can be used to form an
1244/// extended register for an addressing mode.
1245bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1246 bool WantExtend, SDValue &Offset,
1247 SDValue &SignExtend) {
1248 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1249 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1250 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1251 return false;
1252
1253 SDLoc dl(N);
1254 if (WantExtend) {
1255 AArch64_AM::ShiftExtendType Ext =
1256 getExtendTypeForNode(N.getOperand(0), true);
1257 if (Ext == AArch64_AM::InvalidShiftExtend)
1258 return false;
1259
1260 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1261 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1262 MVT::i32);
1263 } else {
1264 Offset = N.getOperand(0);
1265 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1266 }
1267
1268 unsigned LegalShiftVal = Log2_32(Size);
1269 unsigned ShiftVal = CSD->getZExtValue();
1270
1271 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1272 return false;
1273
1274 return isWorthFolding(N);
1275}
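// Worked example (illustrative): for an 8-byte access (LegalShiftVal = 3)
// with WantExtend, (shl (sign_extend w1), #3) yields Offset = w1 and
// SignExtend = 1 (SXTW), matching the [Xn, Wm, sxtw #3] form; a shift amount
// other than 0 or 3 is rejected.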
1276
1277bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1278 SDValue &Base, SDValue &Offset,
1279 SDValue &SignExtend,
1280 SDValue &DoShift) {
1281 if (N.getOpcode() != ISD::ADD)
1282 return false;
1283 SDValue LHS = N.getOperand(0);
1284 SDValue RHS = N.getOperand(1);
1285 SDLoc dl(N);
1286
1287 // We don't want to match immediate adds here, because they are better lowered
1288 // to the register-immediate addressing modes.
1289 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1290 return false;
1291
1292 // Check if this particular node is reused in any non-memory related
1293 // operation. If yes, do not try to fold this node into the address
1294 // computation, since the computation will be kept.
1295 const SDNode *Node = N.getNode();
1296 for (SDNode *UI : Node->uses()) {
1297 if (!isa<MemSDNode>(*UI))
1298 return false;
1299 }
1300
1301 // Remember if it is worth folding N when it produces extended register.
1302 bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1303
1304 // Try to match a shifted extend on the RHS.
1305 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1306 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1307 Base = LHS;
1308 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1309 return true;
1310 }
1311
1312 // Try to match a shifted extend on the LHS.
1313 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1314 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1315 Base = RHS;
1316 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1317 return true;
1318 }
1319
1320 // There was no shift, whatever else we find.
1321 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1322
1323 AArch64_AM::ShiftExtendType Ext;
1324 // Try to match an unshifted extend on the LHS.
1325 if (IsExtendedRegisterWorthFolding &&
1326 (Ext = getExtendTypeForNode(LHS, true)) !=
1327 AArch64_AM::InvalidShiftExtend) {
1328 Base = RHS;
1329 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1330 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1331 MVT::i32);
1332 if (isWorthFolding(LHS))
1333 return true;
1334 }
1335
1336 // Try to match an unshifted extend on the RHS.
1337 if (IsExtendedRegisterWorthFolding &&
1338 (Ext = getExtendTypeForNode(RHS, true)) !=
1339 AArch64_AM::InvalidShiftExtend) {
1340 Base = LHS;
1341 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1342 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1343 MVT::i32);
1344 if (isWorthFolding(RHS))
1345 return true;
1346 }
1347
1348 return false;
1349}
1350
1351// Check if the given immediate is preferred by ADD. If an immediate can be
1352 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1353// encoded by one MOVZ, return true.
1354static bool isPreferredADD(int64_t ImmOff) {
1355 // Constant in [0x0, 0xfff] can be encoded in ADD.
1356 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1357 return true;
1358 // Check if it can be encoded in an "ADD LSL #12".
1359 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1360 // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1361 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1362 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1363 return false;
1364}
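// For example (illustrative): 0xfff is preferred (plain ADD), 0x123000 is
// preferred (ADD ..., LSL #12), but 0x20000 is not, because a single MOVZ
// materializes it more cheaply than an "ADD ..., LSL #12".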
1365
1366bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1367 SDValue &Base, SDValue &Offset,
1368 SDValue &SignExtend,
1369 SDValue &DoShift) {
1370 if (N.getOpcode() != ISD::ADD)
1371 return false;
1372 SDValue LHS = N.getOperand(0);
1373 SDValue RHS = N.getOperand(1);
1374 SDLoc DL(N);
1375
1376 // Check if this particular node is reused in any non-memory related
1377 // operation. If yes, do not try to fold this node into the address
1378 // computation, since the computation will be kept.
1379 const SDNode *Node = N.getNode();
1380 for (SDNode *UI : Node->uses()) {
1381 if (!isa<MemSDNode>(*UI))
1382 return false;
1383 }
1384
1385 // Watch out if RHS is a wide immediate: it cannot be selected into the
1386 // [BaseReg+Imm] addressing mode and may not be encodable in an ADD/SUB
1387 // either. Instead it will use the [BaseReg + 0] address mode and generate
1388 // instructions like:
1389 // MOV X0, WideImmediate
1390 // ADD X1, BaseReg, X0
1391 // LDR X2, [X1, 0]
1392 // For such situation, using [BaseReg, XReg] addressing mode can save one
1393 // ADD/SUB:
1394 // MOV X0, WideImmediate
1395 // LDR X2, [BaseReg, X0]
1396 if (isa<ConstantSDNode>(RHS)) {
1397 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1398 unsigned Scale = Log2_32(Size);
1399 // Skip immediates that can be selected in the load/store addressing
1400 // mode. Also skip immediates that can be encoded by a single ADD (SUB is
1401 // also checked by using -ImmOff).
1402 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1403 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1404 return false;
1405
1406 SDValue Ops[] = { RHS };
1407 SDNode *MOVI =
1408 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1409 SDValue MOVIV = SDValue(MOVI, 0);
1410 // This ADD of two X register will be selected into [Reg+Reg] mode.
1411 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1412 }
1413
1414 // Remember if it is worth folding N when it produces extended register.
1415 bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1416
1417 // Try to match a shifted extend on the RHS.
1418 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1419 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1420 Base = LHS;
1421 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1422 return true;
1423 }
1424
1425 // Try to match a shifted extend on the LHS.
1426 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1427 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1428 Base = RHS;
1429 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1430 return true;
1431 }
1432
1433 // Match any non-shifted, non-extend, non-immediate add expression.
1434 Base = LHS;
1435 Offset = RHS;
1436 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1437 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1438 // Reg1 + Reg2 is free: no check needed.
1439 return true;
1440}
1441
1442SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1443 static const unsigned RegClassIDs[] = {
1444 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1445 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1446 AArch64::dsub2, AArch64::dsub3};
1447
1448 return createTuple(Regs, RegClassIDs, SubRegs);
1449}
1450
1451SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1452 static const unsigned RegClassIDs[] = {
1453 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1454 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1455 AArch64::qsub2, AArch64::qsub3};
1456
1457 return createTuple(Regs, RegClassIDs, SubRegs);
1458}
1459
1460SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1461 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1462 AArch64::ZPR3RegClassID,
1463 AArch64::ZPR4RegClassID};
1464 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1465 AArch64::zsub2, AArch64::zsub3};
1466
1467 return createTuple(Regs, RegClassIDs, SubRegs);
1468}
1469
1470SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1471 assert(Regs.size() == 2 || Regs.size() == 4);
1472
1473 // The createTuple interface requires 3 RegClassIDs for each possible
1474 // tuple type even though we only have them for ZPR2 and ZPR4.
1475 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1476 AArch64::ZPR4Mul4RegClassID};
1477 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1478 AArch64::zsub2, AArch64::zsub3};
1479 return createTuple(Regs, RegClassIDs, SubRegs);
1480}
1481
1482SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1483 const unsigned RegClassIDs[],
1484 const unsigned SubRegs[]) {
1485 // There's no special register-class for a vector-list of 1 element: it's just
1486 // a vector.
1487 if (Regs.size() == 1)
1488 return Regs[0];
1489
1490 assert(Regs.size() >= 2 && Regs.size() <= 4);
1491
1492 SDLoc DL(Regs[0]);
1493
1494 SmallVector<SDValue, 4> Ops;
1495
1496 // First operand of REG_SEQUENCE is the desired RegClass.
1497 Ops.push_back(
1498 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1499
1500 // Then we get pairs of source & subregister-position for the components.
1501 for (unsigned i = 0; i < Regs.size(); ++i) {
1502 Ops.push_back(Regs[i]);
1503 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1504 }
1505
1506 SDNode *N =
1507 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1508 return SDValue(N, 0);
1509}
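// For example (illustrative), createQTuple({Q0, Q1}) produces roughly
//   REG_SEQUENCE QQRegClassID, Q0, qsub0, Q1, qsub1
// i.e. an untyped two-register tuple suitable for a vector-list operand.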
1510
1511void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1512 bool isExt) {
1513 SDLoc dl(N);
1514 EVT VT = N->getValueType(0);
1515
1516 unsigned ExtOff = isExt;
1517
1518 // Form a REG_SEQUENCE to force register allocation.
1519 unsigned Vec0Off = ExtOff + 1;
1520 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1521 N->op_begin() + Vec0Off + NumVecs);
1522 SDValue RegSeq = createQTuple(Regs);
1523
1524 SmallVector<SDValue, 6> Ops;
1525 if (isExt)
1526 Ops.push_back(N->getOperand(1));
1527 Ops.push_back(RegSeq);
1528 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1529 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1530}
1531
1532bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1533 LoadSDNode *LD = cast<LoadSDNode>(N);
1534 if (LD->isUnindexed())
1535 return false;
1536 EVT VT = LD->getMemoryVT();
1537 EVT DstVT = N->getValueType(0);
1538 ISD::MemIndexedMode AM = LD->getAddressingMode();
1539 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1540
1541 // We're not doing validity checking here. That was done when checking
1542 // if we should mark the load as indexed or not. We're just selecting
1543 // the right instruction.
1544 unsigned Opcode = 0;
1545
1546 ISD::LoadExtType ExtType = LD->getExtensionType();
1547 bool InsertTo64 = false;
1548 if (VT == MVT::i64)
1549 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1550 else if (VT == MVT::i32) {
1551 if (ExtType == ISD::NON_EXTLOAD)
1552 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1553 else if (ExtType == ISD::SEXTLOAD)
1554 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1555 else {
1556 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1557 InsertTo64 = true;
1558 // The result of the load is only i32. It's the subreg_to_reg that makes
1559 // it into an i64.
1560 DstVT = MVT::i32;
1561 }
1562 } else if (VT == MVT::i16) {
1563 if (ExtType == ISD::SEXTLOAD) {
1564 if (DstVT == MVT::i64)
1565 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1566 else
1567 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1568 } else {
1569 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1570 InsertTo64 = DstVT == MVT::i64;
1571 // The result of the load is only i32. It's the subreg_to_reg that makes
1572 // it into an i64.
1573 DstVT = MVT::i32;
1574 }
1575 } else if (VT == MVT::i8) {
1576 if (ExtType == ISD::SEXTLOAD) {
1577 if (DstVT == MVT::i64)
1578 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1579 else
1580 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1581 } else {
1582 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1583 InsertTo64 = DstVT == MVT::i64;
1584 // The result of the load is only i32. It's the subreg_to_reg that makes
1585 // it into an i64.
1586 DstVT = MVT::i32;
1587 }
1588 } else if (VT == MVT::f16) {
1589 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1590 } else if (VT == MVT::bf16) {
1591 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1592 } else if (VT == MVT::f32) {
1593 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1594 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1595 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1596 } else if (VT.is128BitVector()) {
1597 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1598 } else
1599 return false;
1600 SDValue Chain = LD->getChain();
1601 SDValue Base = LD->getBasePtr();
1602 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1603 int OffsetVal = (int)OffsetOp->getZExtValue();
1604 SDLoc dl(N);
1605 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1606 SDValue Ops[] = { Base, Offset, Chain };
1607 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1608 MVT::Other, Ops);
1609
1610 // Transfer memoperands.
1611 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1612 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1613
1614 // Either way, we're replacing the node, so tell the caller that.
1615 SDValue LoadedVal = SDValue(Res, 1);
1616 if (InsertTo64) {
1617 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1618 LoadedVal =
1619 SDValue(CurDAG->getMachineNode(
1620 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1621 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1622 SubReg),
1623 0);
1624 }
1625
1626 ReplaceUses(SDValue(N, 0), LoadedVal);
1627 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1628 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1629 CurDAG->RemoveDeadNode(N);
1630 return true;
1631}
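// Editor's note (illustrative sketch, not part of the original source): for a
// post-incremented zero-extending i8 load whose result is consumed as i64,
// the logic above picks AArch64::LDRBBpost (which writes a 32-bit W register)
// and then wraps the result in SUBREG_TO_REG with AArch64::sub_32, since a
// write to a W register implicitly zeroes the upper 32 bits of the X register.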
1632
1633void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1634 unsigned SubRegIdx) {
1635 SDLoc dl(N);
1636 EVT VT = N->getValueType(0);
1637 SDValue Chain = N->getOperand(0);
1638
1639 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1640 Chain};
1641
1642 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1643
1644 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1645 SDValue SuperReg = SDValue(Ld, 0);
1646 for (unsigned i = 0; i < NumVecs; ++i)
1647 ReplaceUses(SDValue(N, i),
1648 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1649
1650 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1651
1652 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1653 // because it's too simple to have needed special treatment during lowering.
1654 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1655 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1656 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1657 }
1658
1659 CurDAG->RemoveDeadNode(N);
1660}
1661
1662void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1663 unsigned Opc, unsigned SubRegIdx) {
1664 SDLoc dl(N);
1665 EVT VT = N->getValueType(0);
1666 SDValue Chain = N->getOperand(0);
1667
1668 SDValue Ops[] = {N->getOperand(1), // Mem operand
1669 N->getOperand(2), // Incremental
1670 Chain};
1671
1672 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1673 MVT::Untyped, MVT::Other};
1674
1675 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1676
1677 // Update uses of write back register
1678 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1679
1680 // Update uses of vector list
1681 SDValue SuperReg = SDValue(Ld, 1);
1682 if (NumVecs == 1)
1683 ReplaceUses(SDValue(N, 0), SuperReg);
1684 else
1685 for (unsigned i = 0; i < NumVecs; ++i)
1686 ReplaceUses(SDValue(N, i),
1687 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1688
1689 // Update the chain
1690 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1691 CurDAG->RemoveDeadNode(N);
1692}
1693
1694/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1695/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1696/// new Base and an SDValue representing the new offset.
1697std::tuple<unsigned, SDValue, SDValue>
1698AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1699 unsigned Opc_ri,
1700 const SDValue &OldBase,
1701 const SDValue &OldOffset,
1702 unsigned Scale) {
1703 SDValue NewBase = OldBase;
1704 SDValue NewOffset = OldOffset;
1705 // Detect a possible Reg+Imm addressing mode.
1706 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1707 N, OldBase, NewBase, NewOffset);
1708
1709 // Detect a possible reg+reg addressing mode, but only if we haven't already
1710 // detected a Reg+Imm one.
1711 const bool IsRegReg =
1712 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1713
1714 // Select the instruction.
1715 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1716}
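// Editor's note (illustrative, summarizing the selection order above): the
// reg+imm form (Opc_ri) is preferred whenever the offset fits the signed
// [-8, 7] vector-register-sized range checked by SelectAddrModeIndexedSVE, and
// the reg+reg form (Opc_rr) is only considered when that check fails.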
1717
1718enum class SelectTypeKind {
1719 Int1 = 0,
1720 Int = 1,
1721 FP = 2,
1722 AnyType = 3,
1723};
1724
1725/// This function selects an opcode from a list of opcodes, which is
1726/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1727/// element types, in this order.
1728template <SelectTypeKind Kind>
1729static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1730 // Only match scalable vector VTs
1731 if (!VT.isScalableVector())
1732 return 0;
1733
1734 EVT EltVT = VT.getVectorElementType();
1735 switch (Kind) {
1736 case SelectTypeKind::AnyType:
1737 break;
1738 case SelectTypeKind::Int:
1739 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1740 EltVT != MVT::i64)
1741 return 0;
1742 break;
1743 case SelectTypeKind::Int1:
1744 if (EltVT != MVT::i1)
1745 return 0;
1746 break;
1747 case SelectTypeKind::FP:
1748 if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64)
1749 return 0;
1750 break;
1751 }
1752
1753 unsigned Offset;
1754 switch (VT.getVectorMinNumElements()) {
1755 case 16: // 8-bit
1756 Offset = 0;
1757 break;
1758 case 8: // 16-bit
1759 Offset = 1;
1760 break;
1761 case 4: // 32-bit
1762 Offset = 2;
1763 break;
1764 case 2: // 64-bit
1765 Offset = 3;
1766 break;
1767 default:
1768 return 0;
1769 }
1770
1771 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1772}
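// Editor's note (illustrative): for a scalable vector such as nxv8i16 the
// minimum element count is 8, so Offset becomes 1 and the caller's 16-bit
// opcode (Opcodes[1]) is returned; nxv2i64 would map to Offset 3.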
1773
1774void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1775 SDLoc DL(N);
1776 EVT VT = N->getValueType(0);
1777
1778 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1779
1780 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1781 SDValue SuperReg = SDValue(WhilePair, 0);
1782
1783 for (unsigned I = 0; I < 2; ++I)
1784 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1785 AArch64::psub0 + I, DL, VT, SuperReg));
1786
1787 CurDAG->RemoveDeadNode(N);
1788}
1789
1790void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1791 unsigned Opcode) {
1792 EVT VT = N->getValueType(0);
1793 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1794 SDValue Ops = createZTuple(Regs);
1795 SDLoc DL(N);
1796 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1797 SDValue SuperReg = SDValue(Intrinsic, 0);
1798 for (unsigned i = 0; i < NumVecs; ++i)
1799 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1800 AArch64::zsub0 + i, DL, VT, SuperReg));
1801
1802 CurDAG->RemoveDeadNode(N);
1803 return;
1804}
1805
1806void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1807 unsigned NumVecs,
1808 bool IsZmMulti,
1809 unsigned Opcode) {
1810 assert(Opcode != 0 && "Unexpected opcode");
1811
1812 SDLoc DL(N);
1813 EVT VT = N->getValueType(0);
1814
1815 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1816 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1817 N->op_begin() + StartIdx + NumVecs);
1818 return createZMulTuple(Regs);
1819 };
1820
1821 SDValue Zdn = GetMultiVecOperand(1);
1822
1823 SDValue Zm;
1824 if (IsZmMulti)
1825 Zm = GetMultiVecOperand(NumVecs + 1);
1826 else
1827 Zm = N->getOperand(NumVecs + 1);
1828
1829 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1830
1831 SDValue SuperReg = SDValue(Intrinsic, 0);
1832 for (unsigned i = 0; i < NumVecs; ++i)
1833 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1834 AArch64::zsub0 + i, DL, VT, SuperReg));
1835
1836 CurDAG->RemoveDeadNode(N);
1837 return;
1838}
1839
1840void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1841 unsigned Scale, unsigned Opc_ri,
1842 unsigned Opc_rr, bool IsIntr) {
1843 assert(Scale < 4 && "Invalid scaling value.");
1844 SDLoc DL(N);
1845 EVT VT = N->getValueType(0);
1846 SDValue Chain = N->getOperand(0);
1847
1848 // Optimize addressing mode.
1849 SDValue Base, Offset;
1850 unsigned Opc;
1851 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1852 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1853 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1854
1855 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1856 Base, // Memory operand
1857 Offset, Chain};
1858
1859 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1860
1861 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1862 SDValue SuperReg = SDValue(Load, 0);
1863 for (unsigned i = 0; i < NumVecs; ++i)
1864 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1865 AArch64::zsub0 + i, DL, VT, SuperReg));
1866
1867 // Copy chain
1868 unsigned ChainIdx = NumVecs;
1869 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1870 CurDAG->RemoveDeadNode(N);
1871}
1872
1873void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1874 unsigned Opcode) {
1875 if (N->getValueType(0) != MVT::nxv4f32)
1876 return;
1877 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1878}
1879
1880void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1881 unsigned Op) {
1882 SDLoc DL(N);
1883 EVT VT = N->getValueType(0);
1884
1885 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1886 SDValue Zd = createZMulTuple(Regs);
1887 SDValue Zn = N->getOperand(1 + NumVecs);
1888 SDValue Zm = N->getOperand(2 + NumVecs);
1889
1890 SDValue Ops[] = {Zd, Zn, Zm};
1891
1892 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1893 SDValue SuperReg = SDValue(Intrinsic, 0);
1894 for (unsigned i = 0; i < NumVecs; ++i)
1895 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1896 AArch64::zsub0 + i, DL, VT, SuperReg));
1897
1898 CurDAG->RemoveDeadNode(N);
1899 return;
1900}
1901
1902bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
1903 switch (BaseReg) {
1904 default:
1905 return false;
1906 case AArch64::ZA:
1907 case AArch64::ZAB0:
1908 if (TileNum == 0)
1909 break;
1910 return false;
1911 case AArch64::ZAH0:
1912 if (TileNum <= 1)
1913 break;
1914 return false;
1915 case AArch64::ZAS0:
1916 if (TileNum <= 3)
1917 break;
1918 return false;
1919 case AArch64::ZAD0:
1920 if (TileNum <= 7)
1921 break;
1922 return false;
1923 }
1924
1925 BaseReg += TileNum;
1926 return true;
1927}
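// Editor's note (illustrative, stating an assumption the code relies on):
// SelectSMETile folds the tile number into the base register, e.g.
// AArch64::ZAS0 with TileNum == 2 yields AArch64::ZAS2, which presumes the
// ZAS0..ZAS3 (and similar) registers are numbered consecutively; out-of-range
// tile numbers are rejected by the switch above.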
1928
1929template <unsigned MaxIdx, unsigned Scale>
1930void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1931 unsigned BaseReg, unsigned Op) {
1932 unsigned TileNum = 0;
1933 if (BaseReg != AArch64::ZA)
1934 TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
1935
1936 if (!SelectSMETile(BaseReg, TileNum))
1937 return;
1938
1939 SDValue SliceBase, Base, Offset;
1940 if (BaseReg == AArch64::ZA)
1941 SliceBase = N->getOperand(2);
1942 else
1943 SliceBase = N->getOperand(3);
1944
1945 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1946 return;
1947
1948 SDLoc DL(N);
1949 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1950 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
1951 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1952
1953 EVT VT = N->getValueType(0);
1954 for (unsigned I = 0; I < NumVecs; ++I)
1955 ReplaceUses(SDValue(N, I),
1956 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
1957 SDValue(Mov, 0)));
1958 // Copy chain
1959 unsigned ChainIdx = NumVecs;
1960 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
1961 CurDAG->RemoveDeadNode(N);
1962}
1963
1964void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
1965 unsigned NumOutVecs,
1966 bool IsTupleInput,
1967 unsigned Opc) {
1968 SDLoc DL(N);
1969 EVT VT = N->getValueType(0);
1970 unsigned NumInVecs = N->getNumOperands() - 1;
1971
1972 SmallVector<SDValue, 4> Ops;
1973 if (IsTupleInput) {
1974 assert((NumInVecs == 2 || NumInVecs == 4) &&
1975 "Don't know how to handle multi-register input!");
1976 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
1977 N->op_begin() + 1 + NumInVecs);
1978 Ops.push_back(createZMulTuple(Regs));
1979 } else {
1980 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
1981 for (unsigned I = 0; I < NumInVecs; I++)
1982 Ops.push_back(N->getOperand(1 + I));
1983 }
1984
1985 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1986 SDValue SuperReg = SDValue(Res, 0);
1987
1988 for (unsigned I = 0; I < NumOutVecs; I++)
1989 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1990 AArch64::zsub0 + I, DL, VT, SuperReg));
1991 CurDAG->RemoveDeadNode(N);
1992}
1993
1994void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1995 unsigned Opc) {
1996 SDLoc dl(N);
1997 EVT VT = N->getOperand(2)->getValueType(0);
1998
1999 // Form a REG_SEQUENCE to force register allocation.
2000 bool Is128Bit = VT.getSizeInBits() == 128;
2001 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2002 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2003
2004 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2005 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2006
2007 // Transfer memoperands.
2008 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2009 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2010
2011 ReplaceNode(N, St);
2012}
2013
2014void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2015 unsigned Scale, unsigned Opc_rr,
2016 unsigned Opc_ri) {
2017 SDLoc dl(N);
2018
2019 // Form a REG_SEQUENCE to force register allocation.
2020 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2021 SDValue RegSeq = createZTuple(Regs);
2022
2023 // Optimize addressing mode.
2024 unsigned Opc;
2025 SDValue Base, Offset;
2026 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2027 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2028 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2029
2030 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2031 Base, // address
2032 Offset, // offset
2033 N->getOperand(0)}; // chain
2034 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2035
2036 ReplaceNode(N, St);
2037}
2038
2039bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2040 SDValue &OffImm) {
2041 SDLoc dl(N);
2042 const DataLayout &DL = CurDAG->getDataLayout();
2043 const TargetLowering *TLI = getTargetLowering();
2044
2045 // Try to match it for the frame address
2046 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2047 int FI = FINode->getIndex();
2048 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2049 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2050 return true;
2051 }
2052
2053 return false;
2054}
2055
2056void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2057 unsigned Opc) {
2058 SDLoc dl(N);
2059 EVT VT = N->getOperand(2)->getValueType(0);
2060 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2061 MVT::Other}; // Type for the Chain
2062
2063 // Form a REG_SEQUENCE to force register allocation.
2064 bool Is128Bit = VT.getSizeInBits() == 128;
2065 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2066 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2067
2068 SDValue Ops[] = {RegSeq,
2069 N->getOperand(NumVecs + 1), // base register
2070 N->getOperand(NumVecs + 2), // Incremental
2071 N->getOperand(0)}; // Chain
2072 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2073
2074 ReplaceNode(N, St);
2075}
2076
2077namespace {
2078/// WidenVector - Given a value in the V64 register class, produce the
2079/// equivalent value in the V128 register class.
2080class WidenVector {
2081 SelectionDAG &DAG;
2082
2083public:
2084 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2085
2086 SDValue operator()(SDValue V64Reg) {
2087 EVT VT = V64Reg.getValueType();
2088 unsigned NarrowSize = VT.getVectorNumElements();
2089 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2090 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2091 SDLoc DL(V64Reg);
2092
2093 SDValue Undef =
2094 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2095 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2096 }
2097};
2098} // namespace
2099
2100/// NarrowVector - Given a value in the V128 register class, produce the
2101/// equivalent value in the V64 register class.
2102static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2103 EVT VT = V128Reg.getValueType();
2104 unsigned WideSize = VT.getVectorNumElements();
2105 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2106 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2107
2108 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2109 V128Reg);
2110}
2111
2112void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2113 unsigned Opc) {
2114 SDLoc dl(N);
2115 EVT VT = N->getValueType(0);
2116 bool Narrow = VT.getSizeInBits() == 64;
2117
2118 // Form a REG_SEQUENCE to force register allocation.
2119 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2120
2121 if (Narrow)
2122 transform(Regs, Regs.begin(),
2123 WidenVector(*CurDAG));
2124
2125 SDValue RegSeq = createQTuple(Regs);
2126
2127 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2128
2129 unsigned LaneNo =
2130 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
2131
2132 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2133 N->getOperand(NumVecs + 3), N->getOperand(0)};
2134 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2135 SDValue SuperReg = SDValue(Ld, 0);
2136
2137 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2138 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2139 AArch64::qsub2, AArch64::qsub3 };
2140 for (unsigned i = 0; i < NumVecs; ++i) {
2141 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2142 if (Narrow)
2143 NV = NarrowVector(NV, *CurDAG);
2144 ReplaceUses(SDValue(N, i), NV);
2145 }
2146
2147 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2148 CurDAG->RemoveDeadNode(N);
2149}
2150
2151void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2152 unsigned Opc) {
2153 SDLoc dl(N);
2154 EVT VT = N->getValueType(0);
2155 bool Narrow = VT.getSizeInBits() == 64;
2156
2157 // Form a REG_SEQUENCE to force register allocation.
2158 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2159
2160 if (Narrow)
2161 transform(Regs, Regs.begin(),
2162 WidenVector(*CurDAG));
2163
2164 SDValue RegSeq = createQTuple(Regs);
2165
2166 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2167 RegSeq->getValueType(0), MVT::Other};
2168
2169 unsigned LaneNo =
2170 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
2171
2172 SDValue Ops[] = {RegSeq,
2173 CurDAG->getTargetConstant(LaneNo, dl,
2174 MVT::i64), // Lane Number
2175 N->getOperand(NumVecs + 2), // Base register
2176 N->getOperand(NumVecs + 3), // Incremental
2177 N->getOperand(0)};
2178 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2179
2180 // Update uses of the write back register
2181 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2182
2183 // Update uses of the vector list
2184 SDValue SuperReg = SDValue(Ld, 1);
2185 if (NumVecs == 1) {
2186 ReplaceUses(SDValue(N, 0),
2187 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2188 } else {
2189 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2190 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2191 AArch64::qsub2, AArch64::qsub3 };
2192 for (unsigned i = 0; i < NumVecs; ++i) {
2193 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2194 SuperReg);
2195 if (Narrow)
2196 NV = NarrowVector(NV, *CurDAG);
2197 ReplaceUses(SDValue(N, i), NV);
2198 }
2199 }
2200
2201 // Update the Chain
2202 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2203 CurDAG->RemoveDeadNode(N);
2204}
2205
2206void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2207 unsigned Opc) {
2208 SDLoc dl(N);
2209 EVT VT = N->getOperand(2)->getValueType(0);
2210 bool Narrow = VT.getSizeInBits() == 64;
2211
2212 // Form a REG_SEQUENCE to force register allocation.
2213 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2214
2215 if (Narrow)
2216 transform(Regs, Regs.begin(),
2217 WidenVector(*CurDAG));
2218
2219 SDValue RegSeq = createQTuple(Regs);
2220
2221 unsigned LaneNo =
2222 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
2223
2224 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2225 N->getOperand(NumVecs + 3), N->getOperand(0)};
2226 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2227
2228 // Transfer memoperands.
2229 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2230 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2231
2232 ReplaceNode(N, St);
2233}
2234
2235void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2236 unsigned Opc) {
2237 SDLoc dl(N);
2238 EVT VT = N->getOperand(2)->getValueType(0);
2239 bool Narrow = VT.getSizeInBits() == 64;
2240
2241 // Form a REG_SEQUENCE to force register allocation.
2242 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2243
2244 if (Narrow)
2245 transform(Regs, Regs.begin(),
2246 WidenVector(*CurDAG));
2247
2248 SDValue RegSeq = createQTuple(Regs);
2249
2250 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2251 MVT::Other};
2252
2253 unsigned LaneNo =
2254 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
2255
2256 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2257 N->getOperand(NumVecs + 2), // Base Register
2258 N->getOperand(NumVecs + 3), // Incremental
2259 N->getOperand(0)};
2260 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2261
2262 // Transfer memoperands.
2263 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2264 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2265
2266 ReplaceNode(N, St);
2267}
2268
2269static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2270 unsigned &Opc, SDValue &Opd0,
2271 unsigned &LSB, unsigned &MSB,
2272 unsigned NumberOfIgnoredLowBits,
2273 bool BiggerPattern) {
2274 assert(N->getOpcode() == ISD::AND &&
2275 "N must be a AND operation to call this function");
2276
2277 EVT VT = N->getValueType(0);
2278
2279 // Here we can test the type of VT and return false when the type does not
2280 // match, but since it is done prior to that call in the current context
2281 // we turned that into an assert to avoid redundant code.
2282 assert((VT == MVT::i32 || VT == MVT::i64) &&
2283 "Type checking must have been done before calling this function");
2284
2285 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2286 // changed the AND node to a 32-bit mask operation. We'll have to
2287 // undo that as part of the transform here if we want to catch all
2288 // the opportunities.
2289 // Currently the NumberOfIgnoredLowBits argument helps to recover
2290 // from these situations when matching bigger pattern (bitfield insert).
2291
2292 // For unsigned extracts, check for a shift right and mask
2293 uint64_t AndImm = 0;
2294 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2295 return false;
2296
2297 const SDNode *Op0 = N->getOperand(0).getNode();
2298
2299 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2300 // simplified. Try to undo that
2301 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2302
2303 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2304 if (AndImm & (AndImm + 1))
2305 return false;
2306
2307 bool ClampMSB = false;
2308 uint64_t SrlImm = 0;
2309 // Handle the SRL + ANY_EXTEND case.
2310 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2311 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2312 // Extend the incoming operand of the SRL to 64-bit.
2313 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2314 // Make sure to clamp the MSB so that we preserve the semantics of the
2315 // original operations.
2316 ClampMSB = true;
2317 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2318 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2319 SrlImm)) {
2320 // If the shift result was truncated, we can still combine them.
2321 Opd0 = Op0->getOperand(0).getOperand(0);
2322
2323 // Use the type of SRL node.
2324 VT = Opd0->getValueType(0);
2325 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2326 Opd0 = Op0->getOperand(0);
2327 ClampMSB = (VT == MVT::i32);
2328 } else if (BiggerPattern) {
2329 // Let's pretend a 0 shift right has been performed.
2330 // The resulting code will be at least as good as the original one
2331 // plus it may expose more opportunities for bitfield insert pattern.
2332 // FIXME: Currently we limit this to the bigger pattern, because
2333 // some optimizations expect AND and not UBFM.
2334 Opd0 = N->getOperand(0);
2335 } else
2336 return false;
2337
2338 // Bail out on large immediates. This happens when no proper
2339 // combining/constant folding was performed.
2340 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2341 LLVM_DEBUG(
2342 (dbgs() << N
2343 << ": Found large shift immediate, this should not happen\n"));
2344 return false;
2345 }
2346
2347 LSB = SrlImm;
2348 MSB = SrlImm +
2349 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2350 : llvm::countr_one<uint64_t>(AndImm)) -
2351 1;
2352 if (ClampMSB)
2353 // Since we're moving the extend before the right shift operation, we need
2354 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2355 // the zeros which would get shifted in with the original right shift
2356 // operation.
2357 MSB = MSB > 31 ? 31 : MSB;
2358
2359 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2360 return true;
2361}
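// Editor's note (worked example, illustrative): for i32 "(and (srl x, 3),
// 0xff)" the code above computes LSB = 3 and MSB = 3 + 8 - 1 = 10, so the
// pair is selected as UBFMWri with immr = 3 and imms = 10, i.e. an unsigned
// 8-bit bitfield extract starting at bit 3 (the UBFX #3, #8 alias).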
2362
2363static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2364 SDValue &Opd0, unsigned &Immr,
2365 unsigned &Imms) {
2366 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2367
2368 EVT VT = N->getValueType(0);
2369 unsigned BitWidth = VT.getSizeInBits();
2370 assert((VT == MVT::i32 || VT == MVT::i64) &&
2371 "Type checking must have been done before calling this function");
2372
2373 SDValue Op = N->getOperand(0);
2374 if (Op->getOpcode() == ISD::TRUNCATE) {
2375 Op = Op->getOperand(0);
2376 VT = Op->getValueType(0);
2377 BitWidth = VT.getSizeInBits();
2378 }
2379
2380 uint64_t ShiftImm;
2381 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2382 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2383 return false;
2384
2385 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2386 if (ShiftImm + Width > BitWidth)
2387 return false;
2388
2389 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2390 Opd0 = Op.getOperand(0);
2391 Immr = ShiftImm;
2392 Imms = ShiftImm + Width - 1;
2393 return true;
2394}
2395
2396static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2397 SDValue &Opd0, unsigned &LSB,
2398 unsigned &MSB) {
2399 // We are looking for the following pattern, which basically extracts several
2400 // contiguous bits from the source value and places them at the LSB of the
2401 // destination value; all other bits of the destination value are set to zero:
2402 //
2403 // Value2 = AND Value, MaskImm
2404 // SRL Value2, ShiftImm
2405 //
2406 // with MaskImm >> ShiftImm to search for the bit width.
2407 //
2408 // This gets selected into a single UBFM:
2409 //
2410 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2411 //
2412
2413 if (N->getOpcode() != ISD::SRL)
2414 return false;
2415
2416 uint64_t AndMask = 0;
2417 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2418 return false;
2419
2420 Opd0 = N->getOperand(0).getOperand(0);
2421
2422 uint64_t SrlImm = 0;
2423 if (!isIntImmediate(N->getOperand(1), SrlImm))
2424 return false;
2425
2426 // Check whether we really have several bits extract here.
2427 if (!isMask_64(AndMask >> SrlImm))
2428 return false;
2429
2430 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2431 LSB = SrlImm;
2432 MSB = llvm::Log2_64(AndMask);
2433 return true;
2434}
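// Editor's note (worked example, illustrative): "(srl (and x, 0xff0), 4)"
// passes the isMask_64 check because 0xff0 >> 4 == 0xff, giving LSB = 4 and
// MSB = Log2_64(0xff0) = 11, so the pair folds into a single UBFM with
// immr = 4 and imms = 11 (the UBFX #4, #8 alias).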
2435
2436static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2437 unsigned &Immr, unsigned &Imms,
2438 bool BiggerPattern) {
2439 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2440 "N must be a SHR/SRA operation to call this function");
2441
2442 EVT VT = N->getValueType(0);
2443
2444 // Here we can test the type of VT and return false when the type does not
2445 // match, but since it is done prior to that call in the current context
2446 // we turned that into an assert to avoid redundant code.
2447 assert((VT == MVT::i32 || VT == MVT::i64) &&
2448 "Type checking must have been done before calling this function");
2449
2450 // Check for AND + SRL doing several bits extract.
2451 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2452 return true;
2453
2454 // We're looking for a shift of a shift.
2455 uint64_t ShlImm = 0;
2456 uint64_t TruncBits = 0;
2457 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2458 Opd0 = N->getOperand(0).getOperand(0);
2459 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2460 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2461 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2462 // be considered as setting high 32 bits as zero. Our strategy here is to
2463 // always generate 64bit UBFM. This consistency will help the CSE pass
2464 // later find more redundancy.
2465 Opd0 = N->getOperand(0).getOperand(0);
2466 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2467 VT = Opd0.getValueType();
2468 assert(VT == MVT::i64 && "the promoted type should be i64");
2469 } else if (BiggerPattern) {
2470 // Let's pretend a 0 shift left has been performed.
2471 // FIXME: Currently we limit this to the bigger pattern case,
2472 // because some optimizations expect AND and not UBFM
2473 Opd0 = N->getOperand(0);
2474 } else
2475 return false;
2476
2477 // Missing combines/constant folding may have left us with strange
2478 // constants.
2479 if (ShlImm >= VT.getSizeInBits()) {
2480 LLVM_DEBUG(
2481 (dbgs() << N
2482 << ": Found large shift immediate, this should not happen\n"));
2483 return false;
2484 }
2485
2486 uint64_t SrlImm = 0;
2487 if (!isIntImmediate(N->getOperand(1), SrlImm))
2488 return false;
2489
2490 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2491 "bad amount in shift node!");
2492 int immr = SrlImm - ShlImm;
2493 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2494 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2495 // SRA requires a signed extraction
2496 if (VT == MVT::i32)
2497 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2498 else
2499 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2500 return true;
2501}
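// Editor's note (worked example, illustrative): for i32 "(srl (shl x, 24),
// 28)" the code above computes Immr = 28 - 24 = 4 and
// Imms = 32 - 24 - 0 - 1 = 7, selecting UBFMWri with immr = 4 and imms = 7,
// which extracts bits [7:4] of x, exactly what shifting left by 24 and then
// right by 28 produces.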
2502
2503bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2504 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2505
2506 EVT VT = N->getValueType(0);
2507 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2508 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2509 return false;
2510
2511 uint64_t ShiftImm;
2512 SDValue Op = N->getOperand(0);
2513 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2514 return false;
2515
2516 SDLoc dl(N);
2517 // Extend the incoming operand of the shift to 64-bits.
2518 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2519 unsigned Immr = ShiftImm;
2520 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2521 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2522 CurDAG->getTargetConstant(Imms, dl, VT)};
2523 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2524 return true;
2525}
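// Editor's note (worked example, illustrative): "(i64 (sext (sra i32 x, 5)))"
// is selected above as SBFMXri on the widened operand with Immr = 5 and
// Imms = 31, i.e. a signed extract of the 27 bits starting at bit 5
// (the SBFX #5, #27 alias), sign-extended to 64 bits.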
2526
2527/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
2528/// extract of a subvector.
2529bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
2530 assert(N->getOpcode() == ISD::FP_EXTEND);
2531
2532 // There are 2 forms of fcvtl2 - extend to double or extend to float.
2533 SDValue Extract = N->getOperand(0);
2534 EVT VT = N->getValueType(0);
2535 EVT NarrowVT = Extract.getValueType();
2536 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
2537 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
2538 return false;
2539
2540 // Optionally look past a bitcast.
2541 Extract = peekThroughBitcasts(Extract);
2542 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2543 return false;
2544
2545 // Match extract from start of high half index.
2546 // Example: v8i16 -> v4i16 means the extract must begin at index 4.
2547 unsigned ExtractIndex = Extract.getConstantOperandVal(1);
2548 if (ExtractIndex != Extract.getValueType().getVectorNumElements())
2549 return false;
2550
2551 auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2552 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
2553 return true;
2554}
2555
2556static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2557 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2558 unsigned NumberOfIgnoredLowBits = 0,
2559 bool BiggerPattern = false) {
2560 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2561 return false;
2562
2563 switch (N->getOpcode()) {
2564 default:
2565 if (!N->isMachineOpcode())
2566 return false;
2567 break;
2568 case ISD::AND:
2569 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2570 NumberOfIgnoredLowBits, BiggerPattern);
2571 case ISD::SRL:
2572 case ISD::SRA:
2573 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2574
2575 case ISD::SIGN_EXTEND_INREG:
2576 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2577 }
2578
2579 unsigned NOpc = N->getMachineOpcode();
2580 switch (NOpc) {
2581 default:
2582 return false;
2583 case AArch64::SBFMWri:
2584 case AArch64::UBFMWri:
2585 case AArch64::SBFMXri:
2586 case AArch64::UBFMXri:
2587 Opc = NOpc;
2588 Opd0 = N->getOperand(0);
2589 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
2590 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
2591 return true;
2592 }
2593 // Unreachable
2594 return false;
2595}
2596
2597bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2598 unsigned Opc, Immr, Imms;
2599 SDValue Opd0;
2600 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2601 return false;
2602
2603 EVT VT = N->getValueType(0);
2604 SDLoc dl(N);
2605
2606 // If the bit extract operation is 64bit but the original type is 32bit, we
2607 // need to add one EXTRACT_SUBREG.
2608 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2609 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2610 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2611
2612 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2613 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2614 MVT::i32, SDValue(BFM, 0));
2615 ReplaceNode(N, Inner.getNode());
2616 return true;
2617 }
2618
2619 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2620 CurDAG->getTargetConstant(Imms, dl, VT)};
2621 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2622 return true;
2623}
2624
2625/// Does DstMask form a complementary pair with the mask provided by
2626/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2627/// this asks whether DstMask zeroes precisely those bits that will be set by
2628/// the other half.
2629static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2630 unsigned NumberOfIgnoredHighBits, EVT VT) {
2631 assert((VT == MVT::i32 || VT == MVT::i64) &&
2632 "i32 or i64 mask type expected!");
2633 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2634
2635 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2636 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2637
2638 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2639 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2640}
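// Editor's note (illustrative): with VT == i32, a DstMask of 0xffff0000 and a
// BitsToBeInserted mask of 0x0000ffff form a complementary pair (disjoint and
// jointly covering all 32 bits), so a BFI can be used; a mask of 0x0000abcd
// would fail the second check because the union is not all ones.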
2641
2642// Look for bits that will be useful for later uses.
2643// A bit is consider useless as soon as it is dropped and never used
2644// before it as been dropped.
2645// E.g., looking for useful bit of x
2646// 1. y = x & 0x7
2647// 2. z = y >> 2
2648// After #1, x useful bits are 0x7, then the useful bits of x, live through
2649// y.
2650// After #2, the useful bits of x are 0x4.
2651// However, if x is used on an unpredicatable instruction, then all its bits
2652// are useful.
2653// E.g.
2654// 1. y = x & 0x7
2655// 2. z = y >> 2
2656// 3. str x, [@x]
2657static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2658
2659static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2660 unsigned Depth) {
2661 uint64_t Imm =
2662 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2663 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2664 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2665 getUsefulBits(Op, UsefulBits, Depth + 1);
2666}
2667
2668static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2669 uint64_t Imm, uint64_t MSB,
2670 unsigned Depth) {
2671 // inherit the bitwidth value
2672 APInt OpUsefulBits(UsefulBits);
2673 OpUsefulBits = 1;
2674
2675 if (MSB >= Imm) {
2676 OpUsefulBits <<= MSB - Imm + 1;
2677 --OpUsefulBits;
2678 // The interesting part will be in the lower part of the result
2679 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2680 // The interesting part was starting at Imm in the argument
2681 OpUsefulBits <<= Imm;
2682 } else {
2683 OpUsefulBits <<= MSB + 1;
2684 --OpUsefulBits;
2685 // The interesting part will be shifted in the result
2686 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2687 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2688 // The interesting part was at zero in the argument
2689 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2690 }
2691
2692 UsefulBits &= OpUsefulBits;
2693}
2694
2695static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2696 unsigned Depth) {
2697 uint64_t Imm =
2698 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2699 uint64_t MSB =
2700 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2701
2702 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2703}
2704
2705static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2706 unsigned Depth) {
2707 uint64_t ShiftTypeAndValue =
2708 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2709 APInt Mask(UsefulBits);
2710 Mask.clearAllBits();
2711 Mask.flipAllBits();
2712
2713 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2714 // Shift Left
2715 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2716 Mask <<= ShiftAmt;
2717 getUsefulBits(Op, Mask, Depth + 1);
2718 Mask.lshrInPlace(ShiftAmt);
2719 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2720 // Shift Right
2721 // We do not handle AArch64_AM::ASR, because the sign will change the
2722 // number of useful bits
2723 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2724 Mask.lshrInPlace(ShiftAmt);
2725 getUsefulBits(Op, Mask, Depth + 1);
2726 Mask <<= ShiftAmt;
2727 } else
2728 return;
2729
2730 UsefulBits &= Mask;
2731}
2732
2733static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2734 unsigned Depth) {
2735 uint64_t Imm =
2736 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2737 uint64_t MSB =
2738 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2739
2740 APInt OpUsefulBits(UsefulBits);
2741 OpUsefulBits = 1;
2742
2743 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2744 ResultUsefulBits.flipAllBits();
2745 APInt Mask(UsefulBits.getBitWidth(), 0);
2746
2747 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2748
2749 if (MSB >= Imm) {
2750 // The instruction is a BFXIL.
2751 uint64_t Width = MSB - Imm + 1;
2752 uint64_t LSB = Imm;
2753
2754 OpUsefulBits <<= Width;
2755 --OpUsefulBits;
2756
2757 if (Op.getOperand(1) == Orig) {
2758 // Copy the low bits from the result to bits starting from LSB.
2759 Mask = ResultUsefulBits & OpUsefulBits;
2760 Mask <<= LSB;
2761 }
2762
2763 if (Op.getOperand(0) == Orig)
2764 // Bits starting from LSB in the input contribute to the result.
2765 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2766 } else {
2767 // The instruction is a BFI.
2768 uint64_t Width = MSB + 1;
2769 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2770
2771 OpUsefulBits <<= Width;
2772 --OpUsefulBits;
2773 OpUsefulBits <<= LSB;
2774
2775 if (Op.getOperand(1) == Orig) {
2776 // Copy the bits from the result to the zero bits.
2777 Mask = ResultUsefulBits & OpUsefulBits;
2778 Mask.lshrInPlace(LSB);
2779 }
2780
2781 if (Op.getOperand(0) == Orig)
2782 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2783 }
2784
2785 UsefulBits &= Mask;
2786}
2787
2788static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2789 SDValue Orig, unsigned Depth) {
2790
2791 // Users of this node should have already been instruction selected
2792 // FIXME: Can we turn that into an assert?
2793 if (!UserNode->isMachineOpcode())
2794 return;
2795
2796 switch (UserNode->getMachineOpcode()) {
2797 default:
2798 return;
2799 case AArch64::ANDSWri:
2800 case AArch64::ANDSXri:
2801 case AArch64::ANDWri:
2802 case AArch64::ANDXri:
2803 // We increment Depth only when we call the getUsefulBits
2804 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2805 Depth);
2806 case AArch64::UBFMWri:
2807 case AArch64::UBFMXri:
2808 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2809
2810 case AArch64::ORRWrs:
2811 case AArch64::ORRXrs:
2812 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2813 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2814 Depth);
2815 return;
2816 case AArch64::BFMWri:
2817 case AArch64::BFMXri:
2818 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2819
2820 case AArch64::STRBBui:
2821 case AArch64::STURBBi:
2822 if (UserNode->getOperand(0) != Orig)
2823 return;
2824 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2825 return;
2826
2827 case AArch64::STRHHui:
2828 case AArch64::STURHHi:
2829 if (UserNode->getOperand(0) != Orig)
2830 return;
2831 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2832 return;
2833 }
2834}
2835
2836static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2837 if (Depth >= SelectionDAG::MaxRecursionDepth)
2838 return;
2839 // Initialize UsefulBits
2840 if (!Depth) {
2841 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2842 // At the beginning, assume every produced bit is useful
2843 UsefulBits = APInt(Bitwidth, 0);
2844 UsefulBits.flipAllBits();
2845 }
2846 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2847
2848 for (SDNode *Node : Op.getNode()->uses()) {
2849 // A use cannot produce useful bits
2850 APInt UsefulBitsForUse = APInt(UsefulBits);
2851 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2852 UsersUsefulBits |= UsefulBitsForUse;
2853 }
2854 // UsefulBits contains the produced bits that are meaningful for the
2855 // current definition, thus a user cannot make a bit meaningful at
2856 // this point
2857 UsefulBits &= UsersUsefulBits;
2858}
2859
2860/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2861/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2862/// 0, return Op unchanged.
2863static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2864 if (ShlAmount == 0)
2865 return Op;
2866
2867 EVT VT = Op.getValueType();
2868 SDLoc dl(Op);
2869 unsigned BitWidth = VT.getSizeInBits();
2870 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2871
2872 SDNode *ShiftNode;
2873 if (ShlAmount > 0) {
2874 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2875 ShiftNode = CurDAG->getMachineNode(
2876 UBFMOpc, dl, VT, Op,
2877 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2878 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2879 } else {
2880 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2881 assert(ShlAmount < 0 && "expected right shift");
2882 int ShrAmount = -ShlAmount;
2883 ShiftNode = CurDAG->getMachineNode(
2884 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2885 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2886 }
2887
2888 return SDValue(ShiftNode, 0);
2889}
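// Editor's note (worked example, illustrative): getLeftShift with
// ShlAmount == 8 on an i32 value emits UBFMWri Op, #24, #23 (the LSL #8
// alias), while ShlAmount == -8 emits UBFMWri Op, #8, #31 (the LSR #8 alias).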
2890
2891// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2892static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2893 bool BiggerPattern,
2894 const uint64_t NonZeroBits,
2895 SDValue &Src, int &DstLSB,
2896 int &Width);
2897
2898// For bit-field-positioning pattern "shl VAL, N)".
2899static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2900 bool BiggerPattern,
2901 const uint64_t NonZeroBits,
2902 SDValue &Src, int &DstLSB,
2903 int &Width);
2904
2905/// Does this tree qualify as an attempt to move a bitfield into position,
2906/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2907static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2908 bool BiggerPattern, SDValue &Src,
2909 int &DstLSB, int &Width) {
2910 EVT VT = Op.getValueType();
2911 unsigned BitWidth = VT.getSizeInBits();
2912 (void)BitWidth;
2913 assert(BitWidth == 32 || BitWidth == 64);
2914
2915 KnownBits Known = CurDAG->computeKnownBits(Op);
2916
2917 // Non-zero in the sense that they're not provably zero, which is the key
2918 // point if we want to use this value
2919 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2920 if (!isShiftedMask_64(NonZeroBits))
2921 return false;
2922
2923 switch (Op.getOpcode()) {
2924 default:
2925 break;
2926 case ISD::AND:
2927 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2928 NonZeroBits, Src, DstLSB, Width);
2929 case ISD::SHL:
2930 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2931 NonZeroBits, Src, DstLSB, Width);
2932 }
2933
2934 return false;
2935}
2936
2937static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2938 bool BiggerPattern,
2939 const uint64_t NonZeroBits,
2940 SDValue &Src, int &DstLSB,
2941 int &Width) {
2942 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2943
2944 EVT VT = Op.getValueType();
2945 assert((VT == MVT::i32 || VT == MVT::i64) &&
2946 "Caller guarantees VT is one of i32 or i64");
2947 (void)VT;
2948
2949 uint64_t AndImm;
2950 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2951 return false;
2952
2953 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
2954 // 1) (AndImm & (1 << POS) == 0)
2955 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
2956 //
2957 // 1) and 2) don't agree so something must be wrong (e.g., in
2958 // 'SelectionDAG::computeKnownBits')
2959 assert((~AndImm & NonZeroBits) == 0 &&
2960 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2961
2962 SDValue AndOp0 = Op.getOperand(0);
2963
2964 uint64_t ShlImm;
2965 SDValue ShlOp0;
2966 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
2967 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
2968 ShlOp0 = AndOp0.getOperand(0);
2969 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
2970 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
2971 ShlImm)) {
2972 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
2973
2974 // ShlVal == shl(val, N), which is a left shift on a smaller type.
2975 SDValue ShlVal = AndOp0.getOperand(0);
2976
2977 // Since this is after type legalization and ShlVal is extended to MVT::i64,
2978 // expect VT to be MVT::i32.
2979 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
2980
2981 // Widens 'val' to MVT::i64 as the source of bit field positioning.
2982 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2983 } else
2984 return false;
2985
2986 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2987 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2988 // AndOp0+AND.
2989 if (!BiggerPattern && !AndOp0.hasOneUse())
2990 return false;
2991
2992 DstLSB = llvm::countr_zero(NonZeroBits);
2993 Width = llvm::countr_one(NonZeroBits >> DstLSB);
2994
2995 // Bail out on large Width. This happens when no proper combining / constant
2996 // folding was performed.
2997 if (Width >= (int)VT.getSizeInBits()) {
2998 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
2999 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3000 // "val".
3001 // If VT is i32, what Width >= 32 means:
3002 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3003 // demands at least 'Width' bits (after dag-combiner). This together with
3004 // the `any_extend` Op (undefined higher bits) indicates a missed combination
3005 // when lowering the 'and' IR instruction to a machine IR instruction.
3006 LLVM_DEBUG(
3007 dbgs()
3008 << "Found large Width in bit-field-positioning -- this indicates no "
3009 "proper combining / constant folding was performed\n");
3010 return false;
3011 }
3012
3013 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3014 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3015 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3016 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3017 // which case it is not profitable to insert an extra shift.
3018 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3019 return false;
3020
3021 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3022 return true;
3023}
3024
3025// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3026// UBFIZ.
3027static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3028 SDValue &Src, int &DstLSB,
3029 int &Width) {
3030 // Caller should have verified that N is a left shift with constant shift
3031 // amount; asserts that.
3032 assert(Op.getOpcode() == ISD::SHL &&
3033 "Op.getNode() should be a SHL node to call this function");
3034 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3035 "Op.getNode() should shift ShlImm to call this function");
3036
3037 uint64_t AndImm = 0;
3038 SDValue Op0 = Op.getOperand(0);
3039 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3040 return false;
3041
3042 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3043 if (isMask_64(ShiftedAndImm)) {
3044 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3045 // should end with Mask, and could be prefixed with random bits if those
3046 // bits are shifted out.
3047 //
3048 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3049 // the AND result corresponding to those bits are shifted out, so it's fine
3050 // to not extract them.
3051 Width = llvm::countr_one(ShiftedAndImm);
3052 DstLSB = ShlImm;
3053 Src = Op0.getOperand(0);
3054 return true;
3055 }
3056 return false;
3057}
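// Editor's note (worked example, illustrative): "(shl (and x, 0x3f), 4)" gives
// ShiftedAndImm == ((0x3f << 4) >> 4) == 0x3f, which is a mask, so the node is
// reported as equivalent to a UBFIZ with DstLSB = 4 and Width = 6.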
3058
3059static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3060 bool BiggerPattern,
3061 const uint64_t NonZeroBits,
3062 SDValue &Src, int &DstLSB,
3063 int &Width) {
3064 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3065
3066 EVT VT = Op.getValueType();
3067 assert((VT == MVT::i32 || VT == MVT::i64) &&
3068 "Caller guarantees that type is i32 or i64");
3069 (void)VT;
3070
3071 uint64_t ShlImm;
3072 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3073 return false;
3074
3075 if (!BiggerPattern && !Op.hasOneUse())
3076 return false;
3077
3078 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3079 return true;
3080
3081 DstLSB = llvm::countr_zero(NonZeroBits);
3082 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3083
3084 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3085 return false;
3086
3087 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3088 return true;
3089}
3090
3091static bool isShiftedMask(uint64_t Mask, EVT VT) {
3092 assert(VT == MVT::i32 || VT == MVT::i64);
3093 if (VT == MVT::i32)
3094 return isShiftedMask_32(Mask);
3095 return isShiftedMask_64(Mask);
3096}
3097
3098// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3099// inserted only sets known zero bits.
3100static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3101 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3102
3103 EVT VT = N->getValueType(0);
3104 if (VT != MVT::i32 && VT != MVT::i64)
3105 return false;
3106
3107 unsigned BitWidth = VT.getSizeInBits();
3108
3109 uint64_t OrImm;
3110 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3111 return false;
3112
3113 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3114 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3115 // performance neutral.
3116 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3117 return false;
3118
3119 uint64_t MaskImm;
3120 SDValue And = N->getOperand(0);
3121 // Must be a single use AND with an immediate operand.
3122 if (!And.hasOneUse() ||
3123 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3124 return false;
3125
3126 // Compute the Known Zero for the AND as this allows us to catch more general
3127 // cases than just looking for AND with imm.
3128 KnownBits Known = CurDAG->computeKnownBits(And);
3129
3130 // Non-zero in the sense that they're not provably zero, which is the key
3131 // point if we want to use this value.
3132 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3133
3134 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3135 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3136 return false;
3137
3138 // The bits being inserted must only set those bits that are known to be zero.
3139 if ((OrImm & NotKnownZero) != 0) {
3140 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3141 // currently handle this case.
3142 return false;
3143 }
3144
3145 // BFI/BFXIL dst, src, #lsb, #width.
3146 int LSB = llvm::countr_one(NotKnownZero);
3147 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3148
3149 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3150 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3151 unsigned ImmS = Width - 1;
3152
3153 // If we're creating a BFI instruction avoid cases where we need more
3154 // instructions to materialize the BFI constant as compared to the original
3155 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3156 // should be no worse in this case.
3157 bool IsBFI = LSB != 0;
3158 uint64_t BFIImm = OrImm >> LSB;
3159 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3160 // We have a BFI instruction and we know the constant can't be materialized
3161 // with a ORR-immediate with the zero register.
3162 unsigned OrChunks = 0, BFIChunks = 0;
3163 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3164 if (((OrImm >> Shift) & 0xFFFF) != 0)
3165 ++OrChunks;
3166 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3167 ++BFIChunks;
3168 }
3169 if (BFIChunks > OrChunks)
3170 return false;
3171 }
3172
3173 // Materialize the constant to be inserted.
3174 SDLoc DL(N);
3175 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3176 SDNode *MOVI = CurDAG->getMachineNode(
3177 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3178
3179 // Create the BFI/BFXIL instruction.
3180 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3181 CurDAG->getTargetConstant(ImmR, DL, VT),
3182 CurDAG->getTargetConstant(ImmS, DL, VT)};
3183 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3184 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3185 return true;
3186}
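// Editor's note (worked example, illustrative): inserting a 4-bit value at
// bit 8 of an i32 destination translates above into BFM operands
// ImmR = (32 - 8) % 32 = 24 and ImmS = 4 - 1 = 3, i.e. the BFI Wd, Wn, #8, #4
// alias.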
3187
3188static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3189 SDValue &ShiftedOperand,
3190 uint64_t &EncodedShiftImm) {
3191 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3192 if (!Dst.hasOneUse())
3193 return false;
3194
3195 EVT VT = Dst.getValueType();
3196 assert((VT == MVT::i32 || VT == MVT::i64) &&
3197 "Caller should guarantee that VT is one of i32 or i64");
3198 const unsigned SizeInBits = VT.getSizeInBits();
3199
3200 SDLoc DL(Dst.getNode());
3201 uint64_t AndImm, ShlImm;
3202 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3203 isShiftedMask_64(AndImm)) {
3204 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3205 SDValue DstOp0 = Dst.getOperand(0);
3206 if (!DstOp0.hasOneUse())
3207 return false;
3208
3209 // An example to illustrate the transformation
3210 // From:
3211 // lsr x8, x1, #1
3212 // and x8, x8, #0x3f80
3213 // bfxil x8, x1, #0, #7
3214 // To:
3215 // and x8, x23, #0x7f
3216 // ubfx x9, x23, #8, #7
3217 // orr x23, x8, x9, lsl #7
3218 //
3219 // The number of instructions remains the same, but ORR is faster than BFXIL
3220 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3221 // the dependency chain is improved after the transformation.
3222 uint64_t SrlImm;
3223 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3224 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3225 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3226 unsigned MaskWidth =
3227 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3228 unsigned UBFMOpc =
3229 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3230 SDNode *UBFMNode = CurDAG->getMachineNode(
3231 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3232 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3233 VT),
3234 CurDAG->getTargetConstant(
3235 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3236 ShiftedOperand = SDValue(UBFMNode, 0);
3237 EncodedShiftImm = AArch64_AM::getShifterImm(
3238 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3239 return true;
3240 }
3241 }
3242 return false;
3243 }
3244
3245 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3246 ShiftedOperand = Dst.getOperand(0);
3247 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3248 return true;
3249 }
3250
3251 uint64_t SrlImm;
3252 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3253 ShiftedOperand = Dst.getOperand(0);
3254 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3255 return true;
3256 }
3257 return false;
3258}
3259
3260// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3261// the operands and select it to AArch64::ORR with shifted registers if
3262// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3263static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3264 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3265 const bool BiggerPattern) {
3266 EVT VT = N->getValueType(0);
3267 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3268 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3269 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3270 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3271 assert((VT == MVT::i32 || VT == MVT::i64) &&
3272 "Expect result type to be i32 or i64 since N is combinable to BFM");
3273 SDLoc DL(N);
3274
3275 // Bail out if BFM simplifies away one node in BFM Dst.
3276 if (OrOpd1 != Dst)
3277 return false;
3278
3279 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3280 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3281 // nodes from Rn (or inserts an additional shift node) if BiggerPattern is true.
3282 if (BiggerPattern) {
3283 uint64_t SrcAndImm;
3284 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3285 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3286 // OrOpd0 = AND Src, #Mask
3287 // So BFM simplifies away one AND node from Src and doesn't simplify away
3288 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3289 // one node (from Rd), ORR is better since it has higher throughput and
3290 // smaller latency than BFM on many AArch64 processors (and for the rest
3291 // ORR is at least as good as BFM).
3292 SDValue ShiftedOperand;
3293 uint64_t EncodedShiftImm;
3294 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3295 EncodedShiftImm)) {
3296 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3297 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3298 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3299 return true;
3300 }
3301 }
3302 return false;
3303 }
3304
3305 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3306
3307 uint64_t ShlImm;
3308 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3309 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3310 SDValue Ops[] = {
3311 Dst, Src,
3312 CurDAG->getTargetConstant(
3313 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3314 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3315 return true;
3316 }
3317
3318 // Select the following pattern to left-shifted operand rather than BFI.
3319 // %val1 = op ..
3320 // %val2 = shl %val1, #imm
3321 // %res = or %val1, %val2
3322 //
3323 // If N is selected to be BFI, we know that
3324 // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3325 //    folded into BFI) and 2) OrOpd1 would be the destination (i.e., preserved)
3326 //
3327 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3328 if (OrOpd0.getOperand(0) == OrOpd1) {
3329 SDValue Ops[] = {
3330 OrOpd1, OrOpd1,
3331 CurDAG->getTargetConstant(
3332 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3333 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3334 return true;
3335 }
3336 }
3337
3338 uint64_t SrlImm;
3339 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3340 // Select the following pattern to right-shifted operand rather than BFXIL.
3341 // %val1 = op ..
3342 // %val2 = lshr %val1, #imm
3343 // %res = or %val1, %val2
3344 //
3345 // If N is selected to be BFXIL, we know that
3346 // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3347 //    folded into BFXIL) and 2) OrOpd1 would be the destination (i.e., preserved)
3348 //
3349 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3350 if (OrOpd0.getOperand(0) == OrOpd1) {
3351 SDValue Ops[] = {
3352 OrOpd1, OrOpd1,
3353 CurDAG->getTargetConstant(
3354 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3355 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3356 return true;
3357 }
3358 }
3359
3360 return false;
3361}
3362
3363static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3364 SelectionDAG *CurDAG) {
3365 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3366
3367 EVT VT = N->getValueType(0);
3368 if (VT != MVT::i32 && VT != MVT::i64)
3369 return false;
3370
3371 unsigned BitWidth = VT.getSizeInBits();
3372
3373 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3374 // have the expected shape. Try to undo that.
3375
3376 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3377 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3378
3379 // Given an OR operation, check if we have the following pattern:
3380 // ubfm c, b, imm, imm2 (or something that does the same job, see
3381 // isBitfieldExtractOp)
3382 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3383 // countTrailingZeros(mask2) == imm2 - imm + 1
3384 // f = d | c
3385 // if yes, replace the OR instruction with:
3386 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3387
3388 // OR is commutative, check all combinations of operand order and values of
3389 // BiggerPattern, i.e.
3390 // Opd0, Opd1, BiggerPattern=false
3391 // Opd1, Opd0, BiggerPattern=false
3392 // Opd0, Opd1, BiggerPattern=true
3393 // Opd1, Opd0, BiggerPattern=true
3394 // Several of these combinations may match, so check with BiggerPattern=false
3395 // first since that will produce better results by matching more instructions
3396 // and/or inserting fewer extra instructions.
3397 for (int I = 0; I < 4; ++I) {
3398
3399 SDValue Dst, Src;
3400 unsigned ImmR, ImmS;
3401 bool BiggerPattern = I / 2;
3402 SDValue OrOpd0Val = N->getOperand(I % 2);
3403 SDNode *OrOpd0 = OrOpd0Val.getNode();
3404 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3405 SDNode *OrOpd1 = OrOpd1Val.getNode();
3406
3407 unsigned BFXOpc;
3408 int DstLSB, Width;
3409 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3410 NumberOfIgnoredLowBits, BiggerPattern)) {
3411 // Check that the returned opcode is compatible with the pattern,
3412 // i.e., same type and zero extended (U and not S)
3413 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3414 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3415 continue;
3416
3417 // Compute the width of the bitfield insertion
3418 DstLSB = 0;
3419 Width = ImmS - ImmR + 1;
3420 // FIXME: This constraint is to catch bitfield insertion; we may
3421 // want to widen the pattern if we want to grab the general bitfield
3422 // move case.
3423 if (Width <= 0)
3424 continue;
3425
3426 // If the mask on the insertee is correct, we have a BFXIL operation. We
3427 // can share the ImmR and ImmS values from the already-computed UBFM.
3428 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3429 BiggerPattern,
3430 Src, DstLSB, Width)) {
3431 ImmR = (BitWidth - DstLSB) % BitWidth;
3432 ImmS = Width - 1;
3433 } else
3434 continue;
3435
3436 // Check the second part of the pattern
3437 EVT VT = OrOpd1Val.getValueType();
3438 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3439
3440 // Compute the Known Zero for the candidate of the first operand.
3441 // This allows us to catch more general cases than just looking for
3442 // an AND with an immediate. Indeed, simplify-demanded-bits may have
3443 // removed the AND instruction because it proved it was useless.
3444 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3445
3446 // Check if there is enough room for the second operand to appear
3447 // in the first one
3448 APInt BitsToBeInserted =
3449 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3450
3451 if ((BitsToBeInserted & ~Known.Zero) != 0)
3452 continue;
3453
3454 // Set the first operand
3455 uint64_t Imm;
3456 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3457 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3458 // In that case, we can eliminate the AND
3459 Dst = OrOpd1->getOperand(0);
3460 else
3461 // Maybe the AND has been removed by simplify-demanded-bits
3462 // or is useful because it discards more bits
3463 Dst = OrOpd1Val;
3464
3465 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3466 // with shifted operand is more efficient.
3467 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3468 BiggerPattern))
3469 return true;
3470
3471 // both parts match
3472 SDLoc DL(N);
3473 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3474 CurDAG->getTargetConstant(ImmS, DL, VT)};
3475 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3476 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3477 return true;
3478 }
3479
3480 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3481 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3482 // mask (e.g., 0x000ffff0).
3483 uint64_t Mask0Imm, Mask1Imm;
3484 SDValue And0 = N->getOperand(0);
3485 SDValue And1 = N->getOperand(1);
3486 if (And0.hasOneUse() && And1.hasOneUse() &&
3487 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3488 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3489 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3490 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3491
3492 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3493 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3494 // bits to be inserted.
3495 if (isShiftedMask(Mask0Imm, VT)) {
3496 std::swap(And0, And1);
3497 std::swap(Mask0Imm, Mask1Imm);
3498 }
3499
3500 SDValue Src = And1->getOperand(0);
3501 SDValue Dst = And0->getOperand(0);
3502 unsigned LSB = llvm::countr_zero(Mask1Imm);
3503 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3504
3505 // The BFXIL inserts the low-order bits from a source register, so right
3506 // shift the needed bits into place.
3507 SDLoc DL(N);
3508 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3509 uint64_t LsrImm = LSB;
3510 if (Src->hasOneUse() &&
3511 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3512 (LsrImm + LSB) < BitWidth) {
3513 Src = Src->getOperand(0);
3514 LsrImm += LSB;
3515 }
3516
3517 SDNode *LSR = CurDAG->getMachineNode(
3518 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3519 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3520
3521 // BFXIL is an alias of BFM, so translate to BFM operands.
3522 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3523 unsigned ImmS = Width - 1;
3524
3525 // Create the BFXIL instruction.
3526 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3527 CurDAG->getTargetConstant(ImmR, DL, VT),
3528 CurDAG->getTargetConstant(ImmS, DL, VT)};
3529 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3530 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3531 return true;
3532 }
3533
3534 return false;
3535}
3536
3537bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3538 if (N->getOpcode() != ISD::OR)
3539 return false;
3540
3541 APInt NUsefulBits;
3542 getUsefulBits(SDValue(N, 0), NUsefulBits);
3543
3544 // If no bits are useful, just return UNDEF.
3545 if (!NUsefulBits) {
3546 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3547 return true;
3548 }
3549
3550 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3551 return true;
3552
3553 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3554}
3555
3556/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3557/// equivalent of a left shift by a constant amount followed by an AND masking
3558/// out a contiguous set of bits.
3559bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3560 if (N->getOpcode() != ISD::AND)
3561 return false;
3562
3563 EVT VT = N->getValueType(0);
3564 if (VT != MVT::i32 && VT != MVT::i64)
3565 return false;
3566
3567 SDValue Op0;
3568 int DstLSB, Width;
3569 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3570 Op0, DstLSB, Width))
3571 return false;
3572
3573 // ImmR is the rotate right amount.
3574 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3575 // ImmS is the most significant bit of the source to be moved.
3576 unsigned ImmS = Width - 1;
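  // For example, for an i32 "(x << 5) & 0x3e0" the positioning op reports
  // DstLSB = 5 and Width = 5, so ImmR = (32 - 5) % 32 = 27 and ImmS = 4,
  // i.e. "UBFIZ Wd, Wn, #5, #5".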
3577
3578 SDLoc DL(N);
3579 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3580 CurDAG->getTargetConstant(ImmS, DL, VT)};
3581 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3582 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3583 return true;
3584}
3585
3586/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3587/// variable shift/rotate instructions.
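/// For example, a 32-bit "shl %x, (add %y, 32)" can be selected directly to
/// "LSLV %x, %y", since LSLV only uses the low 5 bits of the shift amount and
/// the ADD of 32 is therefore redundant.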
3588bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3589 EVT VT = N->getValueType(0);
3590
3591 unsigned Opc;
3592 switch (N->getOpcode()) {
3593 case ISD::ROTR:
3594 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3595 break;
3596 case ISD::SHL:
3597 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3598 break;
3599 case ISD::SRL:
3600 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3601 break;
3602 case ISD::SRA:
3603 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3604 break;
3605 default:
3606 return false;
3607 }
3608
3609 uint64_t Size;
3610 uint64_t Bits;
3611 if (VT == MVT::i32) {
3612 Bits = 5;
3613 Size = 32;
3614 } else if (VT == MVT::i64) {
3615 Bits = 6;
3616 Size = 64;
3617 } else
3618 return false;
3619
3620 SDValue ShiftAmt = N->getOperand(1);
3621 SDLoc DL(N);
3622 SDValue NewShiftAmt;
3623
3624 // Skip over an extend of the shift amount.
3625 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3626 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3627 ShiftAmt = ShiftAmt->getOperand(0);
3628
3629 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3630 SDValue Add0 = ShiftAmt->getOperand(0);
3631 SDValue Add1 = ShiftAmt->getOperand(1);
3632 uint64_t Add0Imm;
3633 uint64_t Add1Imm;
3634 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3635 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3636 // to avoid the ADD/SUB.
3637 NewShiftAmt = Add0;
3638 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3639 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3640 (Add0Imm % Size == 0)) {
3641 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3642 // to generate a NEG instead of a SUB from a constant.
3643 unsigned NegOpc;
3644 unsigned ZeroReg;
3645 EVT SubVT = ShiftAmt->getValueType(0);
3646 if (SubVT == MVT::i32) {
3647 NegOpc = AArch64::SUBWrr;
3648 ZeroReg = AArch64::WZR;
3649 } else {
3650 assert(SubVT == MVT::i64);
3651 NegOpc = AArch64::SUBXrr;
3652 ZeroReg = AArch64::XZR;
3653 }
3654 SDValue Zero =
3655 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3656 MachineSDNode *Neg =
3657 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3658 NewShiftAmt = SDValue(Neg, 0);
3659 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3660 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3661 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3662 // to generate a NOT instead of a SUB from a constant.
3663 unsigned NotOpc;
3664 unsigned ZeroReg;
3665 EVT SubVT = ShiftAmt->getValueType(0);
3666 if (SubVT == MVT::i32) {
3667 NotOpc = AArch64::ORNWrr;
3668 ZeroReg = AArch64::WZR;
3669 } else {
3670 assert(SubVT == MVT::i64);
3671 NotOpc = AArch64::ORNXrr;
3672 ZeroReg = AArch64::XZR;
3673 }
3674 SDValue Zero =
3675 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3676 MachineSDNode *Not =
3677 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3678 NewShiftAmt = SDValue(Not, 0);
3679 } else
3680 return false;
3681 } else {
3682 // If the shift amount is masked with an AND, check that the mask covers the
3683 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3684 // the AND.
3685 uint64_t MaskImm;
3686 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3687 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3688 return false;
3689
3690 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3691 return false;
3692
3693 NewShiftAmt = ShiftAmt->getOperand(0);
3694 }
3695
3696 // Narrow/widen the shift amount to match the size of the shift operation.
3697 if (VT == MVT::i32)
3698 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3699 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3700 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3701 MachineSDNode *Ext = CurDAG->getMachineNode(
3702 AArch64::SUBREG_TO_REG, DL, VT,
3703 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3704 NewShiftAmt = SDValue(Ext, 0);
3705 }
3706
3707 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3708 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3709 return true;
3710}
3711
3712bool
3713AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3714 unsigned RegWidth) {
3715 APFloat FVal(0.0);
3716 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3717 FVal = CN->getValueAPF();
3718 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3719 // Some otherwise illegal constants are allowed in this case.
3720 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3721 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3722 return false;
3723
3724 ConstantPoolSDNode *CN =
3725 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3726 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3727 } else
3728 return false;
3729
3730 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3731 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3732 // x-register.
3733 //
3734 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3735 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3736 // integers.
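  // For example, if the fmul constant is 65536.0 (i.e. 2^16) and RegWidth is
  // 32, IntVal becomes 65536, FBits = 16, and the conversion uses 16
  // fractional bits.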
3737 bool IsExact;
3738
3739 // fbits is between 1 and 64 in the worst-case, which means the fmul
3740 // could have 2^64 as an actual operand. Need 65 bits of precision.
3741 APSInt IntVal(65, true);
3742 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3743
3744 // N.b. isPowerOf2 also checks for > 0.
3745 if (!IsExact || !IntVal.isPowerOf2()) return false;
3746 unsigned FBits = IntVal.logBase2();
3747
3748 // Checks above should have guaranteed that we haven't lost information in
3749 // finding FBits, but it must still be in range.
3750 if (FBits == 0 || FBits > RegWidth) return false;
3751
3752 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3753 return true;
3754}
3755
3756// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
3757// of the string, obtains the integer values from them, and combines these
3758// into a single value to be used in the MRS/MSR instruction.
3759static int getIntOperandFromRegisterString(StringRef RegString) {
3760 SmallVector<StringRef, 5> Fields;
3761 RegString.split(Fields, ':');
3762
3763 if (Fields.size() == 1)
3764 return -1;
3765
3766 assert(Fields.size() == 5
3767 && "Invalid number of fields in read register string");
3768
3769 SmallVector<int, 5> Ops;
3770 bool AllIntFields = true;
3771
3772 for (StringRef Field : Fields) {
3773 unsigned IntField;
3774 AllIntFields &= !Field.getAsInteger(10, IntField);
3775 Ops.push_back(IntField);
3776 }
3777
3778 assert(AllIntFields &&
3779 "Unexpected non-integer value in special register string.");
3780 (void)AllIntFields;
3781
3782 // Need to combine the integer fields of the string into a single value
3783 // based on the bit encoding of the MRS/MSR instruction.
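  // For example, the string "3:3:14:0:0" encodes as
  // (3 << 14) | (3 << 11) | (14 << 7) | (0 << 3) | 0 = 0xdf00.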
3784 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3785 (Ops[3] << 3) | (Ops[4]);
3786}
3787
3788// Lower the read_register intrinsic to an MRS instruction node if the special
3789// register string argument is either of the form detailed in the ACLE (the
3790// form described in getIntOperandFromRegisterString) or is a named register
3791// known by the MRS SysReg mapper.
3792bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3793 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3794 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3795 SDLoc DL(N);
3796
3797 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3798
3799 unsigned Opcode64Bit = AArch64::MRS;
3800 int Imm = getIntOperandFromRegisterString(RegString->getString());
3801 if (Imm == -1) {
3802 // No match. Use the sysreg mapper to map the remaining possible strings to
3803 // the value for the register to be used for the instruction operand.
3804 const auto *TheReg =
3805 AArch64SysReg::lookupSysRegByName(RegString->getString());
3806 if (TheReg && TheReg->Readable &&
3807 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3808 Imm = TheReg->Encoding;
3809 else
3810 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3811
3812 if (Imm == -1) {
3813 // Still no match, see if this is "pc" or give up.
3814 if (!ReadIs128Bit && RegString->getString() == "pc") {
3815 Opcode64Bit = AArch64::ADR;
3816 Imm = 0;
3817 } else {
3818 return false;
3819 }
3820 }
3821 }
3822
3823 SDValue InChain = N->getOperand(0);
3824 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3825 if (!ReadIs128Bit) {
3826 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3827 {SysRegImm, InChain});
3828 } else {
3829 SDNode *MRRS = CurDAG->getMachineNode(
3830 AArch64::MRRS, DL,
3831 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3832 {SysRegImm, InChain});
3833
3834 // Sysregs are not endian. The even register always contains the low half
3835 // of the register.
3836 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3837 SDValue(MRRS, 0));
3838 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3839 SDValue(MRRS, 0));
3840 SDValue OutChain = SDValue(MRRS, 1);
3841
3842 ReplaceUses(SDValue(N, 0), Lo);
3843 ReplaceUses(SDValue(N, 1), Hi);
3844 ReplaceUses(SDValue(N, 2), OutChain);
3845 };
3846 return true;
3847}
3848
3849// Lower the write_register intrinsic to an MSR instruction node if the special
3850// register string argument is either of the form detailed in the ACLE (the
3851// form described in getIntOperandFromRegisterString) or is a named register
3852// known by the MSR SysReg mapper.
3853bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3854 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3855 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3856 SDLoc DL(N);
3857
3858 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3859
3860 if (!WriteIs128Bit) {
3861 // Check if the register was one of those allowed as the pstatefield value
3862 // in the MSR (immediate) instruction. To accept the values allowed in the
3863 // pstatefield for the MSR (immediate) instruction, we also require that an
3864 // immediate value has been provided as an argument; we know that this is
3865 // the case because it has been ensured by semantic checking.
3866 auto trySelectPState = [&](auto PMapper, unsigned State) {
3867 if (PMapper) {
3868 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3869 "Expected a constant integer expression.");
3870 unsigned Reg = PMapper->Encoding;
3871 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3872 CurDAG->SelectNodeTo(
3873 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3874 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3875 return true;
3876 }
3877 return false;
3878 };
3879
3880 if (trySelectPState(
3881 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3882 AArch64::MSRpstateImm4))
3883 return true;
3884 if (trySelectPState(
3885 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3886 AArch64::MSRpstateImm1))
3887 return true;
3888 }
3889
3890 int Imm = getIntOperandFromRegisterString(RegString->getString());
3891 if (Imm == -1) {
3892 // Use the sysreg mapper to attempt to map the remaining possible strings
3893 // to the value for the register to be used for the MSR (register)
3894 // instruction operand.
3895 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3896 if (TheReg && TheReg->Writeable &&
3897 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3898 Imm = TheReg->Encoding;
3899 else
3900 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3901
3902 if (Imm == -1)
3903 return false;
3904 }
3905
3906 SDValue InChain = N->getOperand(0);
3907 if (!WriteIs128Bit) {
3908 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3909 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3910 N->getOperand(2), InChain);
3911 } else {
3912 // No endian swap. The lower half always goes into the even subreg, and the
3913 // higher half always into the odd subreg.
3914 SDNode *Pair = CurDAG->getMachineNode(
3915 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3916 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3917 MVT::i32),
3918 N->getOperand(2),
3919 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3920 N->getOperand(3),
3921 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3922
3923 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3924 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3925 SDValue(Pair, 0), InChain);
3926 }
3927
3928 return true;
3929}
3930
3931/// We've got special pseudo-instructions for these
3932bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3933 unsigned Opcode;
3934 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3935
3936 // Leave IR for LSE if subtarget supports it.
3937 if (Subtarget->hasLSE()) return false;
3938
3939 if (MemTy == MVT::i8)
3940 Opcode = AArch64::CMP_SWAP_8;
3941 else if (MemTy == MVT::i16)
3942 Opcode = AArch64::CMP_SWAP_16;
3943 else if (MemTy == MVT::i32)
3944 Opcode = AArch64::CMP_SWAP_32;
3945 else if (MemTy == MVT::i64)
3946 Opcode = AArch64::CMP_SWAP_64;
3947 else
3948 llvm_unreachable("Unknown AtomicCmpSwap type");
3949
3950 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3951 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3952 N->getOperand(0)};
3953 SDNode *CmpSwap = CurDAG->getMachineNode(
3954 Opcode, SDLoc(N),
3955 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3956
3957 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3958 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3959
3960 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3961 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3962 CurDAG->RemoveDeadNode(N);
3963
3964 return true;
3965}
3966
3967bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3968 SDValue &Shift) {
3969 if (!isa<ConstantSDNode>(N))
3970 return false;
3971
3972 SDLoc DL(N);
3973 uint64_t Val = cast<ConstantSDNode>(N)
3974 ->getAPIntValue()
3975 .trunc(VT.getFixedSizeInBits())
3976 .getZExtValue();
3977
3978 switch (VT.SimpleTy) {
3979 case MVT::i8:
3980 // All immediates are supported.
3981 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3982 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3983 return true;
3984 case MVT::i16:
3985 case MVT::i32:
3986 case MVT::i64:
3987 // Support 8bit unsigned immediates.
3988 if (Val <= 255) {
3989 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3990 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3991 return true;
3992 }
3993 // Support 16bit unsigned immediates that are a multiple of 256.
3994 if (Val <= 65280 && Val % 256 == 0) {
3995 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3996 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
3997 return true;
3998 }
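    // For example, Val = 512 is selected as Imm = 2 with Shift = 8, i.e. the
    // "#2, lsl #8" immediate form.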
3999 break;
4000 default:
4001 break;
4002 }
4003
4004 return false;
4005}
4006
4007bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4008 SDValue &Shift) {
4009 if (!isa<ConstantSDNode>(N))
4010 return false;
4011
4012 SDLoc DL(N);
4013 int64_t Val = cast<ConstantSDNode>(N)
4014 ->getAPIntValue()
4015 .trunc(VT.getFixedSizeInBits())
4016 .getSExtValue();
4017
4018 switch (VT.SimpleTy) {
4019 case MVT::i8:
4020 // All immediates are supported.
4021 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4022 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4023 return true;
4024 case MVT::i16:
4025 case MVT::i32:
4026 case MVT::i64:
4027 // Support 8bit signed immediates.
4028 if (Val >= -128 && Val <= 127) {
4029 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4030 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4031 return true;
4032 }
4033 // Support 16bit signed immediates that are a multiple of 256.
4034 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4035 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4036 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4037 return true;
4038 }
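    // For example, Val = -512 becomes Imm = 0xfe with Shift = 8, since
    // (-512 >> 8) & 0xff == 0xfe.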
4039 break;
4040 default:
4041 break;
4042 }
4043
4044 return false;
4045}
4046
4047bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4048 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4049 int64_t ImmVal = CNode->getSExtValue();
4050 SDLoc DL(N);
4051 if (ImmVal >= -128 && ImmVal < 128) {
4052 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4053 return true;
4054 }
4055 }
4056 return false;
4057}
4058
4059bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4060 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4061 uint64_t ImmVal = CNode->getZExtValue();
4062
4063 switch (VT.SimpleTy) {
4064 case MVT::i8:
4065 ImmVal &= 0xFF;
4066 break;
4067 case MVT::i16:
4068 ImmVal &= 0xFFFF;
4069 break;
4070 case MVT::i32:
4071 ImmVal &= 0xFFFFFFFF;
4072 break;
4073 case MVT::i64:
4074 break;
4075 default:
4076 llvm_unreachable("Unexpected type");
4077 }
4078
4079 if (ImmVal < 256) {
4080 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4081 return true;
4082 }
4083 }
4084 return false;
4085}
4086
4087bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4088 bool Invert) {
4089 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4090 uint64_t ImmVal = CNode->getZExtValue();
4091 SDLoc DL(N);
4092
4093 if (Invert)
4094 ImmVal = ~ImmVal;
4095
4096 // Shift mask depending on type size.
4097 switch (VT.SimpleTy) {
4098 case MVT::i8:
4099 ImmVal &= 0xFF;
4100 ImmVal |= ImmVal << 8;
4101 ImmVal |= ImmVal << 16;
4102 ImmVal |= ImmVal << 32;
4103 break;
4104 case MVT::i16:
4105 ImmVal &= 0xFFFF;
4106 ImmVal |= ImmVal << 16;
4107 ImmVal |= ImmVal << 32;
4108 break;
4109 case MVT::i32:
4110 ImmVal &= 0xFFFFFFFF;
4111 ImmVal |= ImmVal << 32;
4112 break;
4113 case MVT::i64:
4114 break;
4115 default:
4116 llvm_unreachable("Unexpected type");
4117 }
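    // For example, for VT == MVT::i16 an ImmVal of 0x00ff is replicated to
    // 0x00ff00ff00ff00ff before being encoded as a 64-bit logical immediate.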
4118
4119 uint64_t encoding;
4120 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4121 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4122 return true;
4123 }
4124 }
4125 return false;
4126}
4127
4128// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4129// Rather than attempt to normalise everything we can sometimes saturate the
4130// shift amount during selection. This function also allows for consistent
4131// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4132// required by the instructions.
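// For example, with Low = 1, High = 8 and AllowSaturation set, a requested
// shift amount of 17 is clamped to 8 rather than rejected.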
4133bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4134 uint64_t High, bool AllowSaturation,
4135 SDValue &Imm) {
4136 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4137 uint64_t ImmVal = CN->getZExtValue();
4138
4139 // Reject shift amounts that are too small.
4140 if (ImmVal < Low)
4141 return false;
4142
4143 // Reject or saturate shift amounts that are too big.
4144 if (ImmVal > High) {
4145 if (!AllowSaturation)
4146 return false;
4147 ImmVal = High;
4148 }
4149
4150 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4151 return true;
4152 }
4153
4154 return false;
4155}
4156
4157bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4158 // tagp(FrameIndex, IRGstack, tag_offset):
4159 // since the offset between FrameIndex and IRGstack is a compile-time
4160 // constant, this can be lowered to a single ADDG instruction.
4161 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4162 return false;
4163 }
4164
4165 SDValue IRG_SP = N->getOperand(2);
4166 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4167 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
4168 Intrinsic::aarch64_irg_sp) {
4169 return false;
4170 }
4171
4172 const TargetLowering *TLI = getTargetLowering();
4173 SDLoc DL(N);
4174 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4175 SDValue FiOp = CurDAG->getTargetFrameIndex(
4176 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4177 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
4178
4179 SDNode *Out = CurDAG->getMachineNode(
4180 AArch64::TAGPstack, DL, MVT::i64,
4181 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4182 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4183 ReplaceNode(N, Out);
4184 return true;
4185}
4186
4187void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4188 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4189 "llvm.aarch64.tagp third argument must be an immediate");
4190 if (trySelectStackSlotTagP(N))
4191 return;
4192 // FIXME: the above applies in any case when the offset between Op1 and Op2
4193 // is a compile-time constant, not just for stack allocations.
4194
4195 // General case for unrelated pointers in Op1 and Op2.
4196 SDLoc DL(N);
4197 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
4198 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4199 {N->getOperand(1), N->getOperand(2)});
4200 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4201 {SDValue(N1, 0), N->getOperand(2)});
4202 SDNode *N3 = CurDAG->getMachineNode(
4203 AArch64::ADDG, DL, MVT::i64,
4204 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4205 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4206 ReplaceNode(N, N3);
4207}
4208
4209bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4210 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4211
4212 // Bail when not a "cast" like insert_subvector.
4213 if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0)
4214 return false;
4215 if (!N->getOperand(0).isUndef())
4216 return false;
4217
4218 // Bail when normal isel should do the job.
4219 EVT VT = N->getValueType(0);
4220 EVT InVT = N->getOperand(1).getValueType();
4221 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4222 return false;
4223 if (InVT.getSizeInBits() <= 128)
4224 return false;
4225
4226 // NOTE: We can only get here when doing fixed length SVE code generation.
4227 // We do manual selection because the types involved are not linked to real
4228 // registers (despite being legal) and must be coerced into SVE registers.
4229
4231 "Expected to insert into a packed scalable vector!");
4232
4233 SDLoc DL(N);
4234 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4235 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4236 N->getOperand(1), RC));
4237 return true;
4238}
4239
4240bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4241 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4242
4243 // Bail when not a "cast" like extract_subvector.
4244 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0)
4245 return false;
4246
4247 // Bail when normal isel can do the job.
4248 EVT VT = N->getValueType(0);
4249 EVT InVT = N->getOperand(0).getValueType();
4250 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4251 return false;
4252 if (VT.getSizeInBits() <= 128)
4253 return false;
4254
4255 // NOTE: We can only get here when doing fixed length SVE code generation.
4256 // We do manual selection because the types involved are not linked to real
4257 // registers (despite being legal) and must be coerced into SVE registers.
4258
4260 "Expected to extract from a packed scalable vector!");
4261
4262 SDLoc DL(N);
4263 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4264 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4265 N->getOperand(0), RC));
4266 return true;
4267}
4268
4269void AArch64DAGToDAGISel::Select(SDNode *Node) {
4270 // If we have a custom node, we already have selected!
4271 if (Node->isMachineOpcode()) {
4272 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4273 Node->setNodeId(-1);
4274 return;
4275 }
4276
4277 // A few cases need custom selection.
4278 EVT VT = Node->getValueType(0);
4279
4280 switch (Node->getOpcode()) {
4281 default:
4282 break;
4283
4284 case ISD::ATOMIC_CMP_SWAP:
4285 if (SelectCMP_SWAP(Node))
4286 return;
4287 break;
4288
4289 case ISD::READ_REGISTER:
4290 case AArch64ISD::MRRS:
4291 if (tryReadRegister(Node))
4292 return;
4293 break;
4294
4295 case ISD::WRITE_REGISTER:
4296 case AArch64ISD::MSRR:
4297 if (tryWriteRegister(Node))
4298 return;
4299 break;
4300
4301 case ISD::ADD:
4302 if (tryMLAV64LaneV128(Node))
4303 return;
4304 break;
4305
4306 case ISD::LOAD: {
4307 // Try to select as an indexed load. Fall through to normal processing
4308 // if we can't.
4309 if (tryIndexedLoad(Node))
4310 return;
4311 break;
4312 }
4313
4314 case ISD::SRL:
4315 case ISD::AND:
4316 case ISD::SRA:
4317 case ISD::SIGN_EXTEND_INREG:
4318 if (tryBitfieldExtractOp(Node))
4319 return;
4320 if (tryBitfieldInsertInZeroOp(Node))
4321 return;
4322 [[fallthrough]];
4323 case ISD::ROTR:
4324 case ISD::SHL:
4325 if (tryShiftAmountMod(Node))
4326 return;
4327 break;
4328
4329 case ISD::SIGN_EXTEND:
4330 if (tryBitfieldExtractOpFromSExt(Node))
4331 return;
4332 break;
4333
4334 case ISD::FP_EXTEND:
4335 if (tryHighFPExt(Node))
4336 return;
4337 break;
4338
4339 case ISD::OR:
4340 if (tryBitfieldInsertOp(Node))
4341 return;
4342 break;
4343
4344 case ISD::EXTRACT_SUBVECTOR: {
4345 if (trySelectCastScalableToFixedLengthVector(Node))
4346 return;
4347 break;
4348 }
4349
4350 case ISD::INSERT_SUBVECTOR: {
4351 if (trySelectCastFixedLengthToScalableVector(Node))
4352 return;
4353 break;
4354 }
4355
4356 case ISD::Constant: {
4357 // Materialize zero constants as copies from WZR/XZR. This allows
4358 // the coalescer to propagate these into other instructions.
4359 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4360 if (ConstNode->isZero()) {
4361 if (VT == MVT::i32) {
4362 SDValue New = CurDAG->getCopyFromReg(
4363 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4364 ReplaceNode(Node, New.getNode());
4365 return;
4366 } else if (VT == MVT::i64) {
4367 SDValue New = CurDAG->getCopyFromReg(
4368 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4369 ReplaceNode(Node, New.getNode());
4370 return;
4371 }
4372 }
4373 break;
4374 }
4375
4376 case ISD::FrameIndex: {
4377 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4378 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4379 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4380 const TargetLowering *TLI = getTargetLowering();
4381 SDValue TFI = CurDAG->getTargetFrameIndex(
4382 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4383 SDLoc DL(Node);
4384 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4385 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4386 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4387 return;
4388 }
4389 case ISD::INTRINSIC_W_CHAIN: {
4390 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4391 switch (IntNo) {
4392 default:
4393 break;
4394 case Intrinsic::aarch64_ldaxp:
4395 case Intrinsic::aarch64_ldxp: {
4396 unsigned Op =
4397 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4398 SDValue MemAddr = Node->getOperand(2);
4399 SDLoc DL(Node);
4400 SDValue Chain = Node->getOperand(0);
4401
4402 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4403 MVT::Other, MemAddr, Chain);
4404
4405 // Transfer memoperands.
4406 MachineMemOperand *MemOp =
4407 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4408 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4409 ReplaceNode(Node, Ld);
4410 return;
4411 }
4412 case Intrinsic::aarch64_stlxp:
4413 case Intrinsic::aarch64_stxp: {
4414 unsigned Op =
4415 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4416 SDLoc DL(Node);
4417 SDValue Chain = Node->getOperand(0);
4418 SDValue ValLo = Node->getOperand(2);
4419 SDValue ValHi = Node->getOperand(3);
4420 SDValue MemAddr = Node->getOperand(4);
4421
4422 // Place arguments in the right order.
4423 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4424
4425 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4426 // Transfer memoperands.
4427 MachineMemOperand *MemOp =
4428 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4429 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4430
4431 ReplaceNode(Node, St);
4432 return;
4433 }
4434 case Intrinsic::aarch64_neon_ld1x2:
4435 if (VT == MVT::v8i8) {
4436 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4437 return;
4438 } else if (VT == MVT::v16i8) {
4439 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4440 return;
4441 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4442 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4443 return;
4444 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4445 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4446 return;
4447 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4448 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4449 return;
4450 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4451 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4452 return;
4453 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4454 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4455 return;
4456 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4457 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4458 return;
4459 }
4460 break;
4461 case Intrinsic::aarch64_neon_ld1x3:
4462 if (VT == MVT::v8i8) {
4463 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4464 return;
4465 } else if (VT == MVT::v16i8) {
4466 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4467 return;
4468 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4469 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4470 return;
4471 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4472 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4473 return;
4474 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4475 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4476 return;
4477 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4478 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4479 return;
4480 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4481 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4482 return;
4483 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4484 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4485 return;
4486 }
4487 break;
4488 case Intrinsic::aarch64_neon_ld1x4:
4489 if (VT == MVT::v8i8) {
4490 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4491 return;
4492 } else if (VT == MVT::v16i8) {
4493 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4494 return;
4495 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4496 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4497 return;
4498 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4499 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4500 return;
4501 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4502 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4503 return;
4504 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4505 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4506 return;
4507 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4508 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4509 return;
4510 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4511 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4512 return;
4513 }
4514 break;
4515 case Intrinsic::aarch64_neon_ld2:
4516 if (VT == MVT::v8i8) {
4517 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4518 return;
4519 } else if (VT == MVT::v16i8) {
4520 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4521 return;
4522 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4523 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4524 return;
4525 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4526 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4527 return;
4528 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4529 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4530 return;
4531 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4532 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4533 return;
4534 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4535 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4536 return;
4537 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4538 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4539 return;
4540 }
4541 break;
4542 case Intrinsic::aarch64_neon_ld3:
4543 if (VT == MVT::v8i8) {
4544 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4545 return;
4546 } else if (VT == MVT::v16i8) {
4547 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4548 return;
4549 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4550 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4551 return;
4552 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4553 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4554 return;
4555 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4556 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4557 return;
4558 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4559 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4560 return;
4561 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4562 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4563 return;
4564 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4565 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4566 return;
4567 }
4568 break;
4569 case Intrinsic::aarch64_neon_ld4:
4570 if (VT == MVT::v8i8) {
4571 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4572 return;
4573 } else if (VT == MVT::v16i8) {
4574 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4575 return;
4576 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4577 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4578 return;
4579 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4580 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4581 return;
4582 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4583 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4584 return;
4585 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4586 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4587 return;
4588 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4589 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4590 return;
4591 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4592 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4593 return;
4594 }
4595 break;
4596 case Intrinsic::aarch64_neon_ld2r:
4597 if (VT == MVT::v8i8) {
4598 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4599 return;
4600 } else if (VT == MVT::v16i8) {
4601 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4602 return;
4603 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4604 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4605 return;
4606 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4607 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4608 return;
4609 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4610 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4611 return;
4612 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4613 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4614 return;
4615 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4616 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4617 return;
4618 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4619 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4620 return;
4621 }
4622 break;
4623 case Intrinsic::aarch64_neon_ld3r:
4624 if (VT == MVT::v8i8) {
4625 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4626 return;
4627 } else if (VT == MVT::v16i8) {
4628 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4629 return;
4630 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4631 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4632 return;
4633 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4634 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4635 return;
4636 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4637 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4638 return;
4639 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4640 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4641 return;
4642 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4643 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4644 return;
4645 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4646 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4647 return;
4648 }
4649 break;
4650 case Intrinsic::aarch64_neon_ld4r:
4651 if (VT == MVT::v8i8) {
4652 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4653 return;
4654 } else if (VT == MVT::v16i8) {
4655 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4656 return;
4657 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4658 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4659 return;
4660 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4661 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4662 return;
4663 } else if