Line data Source code
1 : //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines an instruction selector for the AArch64 target.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "AArch64TargetMachine.h"
15 : #include "MCTargetDesc/AArch64AddressingModes.h"
16 : #include "llvm/ADT/APSInt.h"
17 : #include "llvm/CodeGen/SelectionDAGISel.h"
18 : #include "llvm/IR/Function.h" // To access function attributes.
19 : #include "llvm/IR/GlobalValue.h"
20 : #include "llvm/IR/Intrinsics.h"
21 : #include "llvm/Support/Debug.h"
22 : #include "llvm/Support/ErrorHandling.h"
23 : #include "llvm/Support/KnownBits.h"
24 : #include "llvm/Support/MathExtras.h"
25 : #include "llvm/Support/raw_ostream.h"
26 :
27 : using namespace llvm;
28 :
29 : #define DEBUG_TYPE "aarch64-isel"
30 :
31 : //===--------------------------------------------------------------------===//
32 : /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
33 : /// instructions for SelectionDAG operations.
34 : ///
35 : namespace {
36 :
37 : class AArch64DAGToDAGISel : public SelectionDAGISel {
38 :
39 : /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40 : /// make the right decision when generating code for different targets.
41 : const AArch64Subtarget *Subtarget;
42 :
43 : bool ForCodeSize;
44 :
45 : public:
46 : explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47 : CodeGenOpt::Level OptLevel)
48 1206 : : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
49 1206 : ForCodeSize(false) {}
50 :
51 50 : StringRef getPassName() const override {
52 50 : return "AArch64 Instruction Selection";
53 : }
54 :
55 14750 : bool runOnMachineFunction(MachineFunction &MF) override {
56 14750 : ForCodeSize = MF.getFunction().optForSize();
57 14750 : Subtarget = &MF.getSubtarget<AArch64Subtarget>();
58 14750 : return SelectionDAGISel::runOnMachineFunction(MF);
59 : }
60 :
61 : void Select(SDNode *Node) override;
62 :
63 : /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
64 : /// inline asm expressions.
65 : bool SelectInlineAsmMemoryOperand(const SDValue &Op,
66 : unsigned ConstraintID,
67 : std::vector<SDValue> &OutOps) override;
68 :
69 : bool tryMLAV64LaneV128(SDNode *N);
70 : bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71 : bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72 : bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73 : bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74 : bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75 2096 : return SelectShiftedRegister(N, false, Reg, Shift);
76 : }
77 : bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78 1478 : return SelectShiftedRegister(N, true, Reg, Shift);
79 : }
80 : bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81 : return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82 : }
83 : bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84 : return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85 : }
86 : bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87 15 : return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88 : }
89 : bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90 18 : return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91 : }
92 : bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93 : return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94 : }
95 : bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
96 465 : return SelectAddrModeIndexed(N, 1, Base, OffImm);
97 : }
98 : bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
99 326 : return SelectAddrModeIndexed(N, 2, Base, OffImm);
100 : }
101 : bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
102 1940 : return SelectAddrModeIndexed(N, 4, Base, OffImm);
103 : }
104 : bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
105 6360 : return SelectAddrModeIndexed(N, 8, Base, OffImm);
106 : }
107 : bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
108 2006 : return SelectAddrModeIndexed(N, 16, Base, OffImm);
109 : }
110 : bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
111 49 : return SelectAddrModeUnscaled(N, 1, Base, OffImm);
112 : }
113 : bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
114 66 : return SelectAddrModeUnscaled(N, 2, Base, OffImm);
115 : }
116 : bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
117 114 : return SelectAddrModeUnscaled(N, 4, Base, OffImm);
118 : }
119 : bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
120 99 : return SelectAddrModeUnscaled(N, 8, Base, OffImm);
121 : }
122 : bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
123 73 : return SelectAddrModeUnscaled(N, 16, Base, OffImm);
124 : }
125 :
126 : template<int Width>
127 0 : bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
128 : SDValue &SignExtend, SDValue &DoShift) {
129 11603 : return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
130 : }
131 0 :
132 : template<int Width>
133 0 : bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
134 : SDValue &SignExtend, SDValue &DoShift) {
135 0 : return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
136 : }
137 0 :
138 :
139 0 : /// Form sequences of consecutive 64/128-bit registers for use in NEON
140 : /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
141 0 : /// between 1 and 4 elements. If it contains a single element, it is returned
142 : /// unchanged; otherwise a REG_SEQUENCE value is returned.
143 0 : SDValue createDTuple(ArrayRef<SDValue> Vecs);
144 : SDValue createQTuple(ArrayRef<SDValue> Vecs);
145 0 :
146 : /// Generic helper for the createDTuple/createQTuple
147 0 : /// functions. Those should almost always be called instead.
148 : SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
149 0 : const unsigned SubRegs[]);
150 :
151 : void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
152 :
153 : bool tryIndexedLoad(SDNode *N);
154 :
155 11503 : void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
156 : unsigned SubRegIdx);
157 : void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
158 : unsigned SubRegIdx);
159 : void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160 : void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
161 :
162 : void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163 : void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
164 : void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165 : void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
166 :
167 : bool tryBitfieldExtractOp(SDNode *N);
168 : bool tryBitfieldExtractOpFromSExt(SDNode *N);
169 : bool tryBitfieldInsertOp(SDNode *N);
170 : bool tryBitfieldInsertInZeroOp(SDNode *N);
171 : bool tryShiftAmountMod(SDNode *N);
172 :
173 : bool tryReadRegister(SDNode *N);
174 : bool tryWriteRegister(SDNode *N);
175 :
176 : // Include the pieces autogenerated from the target description.
177 : #include "AArch64GenDAGISel.inc"
178 :
179 : private:
180 : bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
181 : SDValue &Shift);
182 : bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
183 : SDValue &OffImm);
184 : bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
185 : SDValue &OffImm);
186 : bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
187 : SDValue &OffImm);
188 : bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
189 : SDValue &Offset, SDValue &SignExtend,
190 : SDValue &DoShift);
191 : bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
192 : SDValue &Offset, SDValue &SignExtend,
193 : SDValue &DoShift);
194 : bool isWorthFolding(SDValue V) const;
195 : bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
196 : SDValue &Offset, SDValue &SignExtend);
197 :
198 : template<unsigned RegWidth>
199 : bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
200 : return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
201 : }
202 :
203 : bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
204 :
205 : bool SelectCMP_SWAP(SDNode *N);
206 :
207 : };
208 : } // end anonymous namespace
209 :
210 : /// isIntImmediate - This method tests to see if the node is a constant
211 : /// operand. If so, Imm will receive the 32-bit value.
212 : static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
213 : if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
214 : Imm = C->getZExtValue();
215 : return true;
216 : }
217 : return false;
218 : }
219 0 :
220 32 : // isIntImmediate - This method tests to see if the operand is a constant.
221 : // If so, Imm will receive the value.
222 0 : static bool isIntImmediate(SDValue N, uint64_t &Imm) {
223 0 : return isIntImmediate(N.getNode(), Imm);
224 : }
225 0 :
226 0 : // isOpcWithIntImmediate - This method tests to see if the node is a specific
227 : // opcode and that it has an immediate integer right operand.
228 : // If so, Imm will receive the 32-bit value.
229 : static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
230 : uint64_t &Imm) {
231 : return N->getOpcode() == Opc &&
232 : isIntImmediate(N->getOperand(1).getNode(), Imm);
233 : }
234 :
235 : bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
236 : const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
237 : switch(ConstraintID) {
238 : default:
239 : llvm_unreachable("Unexpected asm memory constraint");
240 1206 : case InlineAsm::Constraint_i:
241 : case InlineAsm::Constraint_m:
242 : case InlineAsm::Constraint_Q:
243 : // We need to make sure that this one operand does not end up in XZR, thus
244 : // require the address to be in a PointerRegClass register.
245 : const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
246 : const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
247 : SDLoc dl(Op);
248 0 : SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
249 0 : SDValue NewOp =
250 : SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
251 : dl, Op.getValueType(),
252 : Op, RC), 0);
253 : OutOps.push_back(NewOp);
254 : return false;
255 : }
256 : return true;
257 2695 : }
258 2824 :
259 : /// SelectArithImmed - Select an immediate value that can be represented as
260 : /// a 12-bit value shifted left by either 0 or 12. If so, return true with
261 2 : /// Val set to the 12-bit value and Shift set to the shifter operand.
262 : bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
263 : SDValue &Shift) {
264 0 : // This function is called from the addsub_shifted_imm ComplexPattern,
265 : // which lists [imm] as the list of opcodes it's interested in; however,
266 2 : // we still need to check whether the operand is actually an immediate
267 : // here because the ComplexPattern opcode list is only used in
268 : // root-level opcode matching.
269 : if (!isa<ConstantSDNode>(N.getNode()))
270 : return false;
271 2 :
272 2 : uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
273 : unsigned ShiftAmt;
274 4 :
275 : if (Immed >> 12 == 0) {
276 4 : ShiftAmt = 0;
277 : } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
278 : ShiftAmt = 12;
279 2 : Immed = Immed >> 12;
280 : } else
281 : return false;
282 :
283 : unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
284 : SDLoc dl(N);
285 : Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
286 : Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
287 : return true;
288 0 : }
289 :
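// Worked example (illustrative): an immediate of 0xabc is selected directly
// (Val = 0xabc, Shift = LSL #0); 0xabc000 is selected as Val = 0xabc with
// Shift = LSL #12; 0xabc00 is rejected because it is neither a 12-bit value
// nor a 12-bit value shifted left by 12.
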
290 : /// SelectNegArithImmed - As above, but negates the value before trying to
291 : /// select it.
292 : bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
293 : SDValue &Shift) {
294 : // This function is called from the addsub_shifted_imm ComplexPattern,
295 : // which lists [imm] as the list of opcodes it's interested in; however,
296 0 : // we still need to check whether the operand is actually an immediate
297 : // here because the ComplexPattern opcode list is only used in
298 0 : // root-level opcode matching.
299 : if (!isa<ConstantSDNode>(N.getNode()))
300 : return false;
301 0 :
302 : // The immediate operand must be a 24-bit zero-extended immediate.
303 0 : uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
304 :
305 : // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
306 : // have the opposite effect on the C flag, so this pattern mustn't match under
307 0 : // those circumstances.
308 : if (Immed == 0)
309 : return false;
310 0 :
311 0 : if (N.getValueType() == MVT::i32)
312 0 : Immed = ~((uint32_t)Immed) + 1;
313 : else
314 : Immed = ~Immed + 1ULL;
315 : if (Immed & 0xFFFFFFFFFF000000ULL)
316 : return false;
317 :
318 0 : Immed &= 0xFFFFFFULL;
319 : return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
320 : Shift);
321 : }
322 :
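// Worked example (illustrative): a "cmp w0, #-5" carries the i32 immediate
// 0xfffffffb; negating it yields 5, which SelectArithImmed accepts, so the
// comparison can be selected as "cmn w0, #5". An immediate of 0 is rejected
// above because negating it would flip the meaning of the C flag.
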
323 : /// getShiftTypeForNode - Translate a shift node to the corresponding
324 : /// ShiftType value.
325 : static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
326 0 : switch (N.getOpcode()) {
327 : default:
328 : return AArch64_AM::InvalidShiftExtend;
329 0 : case ISD::SHL:
330 : return AArch64_AM::LSL;
331 : case ISD::SRL:
332 : return AArch64_AM::LSR;
333 : case ISD::SRA:
334 0 : return AArch64_AM::ASR;
335 0 : case ISD::ROTR:
336 : return AArch64_AM::ROR;
337 0 : }
338 0 : }
339 :
340 0 : /// Determine whether it is worth it to fold SHL into the addressing
341 0 : /// mode.
342 0 : static bool isWorthFoldingSHL(SDValue V) {
343 : assert(V.getOpcode() == ISD::SHL && "invalid opcode");
344 0 : // It is worth folding logical shift of up to three places.
345 0 : auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
346 : if (!CSD)
347 : return false;
348 : unsigned ShiftVal = CSD->getZExtValue();
349 : if (ShiftVal > 3)
350 : return false;
351 0 :
352 0 : // Check if this particular node is reused in any non-memory related
353 : // operation. If yes, do not try to fold this node into the address
354 : // computation, since the computation will be kept.
355 0 : const SDNode *Node = V.getNode();
356 0 : for (SDNode *UI : Node->uses())
357 0 : if (!isa<MemSDNode>(*UI))
358 0 : for (SDNode *UII : UI->uses())
359 0 : if (!isa<MemSDNode>(*UII))
360 0 : return false;
361 0 : return true;
362 0 : }
363 :
364 : /// Determine whether it is worth folding V into an extended register.
365 : bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
366 : // Trivial if we are optimizing for code size or if there is only
367 : // one use of the value.
368 0 : if (ForCodeSize || V.hasOneUse())
369 : return true;
370 : // If a subtarget has a fastpath LSL we can fold a logical shift into
371 : // the addressing mode and save a cycle.
372 : if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
373 0 : isWorthFoldingSHL(V))
374 0 : return true;
375 0 : if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
376 0 : const SDValue LHS = V.getOperand(0);
377 : const SDValue RHS = V.getOperand(1);
378 : if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
379 : return true;
380 : if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
381 : return true;
382 0 : }
383 0 :
384 0 : // It hurts otherwise, since the value will be reused.
385 0 : return false;
386 0 : }
387 :
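// Example: on a subtarget with hasLSLFast(), an address such as
// (add base, (shl index, 3)) that feeds several loads is still considered
// worth folding, so each access can be selected as
// ldr x0, [base, index, lsl #3]
// even though the address computation has more than one use.
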
388 : /// SelectShiftedRegister - Select a "shifted register" operand. If the value
389 : /// is not shifted, set the Shift operand to default of "LSL 0". The logical
390 : /// instructions allow the shifted register to be rotated, but the arithmetic
391 0 : /// instructions do not. The AllowROR parameter specifies whether ROR is
392 : /// supported.
393 : bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
394 0 : SDValue &Reg, SDValue &Shift) {
395 0 : AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
396 : if (ShType == AArch64_AM::InvalidShiftExtend)
397 : return false;
398 0 : if (!AllowROR && ShType == AArch64_AM::ROR)
399 0 : return false;
400 0 :
401 0 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
402 0 : unsigned BitSize = N.getValueSizeInBits();
403 0 : unsigned Val = RHS->getZExtValue() & (BitSize - 1);
404 0 : unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
405 0 :
406 0 : Reg = N.getOperand(0);
407 0 : Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
408 : return isWorthFolding(N);
409 : }
410 :
411 : return false;
412 : }
413 :
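// Example: for the operand (shl w2, 3) of an add, this returns Reg = w2 and
// a shifter immediate of "LSL #3", allowing a selection such as
// add w0, w1, w2, lsl #3
// provided the shift is worth folding.
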
414 : /// getExtendTypeForNode - Translate an extend node to the corresponding
415 : /// ExtendType value.
416 : static AArch64_AM::ShiftExtendType
417 : getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
418 : if (N.getOpcode() == ISD::SIGN_EXTEND ||
419 3574 : N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
420 : EVT SrcVT;
421 3574 : if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
422 369 : SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
423 : else
424 369 : SrcVT = N.getOperand(0).getValueType();
425 :
426 : if (!IsLoadStore && SrcVT == MVT::i8)
427 : return AArch64_AM::SXTB;
428 267 : else if (!IsLoadStore && SrcVT == MVT::i16)
429 534 : return AArch64_AM::SXTH;
430 : else if (SrcVT == MVT::i32)
431 : return AArch64_AM::SXTW;
432 267 : assert(SrcVT != MVT::i64 && "extend from 64-bits?");
433 267 :
434 267 : return AArch64_AM::InvalidShiftExtend;
435 : } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
436 : N.getOpcode() == ISD::ANY_EXTEND) {
437 : EVT SrcVT = N.getOperand(0).getValueType();
438 : if (!IsLoadStore && SrcVT == MVT::i8)
439 : return AArch64_AM::UXTB;
440 : else if (!IsLoadStore && SrcVT == MVT::i16)
441 : return AArch64_AM::UXTH;
442 : else if (SrcVT == MVT::i32)
443 3998 : return AArch64_AM::UXTW;
444 3998 : assert(SrcVT != MVT::i64 && "extend from 64-bits?");
445 :
446 : return AArch64_AM::InvalidShiftExtend;
447 212 : } else if (N.getOpcode() == ISD::AND) {
448 : ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
449 : if (!CSD)
450 119 : return AArch64_AM::InvalidShiftExtend;
451 : uint64_t AndMask = CSD->getZExtValue();
452 212 :
453 37 : switch (AndMask) {
454 175 : default:
455 45 : return AArch64_AM::InvalidShiftExtend;
456 : case 0xFF:
457 130 : return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
458 : case 0xFFFF:
459 : return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
460 0 : case 0xFFFFFFFF:
461 3786 : return AArch64_AM::UXTW;
462 : }
463 62 : }
464 62 :
465 0 : return AArch64_AM::InvalidShiftExtend;
466 62 : }
467 0 :
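// For example, (and x1, 0xffff) is treated as a UXTH extend of the low half
// of x1, and (sign_extend_inreg x, i8) maps to SXTB. Note that the 0xff and
// 0xffff masks only count as extends for arithmetic operands; when
// IsLoadStore is set they are rejected.
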
468 : // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
469 62 : static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
470 : if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
471 : DL->getOpcode() != AArch64ISD::DUPLANE32)
472 0 : return false;
473 3724 :
474 : SDValue SV = DL->getOperand(0);
475 : if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
476 : return false;
477 170 :
478 : SDValue EV = SV.getOperand(1);
479 170 : if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
480 : return false;
481 :
482 45 : ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
483 78 : ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
484 50 : LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
485 98 : LaneOp = EV.getOperand(0);
486 12 :
487 12 : return true;
488 : }
489 :
490 : // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
491 : // high lane extract.
492 : static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
493 : SDValue &LaneOp, int &LaneIdx) {
494 :
495 540 : if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
496 1080 : std::swap(Op0, Op1);
497 : if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
498 : return false;
499 : }
500 109 : StdOp = Op1;
501 109 : return true;
502 : }
503 :
504 52 : /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
505 52 : /// is a lane in the upper half of a 128-bit vector. Recognize and select this
506 : /// so that we don't emit unnecessary lane extracts.
507 : bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
508 0 : SDLoc dl(N);
509 0 : SDValue Op0 = N->getOperand(0);
510 0 : SDValue Op1 = N->getOperand(1);
511 0 : SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
512 : SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
513 0 : int LaneIdx = -1; // Will hold the lane index.
514 :
515 : if (Op1.getOpcode() != ISD::MUL ||
516 : !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
517 : LaneIdx)) {
518 270 : std::swap(Op0, Op1);
519 : if (Op1.getOpcode() != ISD::MUL ||
520 : !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
521 270 : LaneIdx))
522 : return false;
523 270 : }
524 :
525 : SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
526 0 :
527 0 : SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
528 :
529 : unsigned MLAOpc = ~0U;
530 :
531 : switch (N->getSimpleValueType(0).SimpleTy) {
532 : default:
533 2584 : llvm_unreachable("Unrecognized MLA.");
534 : case MVT::v4i16:
535 2584 : MLAOpc = AArch64::MLAv4i16_indexed;
536 2584 : break;
537 2584 : case MVT::v8i16:
538 2584 : MLAOpc = AArch64::MLAv8i16_indexed;
539 2584 : break;
540 : case MVT::v2i32:
541 2639 : MLAOpc = AArch64::MLAv2i32_indexed;
542 55 : break;
543 : case MVT::v4i32:
544 : MLAOpc = AArch64::MLAv4i32_indexed;
545 2653 : break;
546 69 : }
547 :
548 2584 : ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
549 : return true;
550 : }
551 0 :
552 : bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
553 0 : SDLoc dl(N);
554 : SDValue SMULLOp0;
555 : SDValue SMULLOp1;
556 : int LaneIdx;
557 0 :
558 0 : if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
559 0 : LaneIdx))
560 : return false;
561 :
562 : SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
563 0 :
564 : SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
565 0 :
566 0 : unsigned SMULLOpc = ~0U;
567 :
568 0 : if (IntNo == Intrinsic::aarch64_neon_smull) {
569 0 : switch (N->getSimpleValueType(0).SimpleTy) {
570 : default:
571 0 : llvm_unreachable("Unrecognized SMULL.");
572 : case MVT::v4i32:
573 : SMULLOpc = AArch64::SMULLv4i16_indexed;
574 0 : break;
575 0 : case MVT::v2i64:
576 : SMULLOpc = AArch64::SMULLv2i32_indexed;
577 : break;
578 146 : }
579 : } else if (IntNo == Intrinsic::aarch64_neon_umull) {
580 146 : switch (N->getSimpleValueType(0).SimpleTy) {
581 146 : default:
582 : llvm_unreachable("Unrecognized SMULL.");
583 : case MVT::v4i32:
584 292 : SMULLOpc = AArch64::UMULLv4i16_indexed;
585 : break;
586 : case MVT::v2i64:
587 : SMULLOpc = AArch64::UMULLv2i32_indexed;
588 0 : break;
589 : }
590 0 : } else
591 : llvm_unreachable("Unrecognized intrinsic.");
592 :
593 : ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
594 0 : return true;
595 0 : }
596 0 :
597 0 : /// Instructions that accept extend modifiers like UXTW expect the register
598 : /// being extended to be a GPR32, but the incoming DAG might be acting on a
599 : /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
600 : /// this is the case.
601 0 : static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
602 : if (N.getValueType() == MVT::i32)
603 0 : return N;
604 :
605 0 : SDLoc dl(N);
606 0 : SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
607 0 : MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
608 0 : dl, MVT::i32, N, SubReg);
609 : return SDValue(Node, 0);
610 : }
611 :
612 0 :
613 : /// SelectArithExtendedRegister - Select an "extended register" operand. This
614 0 : /// operand folds in an extend followed by an optional left shift.
615 : bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
616 : SDValue &Shift) {
617 0 : unsigned ShiftVal = 0;
618 : AArch64_AM::ShiftExtendType Ext;
619 0 :
620 0 : if (N.getOpcode() == ISD::SHL) {
621 : ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
622 : if (!CSD)
623 : return false;
624 : ShiftVal = CSD->getZExtValue();
625 : if (ShiftVal > 4)
626 : return false;
627 396 :
628 : Ext = getExtendTypeForNode(N.getOperand(0));
629 315 : if (Ext == AArch64_AM::InvalidShiftExtend)
630 : return false;
631 :
632 81 : Reg = N.getOperand(0).getOperand(0);
633 81 : } else {
634 : Ext = getExtendTypeForNode(N);
635 81 : if (Ext == AArch64_AM::InvalidShiftExtend)
636 : return false;
637 :
638 : Reg = N.getOperand(0);
639 :
640 : // Don't match if free 32-bit -> 64-bit zext can be used instead.
641 3051 : if (Ext == AArch64_AM::UXTW &&
642 : Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
643 : return false;
644 : }
645 :
646 3051 : // AArch64 mandates that the RHS of the operation must use the smallest
647 : // register class that could contain the size being extended from. Thus,
648 : // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
649 : // there might not be an actual 32-bit value in the program. We can
650 : // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
651 191 : assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
652 : Reg = narrowIfNeeded(CurDAG, Reg);
653 : Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
654 159 : MVT::i32);
655 159 : return isWorthFolding(N);
656 : }
657 :
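// Example: the operand (shl (and x1, 0xff), 2) of a 64-bit add is matched
// with Ext = UXTB, ShiftVal = 2 and Reg narrowed from x1 to w1, so the whole
// expression can be selected as
// add x0, x0, w1, uxtb #2
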
658 154 : /// If there's a use of this ADDlow that's not itself a load/store then we'll
659 : /// need to create a real ADD instruction from it anyway and there's no point in
660 2856 : /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
661 2856 : /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
662 : /// leads to duplicated ADRP instructions.
663 : static bool isWorthFoldingADDlow(SDValue N) {
664 181 : for (auto Use : N->uses()) {
665 : if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
666 : Use->getOpcode() != ISD::ATOMIC_LOAD &&
667 23 : Use->getOpcode() != ISD::ATOMIC_STORE)
668 197 : return false;
669 6 :
670 : // ldar and stlr have much more restrictive addressing modes (just a
671 : // register).
672 : if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
673 : return false;
674 : }
675 :
676 : return true;
677 : }
678 252 :
679 252 : /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
680 252 : /// immediate" address. The "Size" argument is the size in bytes of the memory
681 252 : /// reference, which determines the scale.
682 : bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
683 : SDValue &Base,
684 : SDValue &OffImm) {
685 : SDLoc dl(N);
686 : const DataLayout &DL = CurDAG->getDataLayout();
687 : const TargetLowering *TLI = getTargetLowering();
688 : if (N.getOpcode() == ISD::FrameIndex) {
689 0 : int FI = cast<FrameIndexSDNode>(N)->getIndex();
690 0 : Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
691 0 : OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
692 0 : return true;
693 : }
694 0 :
695 : // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
696 : // selected here doesn't support labels/immediates, only base+offset.
697 :
698 0 : if (CurDAG->isBaseWithConstantOffset(N)) {
699 0 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
700 : int64_t RHSC = RHS->getSExtValue();
701 : unsigned Scale = Log2_32(Size);
702 : if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
703 : RHSC < (0x40 << Scale)) {
704 : Base = N.getOperand(0);
705 : if (Base.getOpcode() == ISD::FrameIndex) {
706 : int FI = cast<FrameIndexSDNode>(Base)->getIndex();
707 : Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
708 0 : }
709 : OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
710 : return true;
711 0 : }
712 0 : }
713 : }
714 0 :
715 0 : // Base only. The address will be materialized into a register before
716 0 : // the memory is accessed.
717 0 : // add x0, Xbase, #offset
718 0 : // stp x1, x2, [x0]
719 : Base = N;
720 : OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
721 : return true;
722 : }
723 :
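// Example: for a pair of 64-bit registers (Size == 8) the offset must be a
// multiple of 8 in [-512, 504], so an access at offset 24 is encoded with
// OffImm = 3 (24 >> 3), e.g.
// stp x1, x2, [x0, #24]
// while an offset of 520 falls back to the base-only form above.
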
724 0 : /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
725 : /// immediate" address. The "Size" argument is the size in bytes of the memory
726 0 : /// reference, which determines the scale.
727 : bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
728 0 : SDValue &Base, SDValue &OffImm) {
729 0 : SDLoc dl(N);
730 0 : const DataLayout &DL = CurDAG->getDataLayout();
731 0 : const TargetLowering *TLI = getTargetLowering();
732 0 : if (N.getOpcode() == ISD::FrameIndex) {
733 0 : int FI = cast<FrameIndexSDNode>(N)->getIndex();
734 : Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
735 0 : OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
736 0 : return true;
737 : }
738 :
739 : if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
740 : GlobalAddressSDNode *GAN =
741 : dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
742 : Base = N.getOperand(0);
743 : OffImm = N.getOperand(1);
744 : if (!GAN)
745 0 : return true;
746 0 :
747 0 : if (GAN->getOffset() % Size == 0) {
748 : const GlobalValue *GV = GAN->getGlobal();
749 : unsigned Alignment = GV->getAlignment();
750 : Type *Ty = GV->getValueType();
751 : if (Alignment == 0 && Ty->isSized())
752 : Alignment = DL.getABITypeAlignment(Ty);
753 11097 :
754 : if (Alignment >= Size)
755 : return true;
756 11097 : }
757 : }
758 11097 :
759 881 : if (CurDAG->isBaseWithConstantOffset(N)) {
760 881 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
761 881 : int64_t RHSC = (int64_t)RHS->getZExtValue();
762 881 : unsigned Scale = Log2_32(Size);
763 : if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
764 : Base = N.getOperand(0);
765 10216 : if (Base.getOpcode() == ISD::FrameIndex) {
766 : int FI = cast<FrameIndexSDNode>(Base)->getIndex();
767 1503 : Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
768 1503 : }
769 1503 : OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
770 1503 : return true;
771 : }
772 : }
773 1352 : }
774 1351 :
775 1351 : // Before falling back to our general case, check if the unscaled
776 1351 : // instructions can handle this. If so, that's preferable.
777 1351 : if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
778 1119 : return false;
779 :
780 1351 : // Base only. The address will be materialized into a register before
781 : // the memory is accessed.
782 : // add x0, Xbase, #offset
783 : // ldr x0, [x0]
784 : Base = N;
785 8733 : OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
786 : return true;
787 9370 : }
788 :
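// Example: for a 64-bit load (Size == 8) the offset must be a multiple of 8
// in [0, 32760], so "ldr x0, [x1, #32]" is encoded with OffImm = 4, whereas
// offsets such as -8 or 12 are left to the unscaled (LDUR/STUR) form checked
// just above.
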
789 4685 : /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
790 4265 : /// immediate" address. This should only match when there is an offset that
791 8530 : /// is not valid for a scaled immediate addressing mode. The "Size" argument
792 390 : /// is the size in bytes of the memory reference, which is needed here to know
793 780 : /// what is valid for a scaled immediate.
794 : bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
795 4265 : SDValue &Base,
796 4265 : SDValue &OffImm) {
797 : if (!CurDAG->isBaseWithConstantOffset(N))
798 : return false;
799 : if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
800 : int64_t RHSC = RHS->getSExtValue();
801 : // If the offset is valid as a scaled immediate, don't match here.
802 : if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
803 4468 : RHSC < (0x1000 << Log2_32(Size)))
804 : return false;
805 : if (RHSC >= -256 && RHSC < 256) {
806 : Base = N.getOperand(0);
807 : if (Base.getOpcode() == ISD::FrameIndex) {
808 : int FI = cast<FrameIndexSDNode>(Base)->getIndex();
809 : const TargetLowering *TLI = getTargetLowering();
810 4067 : Base = CurDAG->getTargetFrameIndex(
811 4067 : FI, TLI->getPointerTy(CurDAG->getDataLayout()));
812 4067 : }
813 : OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
814 : return true;
815 : }
816 : }
817 : return false;
818 : }
819 :
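// Example: an 8-byte access at offset -8 is not a valid scaled unsigned
// offset, but it does lie in the signed 9-bit range [-256, 255], so it is
// matched here and selected as the unscaled form
// ldur x0, [x1, #-8]
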
820 0 : static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
821 : SDLoc dl(N);
822 : SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
823 0 : SDValue ImpDef = SDValue(
824 0 : CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
825 : MachineSDNode *Node = CurDAG->getMachineNode(
826 0 : TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
827 : return SDValue(Node, 0);
828 0 : }
829 0 :
830 0 : /// Check if the given SHL node (\p N), can be used to form an
831 0 : /// extended register for an addressing mode.
832 0 : bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
833 0 : bool WantExtend, SDValue &Offset,
834 0 : SDValue &SignExtend) {
835 : assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
836 0 : ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
837 0 : if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
838 : return false;
839 0 :
840 0 : SDLoc dl(N);
841 : if (WantExtend) {
842 : AArch64_AM::ShiftExtendType Ext =
843 : getExtendTypeForNode(N.getOperand(0), true);
844 : if (Ext == AArch64_AM::InvalidShiftExtend)
845 : return false;
846 11 :
847 : Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
848 11 : SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
849 : MVT::i32);
850 11 : } else {
851 11 : Offset = N.getOperand(0);
852 : SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
853 11 : }
854 :
855 : unsigned LegalShiftVal = Log2_32(Size);
856 : unsigned ShiftVal = CSD->getZExtValue();
857 :
858 471 : if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
859 : return false;
860 :
861 : return isWorthFolding(N);
862 : }
863 942 :
864 : bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
865 : SDValue &Base, SDValue &Offset,
866 : SDValue &SignExtend,
867 471 : SDValue &DoShift) {
868 : if (N.getOpcode() != ISD::ADD)
869 264 : return false;
870 264 : SDValue LHS = N.getOperand(0);
871 : SDValue RHS = N.getOperand(1);
872 : SDLoc dl(N);
873 118 :
874 118 : // We don't want to match immediate adds here, because they are better lowered
875 59 : // to the register-immediate addressing modes.
876 : if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
877 207 : return false;
878 207 :
879 : // Check if this particular node is reused in any non-memory related
880 : // operation. If yes, do not try to fold this node into the address
881 : // computation, since the computation will be kept.
882 266 : const SDNode *Node = N.getNode();
883 : for (SDNode *UI : Node->uses()) {
884 266 : if (!isa<MemSDNode>(*UI))
885 : return false;
886 : }
887 241 :
888 : // Remember if it is worth folding N when it produces extended register.
889 : bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
890 0 :
891 : // Try to match a shifted extend on the RHS.
892 : if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
893 : SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
894 0 : Base = LHS;
895 0 : DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
896 0 : return true;
897 0 : }
898 0 :
899 : // Try to match a shifted extend on the LHS.
900 : if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
901 : SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
902 : Base = RHS;
903 0 : DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
904 : return true;
905 : }
906 :
907 : // There was no shift, whatever else we find.
908 : DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
909 0 :
910 0 : AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
911 0 : // Try to match an unshifted extend on the LHS.
912 : if (IsExtendedRegisterWorthFolding &&
913 : (Ext = getExtendTypeForNode(LHS, true)) !=
914 : AArch64_AM::InvalidShiftExtend) {
915 0 : Base = RHS;
916 : Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
917 : SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
918 0 : MVT::i32);
919 0 : if (isWorthFolding(LHS))
920 0 : return true;
921 0 : }
922 0 :
923 : // Try to match an unshifted extend on the RHS.
924 : if (IsExtendedRegisterWorthFolding &&
925 : (Ext = getExtendTypeForNode(RHS, true)) !=
926 0 : AArch64_AM::InvalidShiftExtend) {
927 0 : Base = LHS;
928 0 : Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
929 0 : SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
930 0 : MVT::i32);
931 : if (isWorthFolding(RHS))
932 : return true;
933 : }
934 0 :
935 : return false;
936 : }
937 :
938 0 : // Check if the given immediate is preferred by ADD. If an immediate can be
939 0 : // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
940 : // encoded by one MOVZ, return true.
941 0 : static bool isPreferredADD(int64_t ImmOff) {
942 0 : // Constant in [0x0, 0xfff] can be encoded in ADD.
943 0 : if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
944 0 : return true;
945 0 : // Check if it can be encoded in an "ADD LSL #12".
946 0 : if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
947 : // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
948 : return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
949 : (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
950 0 : return false;
951 0 : }
952 :
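// Example: 0x45000 is preferred, since it encodes as
// add x1, x0, #0x45, lsl #12
// but would otherwise need a MOVZ plus a MOVK; 0xa0000 is not preferred,
// because a single "movz x1, #0xa, lsl #16" already materializes it.
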
953 0 : bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
954 0 : SDValue &Base, SDValue &Offset,
955 0 : SDValue &SignExtend,
956 0 : SDValue &DoShift) {
957 0 : if (N.getOpcode() != ISD::ADD)
958 0 : return false;
959 : SDValue LHS = N.getOperand(0);
960 : SDValue RHS = N.getOperand(1);
961 : SDLoc DL(N);
962 :
963 : // Check if this particular node is reused in any non-memory related
964 : // operation. If yes, do not try to fold this node into the address
965 : // computation, since the computation will be kept.
966 : const SDNode *Node = N.getNode();
967 : for (SDNode *UI : Node->uses()) {
968 : if (!isa<MemSDNode>(*UI))
969 457 : return false;
970 : }
971 :
972 : // Watch out if RHS is a wide immediate: it cannot be selected into the
973 : // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
974 16 : // either. In that case the code would use the [BaseReg + 0] addressing mode
975 15 : // and generate instructions like:
976 : // MOV X0, WideImmediate
977 : // ADD X1, BaseReg, X0
978 : // LDR X2, [X1, 0]
979 11503 : // For such situation, using [BaseReg, XReg] addressing mode can save one
980 : // ADD/SUB:
981 : // MOV X0, WideImmediate
982 : // LDR X2, [BaseReg, X0]
983 11503 : if (isa<ConstantSDNode>(RHS)) {
984 : int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
985 5060 : unsigned Scale = Log2_32(Size);
986 : // Skip if the immediate can be selected by the load/store addressing mode.
987 : // Also skip if the immediate can be encoded by a single ADD (SUB is also
988 : // checked by using -ImmOff).
989 : if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
990 : isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
991 : return false;
992 :
993 11324 : SDValue Ops[] = { RHS };
994 6313 : SDNode *MOVI =
995 : CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
996 : SDValue MOVIV = SDValue(MOVI, 0);
997 : // This ADD of two X register will be selected into [Reg+Reg] mode.
998 : N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
999 : }
1000 :
1001 : // Remember if it is worth folding N when it produces extended register.
1002 : bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1003 :
1004 : // Try to match a shifted extend on the RHS.
1005 : if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1006 : SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1007 : Base = LHS;
1008 : DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1009 : return true;
1010 9388 : }
1011 :
1012 : // Try to match a shifted extend on the LHS.
1013 : if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1014 : SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1015 4552 : Base = RHS;
1016 5025 : DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1017 4669 : return true;
1018 : }
1019 25 :
1020 : // Match any non-shifted, non-extend, non-immediate add expression.
1021 50 : Base = LHS;
1022 : Offset = RHS;
1023 : SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1024 50 : DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1025 : // Reg1 + Reg2 is free: no check needed.
1026 : return true;
1027 : }
1028 342 :
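// Example: for a base register plus the wide offset 0x123456, which fits
// neither the scaled unsigned immediate form nor a single ADD/SUB, the
// offset is materialized with MOVi64imm and the access uses the
// [BaseReg, XReg] form described in the comment above, saving an ADD.
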
1029 : SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1030 : static const unsigned RegClassIDs[] = {
1031 517 : AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1032 175 : static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1033 119 : AArch64::dsub2, AArch64::dsub3};
1034 119 :
1035 119 : return createTuple(Regs, RegClassIDs, SubRegs);
1036 : }
1037 :
1038 : SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1039 255 : static const unsigned RegClassIDs[] = {
1040 32 : AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1041 32 : static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1042 32 : AArch64::qsub2, AArch64::qsub3};
1043 32 :
1044 : return createTuple(Regs, RegClassIDs, SubRegs);
1045 : }
1046 :
1047 191 : SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1048 191 : const unsigned RegClassIDs[],
1049 191 : const unsigned SubRegs[]) {
1050 191 : // There's no special register-class for a vector-list of 1 element: it's just
1051 : // a vector.
1052 191 : if (Regs.size() == 1)
1053 : return Regs[0];
1054 :
1055 : assert(Regs.size() >= 2 && Regs.size() <= 4);
1056 :
1057 : SDLoc DL(Regs[0]);
1058 :
1059 : SmallVector<SDValue, 4> Ops;
1060 :
1061 149 : // First operand of REG_SEQUENCE is the desired RegClass.
1062 : Ops.push_back(
1063 : CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1064 :
1065 : // Then we get pairs of source & subregister-position for the components.
1066 : for (unsigned i = 0; i < Regs.size(); ++i) {
1067 : Ops.push_back(Regs[i]);
1068 : Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1069 : }
1070 385 :
1071 : SDNode *N =
1072 : CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1073 0 : return SDValue(N, 0);
1074 : }
1075 :
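// Example: createQTuple with two Q registers produces a REG_SEQUENCE in the
// QQRegClassID register class, with the inputs placed in qsub0 and qsub1;
// this is the vector-list operand form used by the ldN/stN and tbl
// selections in this file.
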
1076 : void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1077 : bool isExt) {
1078 0 : SDLoc dl(N);
1079 0 : EVT VT = N->getValueType(0);
1080 :
1081 : unsigned ExtOff = isExt;
1082 :
1083 0 : // Form a REG_SEQUENCE to force register allocation.
1084 : unsigned Vec0Off = ExtOff + 1;
1085 : SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1086 : N->op_begin() + Vec0Off + NumVecs);
1087 : SDValue RegSeq = createQTuple(Regs);
1088 0 :
1089 0 : SmallVector<SDValue, 6> Ops;
1090 : if (isExt)
1091 : Ops.push_back(N->getOperand(1));
1092 0 : Ops.push_back(RegSeq);
1093 0 : Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1094 0 : ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1095 : }
1096 :
1097 : bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1098 0 : LoadSDNode *LD = cast<LoadSDNode>(N);
1099 0 : if (LD->isUnindexed())
1100 : return false;
1101 : EVT VT = LD->getMemoryVT();
1102 14 : EVT DstVT = N->getValueType(0);
1103 : ISD::MemIndexedMode AM = LD->getAddressingMode();
1104 : bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1105 14 :
1106 : // We're not doing validity checking here. That was done when checking
1107 14 : // if we should mark the load as indexed or not. We're just selecting
1108 : // the right instruction.
1109 : unsigned Opcode = 0;
1110 14 :
1111 14 : ISD::LoadExtType ExtType = LD->getExtensionType();
1112 14 : bool InsertTo64 = false;
1113 14 : if (VT == MVT::i64)
1114 : Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1115 : else if (VT == MVT::i32) {
1116 14 : if (ExtType == ISD::NON_EXTLOAD)
1117 12 : Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1118 14 : else if (ExtType == ISD::SEXTLOAD)
1119 28 : Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1120 28 : else {
1121 14 : Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1122 : InsertTo64 = true;
1123 6792 : // The result of the load is only i32. It's the subreg_to_reg that makes
1124 : // it into an i64.
1125 6792 : DstVT = MVT::i32;
1126 : }
1127 49 : } else if (VT == MVT::i16) {
1128 98 : if (ExtType == ISD::SEXTLOAD) {
1129 : if (DstVT == MVT::i64)
1130 49 : Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1131 : else
1132 : Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1133 : } else {
1134 : Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1135 : InsertTo64 = DstVT == MVT::i64;
1136 : // The result of the load is only i32. It's the subreg_to_reg that makes
1137 : // it into an i64.
1138 : DstVT = MVT::i32;
1139 : }
1140 3 : } else if (VT == MVT::i8) {
1141 : if (ExtType == ISD::SEXTLOAD) {
1142 8 : if (DstVT == MVT::i64)
1143 7 : Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1144 1 : else
1145 1 : Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1146 : } else {
1147 0 : Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1148 : InsertTo64 = DstVT == MVT::i64;
1149 : // The result of the load is only i32. It's the subreg_to_reg that makes
1150 : // it into an i64.
1151 0 : DstVT = MVT::i32;
1152 : }
1153 : } else if (VT == MVT::f16) {
1154 4 : Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1155 : } else if (VT == MVT::f32) {
1156 1 : Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1157 : } else if (VT == MVT::f64 || VT.is64BitVector()) {
1158 1 : Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1159 : } else if (VT.is128BitVector()) {
1160 2 : Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1161 : } else
1162 : return false;
1163 : SDValue Chain = LD->getChain();
1164 2 : SDValue Base = LD->getBasePtr();
1165 : ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1166 : int OffsetVal = (int)OffsetOp->getZExtValue();
1167 5 : SDLoc dl(N);
1168 : SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1169 1 : SDValue Ops[] = { Base, Offset, Chain };
1170 : SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1171 1 : MVT::Other, Ops);
1172 : // Either way, we're replacing the node, so tell the caller that.
1173 3 : SDValue LoadedVal = SDValue(Res, 1);
1174 : if (InsertTo64) {
1175 : SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1176 : LoadedVal =
1177 3 : SDValue(CurDAG->getMachineNode(
1178 : AArch64::SUBREG_TO_REG, dl, MVT::i64,
1179 : CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1180 1 : SubReg),
1181 : 0);
1182 1 : }
1183 25 :
1184 14 : ReplaceUses(SDValue(N, 0), LoadedVal);
1185 13 : ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1186 13 : ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1187 : CurDAG->RemoveDeadNode(N);
1188 : return true;
1189 49 : }
1190 49 :
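// Example: a post-incremented 64-bit load (load *p, then advance p by 8)
// selects to LDRXpost; result 0 of the machine node is the written-back
// pointer and result 1 is the loaded value, and they replace the
// corresponding results of the original indexed-load node.
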
1191 : void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1192 98 : unsigned SubRegIdx) {
1193 : SDLoc dl(N);
1194 49 : EVT VT = N->getValueType(0);
1195 49 : SDValue Chain = N->getOperand(0);
1196 98 :
1197 : SDValue Ops[] = {N->getOperand(2), // Mem operand;
1198 : Chain};
1199 :
1200 49 : const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1201 2 :
1202 : SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1203 2 : SDValue SuperReg = SDValue(Ld, 0);
1204 : for (unsigned i = 0; i < NumVecs; ++i)
1205 : ReplaceUses(SDValue(N, i),
1206 : CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1207 :
1208 : ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1209 :
1210 49 : // Transfer memoperands.
1211 49 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1212 49 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1213 49 :
1214 : CurDAG->RemoveDeadNode(N);
1215 : }
1216 :
1217 120 : void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1218 : unsigned Opc, unsigned SubRegIdx) {
1219 : SDLoc dl(N);
1220 120 : EVT VT = N->getValueType(0);
1221 120 : SDValue Chain = N->getOperand(0);
1222 :
1223 : SDValue Ops[] = {N->getOperand(1), // Mem operand
1224 120 : N->getOperand(2), // Incremental
1225 : Chain};
1226 120 :
1227 : const EVT ResTys[] = {MVT::i64, // Type of the write back register
1228 240 : MVT::Untyped, MVT::Other};
1229 :
1230 475 : SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1231 710 :
1232 355 : // Update uses of write back register
1233 : ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1234 120 :
1235 : // Update uses of vector list
1236 : SDValue SuperReg = SDValue(Ld, 1);
1237 120 : if (NumVecs == 1)
1238 240 : ReplaceUses(SDValue(N, 0), SuperReg);
1239 : else
1240 120 : for (unsigned i = 0; i < NumVecs; ++i)
1241 120 : ReplaceUses(SDValue(N, i),
1242 : CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1243 236 :
1244 : // Update the chain
1245 : ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1246 236 : CurDAG->RemoveDeadNode(N);
1247 236 : }
1248 :
1249 : void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1250 : unsigned Opc) {
1251 236 : SDLoc dl(N);
1252 : EVT VT = N->getOperand(2)->getValueType(0);
1253 236 :
1254 : // Form a REG_SEQUENCE to force register allocation.
1255 : bool Is128Bit = VT.getSizeInBits() == 128;
1256 472 : SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1257 : SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1258 :
1259 472 : SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1260 : SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1261 :
1262 : // Transfer memoperands.
1263 236 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1264 40 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1265 :
1266 864 : ReplaceNode(N, St);
1267 1296 : }
1268 648 :
1269 : void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1270 : unsigned Opc) {
1271 236 : SDLoc dl(N);
1272 236 : EVT VT = N->getOperand(2)->getValueType(0);
1273 236 : const EVT ResTys[] = {MVT::i64, // Type of the write back register
1274 : MVT::Other}; // Type for the Chain
1275 149 :
1276 : // Form a REG_SEQUENCE to force register allocation.
1277 : bool Is128Bit = VT.getSizeInBits() == 128;
1278 149 : SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1279 : SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1280 :
1281 149 : SDValue Ops[] = {RegSeq,
1282 149 : N->getOperand(NumVecs + 1), // base register
1283 298 : N->getOperand(NumVecs + 2), // Incremental
1284 : N->getOperand(0)}; // Chain
1285 149 : SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1286 447 :
1287 : ReplaceNode(N, St);
1288 : }
1289 149 :
1290 298 : namespace {
1291 : /// WidenVector - Given a value in the V64 register class, produce the
1292 149 : /// equivalent value in the V128 register class.
1293 149 : class WidenVector {
1294 : SelectionDAG &DAG;
1295 144 :
1296 : public:
1297 : WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1298 144 :
1299 144 : SDValue operator()(SDValue V64Reg) {
1300 : EVT VT = V64Reg.getValueType();
1301 : unsigned NarrowSize = VT.getVectorNumElements();
1302 : MVT EltTy = VT.getVectorElementType().getSimpleVT();
1303 144 : MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1304 144 : SDLoc DL(V64Reg);
1305 288 :
1306 : SDValue Undef =
1307 : SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1308 144 : return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1309 144 : }
1310 288 : };
1311 288 : } // namespace
1312 :
1313 144 : /// NarrowVector - Given a value in the V128 register class, produce the
1314 144 : /// equivalent value in the V64 register class.
1315 : static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1316 : EVT VT = V128Reg.getValueType();
1317 : unsigned WideSize = VT.getVectorNumElements();
1318 : MVT EltTy = VT.getVectorElementType().getSimpleVT();
1319 : MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1320 :
1321 : return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1322 : V128Reg);
1323 85 : }
1324 :
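// Example: a v2i32 value held in a D register is widened to v4i32 by
// inserting it into the dsub subregister of an IMPLICIT_DEF, and
// NarrowVector performs the inverse extraction; the lane load/store
// selections below use this so that 64-bit vectors can go through the
// Q-register based LDN/STN lane instructions.
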
1325 0 : void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1326 0 : unsigned Opc) {
1327 : SDLoc dl(N);
1328 0 : EVT VT = N->getValueType(0);
1329 0 : bool Narrow = VT.getSizeInBits() == 64;
1330 0 :
1331 : // Form a REG_SEQUENCE to force register allocation.
1332 : SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1333 0 :
1334 0 : if (Narrow)
1335 : transform(Regs, Regs.begin(),
1336 : WidenVector(*CurDAG));
1337 :
1338 : SDValue RegSeq = createQTuple(Regs);
1339 :
1340 : const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1341 122 :
1342 122 : unsigned LaneNo =
1343 : cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1344 122 :
1345 122 : SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1346 : N->getOperand(NumVecs + 3), N->getOperand(0)};
1347 122 : SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1348 122 : SDValue SuperReg = SDValue(Ld, 0);
1349 :
1350 : EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1351 26 : static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1352 : AArch64::qsub2, AArch64::qsub3 };
1353 : for (unsigned i = 0; i < NumVecs; ++i) {
1354 26 : SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1355 26 : if (Narrow)
1356 : NV = NarrowVector(NV, *CurDAG);
1357 : ReplaceUses(SDValue(N, i), NV);
1358 26 : }
1359 :
1360 26 : ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1361 5 : CurDAG->RemoveDeadNode(N);
1362 5 : }
1363 :
1364 26 : void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1365 : unsigned Opc) {
1366 26 : SDLoc dl(N);
1367 : EVT VT = N->getValueType(0);
1368 : bool Narrow = VT.getSizeInBits() == 64;
1369 52 :
1370 : // Form a REG_SEQUENCE to force register allocation.
1371 52 : SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1372 26 :
1373 52 : if (Narrow)
1374 : transform(Regs, Regs.begin(),
1375 : WidenVector(*CurDAG));
1376 52 :
1377 : SDValue RegSeq = createQTuple(Regs);
1378 :
1379 102 : const EVT ResTys[] = {MVT::i64, // Type of the write back register
1380 76 : RegSeq->getValueType(0), MVT::Other};
1381 76 :
1382 13 : unsigned LaneNo =
1383 152 : cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1384 :
1385 : SDValue Ops[] = {RegSeq,
1386 26 : CurDAG->getTargetConstant(LaneNo, dl,
1387 26 : MVT::i64), // Lane Number
1388 26 : N->getOperand(NumVecs + 2), // Base register
1389 : N->getOperand(NumVecs + 3), // Incremental
1390 94 : N->getOperand(0)};
1391 : SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1392 :
1393 94 : // Update uses of the write back register
1394 94 : ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1395 :
1396 : // Update uses of the vector list
1397 94 : SDValue SuperReg = SDValue(Ld, 1);
1398 : if (NumVecs == 1) {
1399 94 : ReplaceUses(SDValue(N, 0),
1400 37 : Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1401 37 : } else {
1402 : EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1403 94 : static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1404 : AArch64::qsub2, AArch64::qsub3 };
1405 : for (unsigned i = 0; i < NumVecs; ++i) {
1406 94 : SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1407 : SuperReg);
1408 : if (Narrow)
1409 188 : NV = NarrowVector(NV, *CurDAG);
1410 : ReplaceUses(SDValue(N, i), NV);
1411 : }
1412 94 : }
1413 94 :
1414 94 : // Update the Chain
1415 94 : ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1416 94 : CurDAG->RemoveDeadNode(N);
1417 188 : }
1418 :
1419 : void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1420 188 : unsigned Opc) {
1421 : SDLoc dl(N);
1422 : EVT VT = N->getOperand(2)->getValueType(0);
1423 : bool Narrow = VT.getSizeInBits() == 64;
1424 94 :
1425 45 : // Form a REG_SEQUENCE to force register allocation.
1426 1 : SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1427 :
1428 144 : if (Narrow)
1429 : transform(Regs, Regs.begin(),
1430 : WidenVector(*CurDAG));
1431 288 :
1432 216 : SDValue RegSeq = createQTuple(Regs);
1433 216 :
1434 216 : unsigned LaneNo =
1435 108 : cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1436 432 :
1437 : SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1438 : N->getOperand(NumVecs + 3), N->getOperand(0)};
1439 : SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1440 :
1441 94 : // Transfer memoperands.
1442 94 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1443 94 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1444 :
1445 35 : ReplaceNode(N, St);
1446 : }
1447 :
1448 35 : void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1449 35 : unsigned Opc) {
1450 : SDLoc dl(N);
1451 : EVT VT = N->getOperand(2)->getValueType(0);
1452 35 : bool Narrow = VT.getSizeInBits() == 64;
1453 :
1454 35 : // Form a REG_SEQUENCE to force register allocation.
1455 7 : SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1456 7 :
1457 : if (Narrow)
1458 35 : transform(Regs, Regs.begin(),
1459 : WidenVector(*CurDAG));
1460 :
1461 70 : SDValue RegSeq = createQTuple(Regs);
1462 :
1463 70 : const EVT ResTys[] = {MVT::i64, // Type of the write back register
1464 35 : MVT::Other};
1465 70 :
1466 : unsigned LaneNo =
1467 : cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1468 35 :
1469 70 : SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1470 : N->getOperand(NumVecs + 2), // Base Register
1471 35 : N->getOperand(NumVecs + 3), // Incremental
1472 35 : N->getOperand(0)};
1473 : SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1474 72 :
1475 : // Transfer memoperands.
1476 : MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1477 72 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1478 72 :
1479 : ReplaceNode(N, St);
1480 : }
1481 72 :
1482 : static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1483 72 : unsigned &Opc, SDValue &Opd0,
1484 36 : unsigned &LSB, unsigned &MSB,
1485 36 : unsigned NumberOfIgnoredLowBits,
1486 : bool BiggerPattern) {
1487 72 : assert(N->getOpcode() == ISD::AND &&
1488 : "N must be a AND operation to call this function");
1489 72 :
1490 : EVT VT = N->getValueType(0);
1491 :
1492 : // We could test the type of VT here and return false when it does not
1493 144 : // match, but since that check is already done prior to this call in the
1494 : // current context, we turned it into an assert to avoid redundant code.
1495 144 : assert((VT == MVT::i32 || VT == MVT::i64) &&
1496 72 : "Type checking must have been done before calling this function");
1497 72 :
1498 72 : // FIXME: simplify-demanded-bits in DAGCombine will probably have
1499 144 : // changed the AND node to a 32-bit mask operation. We'll have to
1500 : // undo that as part of the transform here if we want to catch all
1501 : // the opportunities.
1502 72 : // Currently the NumberOfIgnoredLowBits argument helps to recover
1503 144 : // from these situations when matching the bigger pattern (bitfield insert).
1504 :
1505 72 : // For unsigned extracts, check for a shift right and mask
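 : // Illustrative example (not from the original source): for an i32 node
 : // (and (srl x, 3), 0x1f), AndImm = 0x1f and SrlImm = 3, so LSB = 3 and
 : // MSB = 3 + 5 - 1 = 7, and the node is selected to UBFMWri x, #3, #7,
 : // i.e. an unsigned extract of bits [7:3].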
1506 72 : uint64_t AndImm = 0;
1507 : if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1508 1184 : return false;
1509 :
1510 : const SDNode *Op0 = N->getOperand(0).getNode();
1511 :
1512 : // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1513 : // simplified. Try to undo that
1514 : AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1515 :
1516 2368 : // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1517 : if (AndImm & (AndImm + 1))
1518 : return false;
1519 :
1520 : bool ClampMSB = false;
1521 : uint64_t SrlImm = 0;
1522 : // Handle the SRL + ANY_EXTEND case.
1523 : if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1524 : isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1525 : // Extend the incoming operand of the SRL to 64-bit.
1526 : Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1527 : // Make sure to clamp the MSB so that we preserve the semantics of the
1528 : // original operations.
1529 : ClampMSB = true;
1530 : } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1531 : isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1532 : SrlImm)) {
1533 1184 : // If the shift result was truncated, we can still combine them.
1534 : Opd0 = Op0->getOperand(0).getOperand(0);
1535 :
1536 851 : // Use the type of SRL node.
1537 : VT = Opd0->getValueType(0);
1538 : } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1539 : Opd0 = Op0->getOperand(0);
1540 851 : } else if (BiggerPattern) {
1541 : // Let's pretend a 0 shift right has been performed.
1542 : // The resulting code will be at least as good as the original one
1543 851 : // plus it may expose more opportunities for bitfield insert pattern.
1544 : // FIXME: Currently we limit this to the bigger pattern, because
1545 : // some optimizations expect AND and not UBFM.
1546 : Opd0 = N->getOperand(0);
1547 : } else
1548 : return false;
1549 88 :
1550 23 : // Bail out on large immediates. This happens when no proper
1551 : // combining/constant folding was performed.
1552 10 : if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1553 : LLVM_DEBUG(
1554 : (dbgs() << N
1555 : << ": Found large shift immediate, this should not happen\n"));
1556 407 : return false;
1557 5 : }
1558 :
1559 : LSB = SrlImm;
1560 4 : MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1561 : : countTrailingOnes<uint64_t>(AndImm)) -
1562 : 1;
1563 8 : if (ClampMSB)
1564 : // Since we're moving the extend before the right shift operation, we need
1565 38 : // to clamp the MSB to make sure we don't shift in undefined bits instead of
1566 443 : // the zeros which would get shifted in with the original right shift
1567 : // operation.
1568 : MSB = MSB > 31 ? 31 : MSB;
1569 :
1570 : Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1571 : return true;
1572 13 : }
1573 :
1574 : static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1575 : SDValue &Opd0, unsigned &Immr,
1576 : unsigned &Imms) {
1577 : assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1578 65 :
1579 : EVT VT = N->getValueType(0);
1580 : unsigned BitWidth = VT.getSizeInBits();
1581 : assert((VT == MVT::i32 || VT == MVT::i64) &&
1582 : "Type checking must have been done before calling this function");
1583 :
1584 : SDValue Op = N->getOperand(0);
1585 65 : if (Op->getOpcode() == ISD::TRUNCATE) {
1586 94 : Op = Op->getOperand(0);
1587 65 : VT = Op->getValueType(0);
1588 : BitWidth = VT.getSizeInBits();
1589 65 : }
1590 :
1591 : uint64_t ShiftImm;
1592 : if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1593 : !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1594 10 : return false;
1595 :
1596 65 : unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1597 65 : if (ShiftImm + Width > BitWidth)
1598 : return false;
1599 :
1600 251 : Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1601 : Opd0 = Op.getOperand(0);
1602 : Immr = ShiftImm;
1603 : Imms = ShiftImm + Width - 1;
1604 : return true;
1605 251 : }
1606 251 :
1607 : static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1608 : SDValue &Opd0, unsigned &LSB,
1609 : unsigned &MSB) {
1610 251 : // We are looking for the following pattern which basically extracts several
1611 251 : // contiguous bits from the source value and places them from the LSB of the
1612 11 : // destination value; all other bits of the destination value are set to zero:
1613 11 : //
1614 11 : // Value2 = AND Value, MaskImm
1615 : // SRL Value2, ShiftImm
1616 : //
1617 : // where (MaskImm >> ShiftImm) must be a mask of low bits; its width is the number of bits extracted.
1618 495 : //
1619 : // This gets selected into a single UBFM:
1620 : //
1621 : // UBFM Value, ShiftImm, BitWide + SrlImm -1
1622 7 : //
1623 7 :
1624 : if (N->getOpcode() != ISD::SRL)
1625 : return false;
1626 7 :
1627 7 : uint64_t AndMask = 0;
1628 7 : if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1629 7 : return false;
1630 7 :
1631 : Opd0 = N->getOperand(0).getOperand(0);
1632 :
1633 348 : uint64_t SrlImm = 0;
1634 : if (!isIntImmediate(N->getOperand(1), SrlImm))
1635 : return false;
1636 :
1637 : // Check whether we really have several bits extract here.
1638 : unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1639 : if (BitWide && isMask_64(AndMask >> SrlImm)) {
1640 : if (N->getValueType(0) == MVT::i32)
1641 : Opc = AArch64::UBFMWri;
1642 : else
1643 : Opc = AArch64::UBFMXri;
1644 :
1645 : LSB = SrlImm;
1646 : MSB = BitWide + SrlImm - 1;
1647 : return true;
1648 : }
1649 :
1650 348 : return false;
1651 : }
1652 :
1653 : static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1654 307 : unsigned &Immr, unsigned &Imms,
1655 : bool BiggerPattern) {
1656 : assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1657 30 : "N must be a SHR/SRA operation to call this function");
1658 :
1659 : EVT VT = N->getValueType(0);
1660 60 :
1661 : // Here we can test the type of VT and return false when the type does not
1662 : // match, but since it is done prior to that call in the current context
1663 : // we turned that into an assert to avoid redundant code.
1664 15 : assert((VT == MVT::i32 || VT == MVT::i64) &&
1665 15 : "Type checking must have been done before calling this function");
1666 12 :
1667 12 : // Check for AND + SRL doing several bits extract.
1668 : if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1669 0 : return true;
1670 :
1671 12 : // We're looking for a shift of a shift.
1672 12 : uint64_t ShlImm = 0;
1673 12 : uint64_t TruncBits = 0;
1674 : if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1675 : Opd0 = N->getOperand(0).getOperand(0);
1676 : } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1677 : N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1678 : // We are looking for a shift of a truncate. Truncating from i64 to i32 can
1679 348 : // be considered as setting the high 32 bits to zero. Our strategy here is to
1680 : // always generate a 64-bit UBFM. This consistency will help the CSE pass
1681 : // later find more redundancy.
1682 : Opd0 = N->getOperand(0).getOperand(0);
1683 : TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1684 : VT = Opd0.getValueType();
1685 348 : assert(VT == MVT::i64 && "the promoted type should be i64");
1686 : } else if (BiggerPattern) {
1687 : // Let's pretend a 0 shift left has been performed.
1688 : // FIXME: Currently we limit this to the bigger pattern case,
1689 : // because some optimizations expect AND and not UBFM
1690 : Opd0 = N->getOperand(0);
1691 : } else
1692 : return false;
1693 :
1694 348 : // Missing combines/constant folding may have left us with strange
1695 : // constants.
1696 : if (ShlImm >= VT.getSizeInBits()) {
1697 : LLVM_DEBUG(
1698 : (dbgs() << N
1699 : << ": Found large shift immediate, this should not happen\n"));
1700 336 : return false;
1701 15 : }
1702 130 :
1703 : uint64_t SrlImm = 0;
1704 : if (!isIntImmediate(N->getOperand(1), SrlImm))
1705 : return false;
1706 :
1707 : assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1708 2 : "bad amount in shift node!");
1709 2 : int immr = SrlImm - ShlImm;
1710 4 : Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1711 : Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
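 : // Illustrative example (not from the original source): for i32
 : // (srl (shl x, 24), 28), ShlImm = 24 and SrlImm = 28, so Immr = 4 and
 : // Imms = 7, i.e. UBFMWri x, #4, #7 extracts bits [7:4] of x.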
1712 319 : // SRA requires a signed extraction
1713 : if (VT == MVT::i32)
1714 : Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1715 : else
1716 12 : Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1717 : return true;
1718 : }
1719 :
1720 : bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1721 : assert(N->getOpcode() == ISD::SIGN_EXTEND);
1722 29 :
1723 : EVT VT = N->getValueType(0);
1724 : EVT NarrowVT = N->getOperand(0)->getValueType(0);
1725 : if (VT != MVT::i64 || NarrowVT != MVT::i32)
1726 : return false;
1727 :
1728 : uint64_t ShiftImm;
1729 : SDValue Op = N->getOperand(0);
1730 58 : if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1731 : return false;
1732 :
1733 : SDLoc dl(N);
1734 : // Extend the incoming operand of the shift to 64-bits.
1735 20 : SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1736 20 : unsigned Immr = ShiftImm;
1737 20 : unsigned Imms = NarrowVT.getSizeInBits() - 1;
1738 : SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1739 : CurDAG->getTargetConstant(Imms, dl, VT)};
1740 16 : CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1741 : return true;
1742 16 : }
1743 :
1744 : static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1745 : SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1746 0 : unsigned NumberOfIgnoredLowBits = 0,
1747 : bool BiggerPattern = false) {
1748 : if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1749 0 : return false;
1750 0 :
1751 0 : switch (N->getOpcode()) {
1752 0 : default:
1753 : if (!N->isMachineOpcode())
1754 : return false;
1755 : break;
1756 0 : case ISD::AND:
1757 0 : return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1758 : NumberOfIgnoredLowBits, BiggerPattern);
1759 : case ISD::SRL:
1760 : case ISD::SRA:
1761 0 : return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1762 :
1763 0 : case ISD::SIGN_EXTEND_INREG:
1764 0 : return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1765 0 : }
1766 0 :
1767 : unsigned NOpc = N->getMachineOpcode();
1768 : switch (NOpc) {
1769 : default:
1770 2338 : return false;
1771 : case AArch64::SBFMWri:
1772 : case AArch64::UBFMWri:
1773 : case AArch64::SBFMXri:
1774 2338 : case AArch64::UBFMXri:
1775 114 : Opc = NOpc;
1776 : Opd0 = N->getOperand(0);
1777 4448 : Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1778 441 : Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1779 441 : return true;
1780 : }
1781 : // Unreachable
1782 1184 : return false;
1783 1184 : }
1784 1184 :
1785 348 : bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1786 : unsigned Opc, Immr, Imms;
1787 348 : SDValue Opd0;
1788 : if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1789 251 : return false;
1790 251 :
1791 : EVT VT = N->getValueType(0);
1792 : SDLoc dl(N);
1793 :
1794 6 : // If the bit extract operation is 64bit but the original type is 32bit, we
1795 : // need to add one EXTRACT_SUBREG.
1796 : if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1797 0 : SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1798 : CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1799 :
1800 : SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1801 0 : SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1802 0 : ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1803 0 : MVT::i32, SDValue(BFM, 0), SubReg));
1804 0 : return true;
1805 0 : }
1806 :
1807 : SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1808 : CurDAG->getTargetConstant(Imms, dl, VT)};
1809 : CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1810 : return true;
1811 1507 : }
1812 :
1813 1507 : /// Does DstMask form a complementary pair with the mask provided by
1814 1507 : /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1815 : /// this asks whether DstMask zeroes precisely those bits that will be set by
1816 : /// the other half.
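 : /// Illustrative example (not from the original source): with VT = i32 and no
 : /// ignored high bits, DstMask = 0xffff00ff and BitsToBeInserted = 0x0000ff00
 : /// form such a pair: their AND is 0 and their OR is all ones.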
1817 134 : static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1818 : unsigned NumberOfIgnoredHighBits, EVT VT) {
1819 : assert((VT == MVT::i32 || VT == MVT::i64) &&
1820 : "i32 or i64 mask type expected!");
1821 : unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1822 67 :
1823 14 : APInt SignificantDstMask = APInt(BitWidth, DstMask);
1824 7 : APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1825 :
1826 14 : return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1827 7 : (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1828 14 : }
1829 :
1830 : // Look for bits that will be useful for later uses.
1831 : // A bit is considered useless as soon as it is dropped and never used
1832 : // before it has been dropped.
1833 120 : // E.g., looking for useful bit of x
1834 60 : // 1. y = x & 0x7
1835 120 : // 2. z = y >> 2
1836 120 : // After #1, the useful bits of x are 0x7; those useful bits of x then live
1837 60 : // through y.
1838 : // After #2, the useful bits of x are 0x4.
1839 : // However, if x is used on an unpredictable instruction, then all its bits
1840 : // are useful.
1841 : // E.g.
1842 : // 1. y = x & 0x7
1843 39 : // 2. z = y >> 2
1844 : // 3. str x, [@x]
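 : // In this second example the store in #3 consumes x directly, so every bit
 : // of x remains useful even though #1 and #2 on their own only need 0x4.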
1845 : static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1846 :
1847 39 : static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1848 : unsigned Depth) {
1849 : uint64_t Imm =
1850 39 : cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1851 : Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1852 117 : UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1853 117 : getUsefulBits(Op, UsefulBits, Depth + 1);
1854 : }
1855 :
1856 : static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1857 : uint64_t Imm, uint64_t MSB,
1858 : unsigned Depth) {
1859 : // inherit the bitwidth value
1860 : APInt OpUsefulBits(UsefulBits);
1861 : OpUsefulBits = 1;
1862 :
1863 : if (MSB >= Imm) {
1864 : OpUsefulBits <<= MSB - Imm + 1;
1865 : --OpUsefulBits;
1866 : // The interesting part will be in the lower part of the result
1867 : getUsefulBits(Op, OpUsefulBits, Depth + 1);
1868 : // The interesting part was starting at Imm in the argument
1869 : OpUsefulBits <<= Imm;
1870 : } else {
1871 : OpUsefulBits <<= MSB + 1;
1872 : --OpUsefulBits;
1873 10 : // The interesting part will be shifted in the result
1874 : OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1875 : getUsefulBits(Op, OpUsefulBits, Depth + 1);
1876 20 : // The interesting part was at zero in the argument
1877 10 : OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1878 10 : }
1879 10 :
1880 10 : UsefulBits &= OpUsefulBits;
1881 : }
1882 20 :
1883 : static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1884 : unsigned Depth) {
1885 : uint64_t Imm =
1886 : cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1887 20 : uint64_t MSB =
1888 : cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1889 20 :
1890 8 : getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1891 8 : }
1892 :
1893 8 : static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1894 : unsigned Depth) {
1895 8 : uint64_t ShiftTypeAndValue =
1896 : cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1897 12 : APInt Mask(UsefulBits);
1898 12 : Mask.clearAllBits();
1899 : Mask.flipAllBits();
1900 12 :
1901 12 : if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1902 : // Shift Left
1903 12 : uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1904 : Mask <<= ShiftAmt;
1905 : getUsefulBits(Op, Mask, Depth + 1);
1906 : Mask.lshrInPlace(ShiftAmt);
1907 20 : } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1908 : // Shift Right
1909 20 : // We do not handle AArch64_AM::ASR, because the sign will change the
1910 : // number of useful bits
1911 : uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1912 20 : Mask.lshrInPlace(ShiftAmt);
1913 : getUsefulBits(Op, Mask, Depth + 1);
1914 20 : Mask <<= ShiftAmt;
1915 : } else
1916 20 : return;
1917 20 :
1918 : UsefulBits &= Mask;
1919 0 : }
1920 :
1921 : static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1922 0 : unsigned Depth) {
1923 : uint64_t Imm =
1924 0 : cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1925 0 : uint64_t MSB =
1926 : cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1927 0 :
1928 : APInt OpUsefulBits(UsefulBits);
1929 : OpUsefulBits = 1;
1930 0 :
1931 0 : APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1932 : ResultUsefulBits.flipAllBits();
1933 0 : APInt Mask(UsefulBits.getBitWidth(), 0);
1934 :
1935 : getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1936 :
1937 : if (MSB >= Imm) {
1938 : // The instruction is a BFXIL.
1939 0 : uint64_t Width = MSB - Imm + 1;
1940 0 : uint64_t LSB = Imm;
1941 :
1942 : OpUsefulBits <<= Width;
1943 : --OpUsefulBits;
1944 :
1945 : if (Op.getOperand(1) == Orig) {
1946 : // Copy the low bits from the result to bits starting from LSB.
1947 0 : Mask = ResultUsefulBits & OpUsefulBits;
1948 : Mask <<= LSB;
1949 : }
1950 0 :
1951 : if (Op.getOperand(0) == Orig)
1952 0 : // Bits starting from LSB in the input contribute to the result.
1953 : Mask |= (ResultUsefulBits & ~OpUsefulBits);
1954 : } else {
1955 0 : // The instruction is a BFI.
1956 : uint64_t Width = MSB + 1;
1957 0 : uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1958 0 :
1959 0 : OpUsefulBits <<= Width;
1960 : --OpUsefulBits;
1961 0 : OpUsefulBits <<= LSB;
1962 :
1963 0 : if (Op.getOperand(1) == Orig) {
1964 : // Copy the bits from the result to the zero bits.
1965 0 : Mask = ResultUsefulBits & OpUsefulBits;
1966 : Mask.lshrInPlace(LSB);
1967 : }
1968 0 :
1969 0 : if (Op.getOperand(0) == Orig)
1970 : Mask |= (ResultUsefulBits & ~OpUsefulBits);
1971 0 : }
1972 :
1973 0 : UsefulBits &= Mask;
1974 0 : }
1975 :
1976 : static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1977 0 : SDValue Orig, unsigned Depth) {
1978 :
1979 0 : // Users of this node should have already been instruction selected
1980 : // FIXME: Can we turn that into an assert?
1981 : if (!UserNode->isMachineOpcode())
1982 0 : return;
1983 0 :
1984 : switch (UserNode->getMachineOpcode()) {
1985 0 : default:
1986 0 : return;
1987 0 : case AArch64::ANDSWri:
1988 : case AArch64::ANDSXri:
1989 0 : case AArch64::ANDWri:
1990 : case AArch64::ANDXri:
1991 0 : // We increment Depth only when we call the getUsefulBits
1992 : return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1993 : Depth);
1994 : case AArch64::UBFMWri:
1995 0 : case AArch64::UBFMXri:
1996 0 : return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1997 :
1998 : case AArch64::ORRWrs:
1999 : case AArch64::ORRXrs:
2000 0 : if (UserNode->getOperand(1) != Orig)
2001 : return;
2002 551 : return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2003 : Depth);
2004 : case AArch64::BFMWri:
2005 : case AArch64::BFMXri:
2006 : return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2007 551 :
2008 : case AArch64::STRBBui:
2009 : case AArch64::STURBBi:
2010 256 : if (UserNode->getOperand(0) != Orig)
2011 : return;
2012 : UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2013 : return;
2014 :
2015 : case AArch64::STRHHui:
2016 : case AArch64::STURHHi:
2017 : if (UserNode->getOperand(0) != Orig)
2018 10 : return;
2019 10 : UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2020 : return;
2021 : }
2022 20 : }
2023 :
2024 6 : static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2025 : if (Depth >= 6)
2026 6 : return;
2027 : // Initialize UsefulBits
2028 0 : if (!Depth) {
2029 0 : unsigned Bitwidth = Op.getScalarValueSizeInBits();
2030 : // At the beginning, assume every produced bits is useful
2031 : UsefulBits = APInt(Bitwidth, 0);
2032 25 : UsefulBits.flipAllBits();
2033 : }
2034 7 : APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2035 :
2036 7 : for (SDNode *Node : Op.getNode()->uses()) {
2037 : // A use cannot produce useful bits
2038 7 : APInt UsefulBitsForUse = APInt(UsefulBits);
2039 7 : getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2040 : UsersUsefulBits |= UsefulBitsForUse;
2041 5 : }
2042 : // UsefulBits contains the produced bits that are meaningful for the
2043 5 : // current definition, thus a user cannot make a bit meaningful at
2044 : // this point
2045 5 : UsefulBits &= UsersUsefulBits;
2046 5 : }
2047 :
2048 : /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2049 : /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2050 522 : /// 0, return Op unchanged.
2051 522 : static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2052 0 : if (ShlAmount == 0)
2053 : return Op;
2054 522 :
2055 467 : EVT VT = Op.getValueType();
2056 : SDLoc dl(Op);
2057 467 : unsigned BitWidth = VT.getSizeInBits();
2058 467 : unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2059 :
2060 522 : SDNode *ShiftNode;
2061 : if (ShlAmount > 0) {
2062 1073 : // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2063 : ShiftNode = CurDAG->getMachineNode(
2064 : UBFMOpc, dl, VT, Op,
2065 551 : CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2066 : CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2067 : } else {
2068 : // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2069 : assert(ShlAmount < 0 && "expected right shift");
2070 : int ShrAmount = -ShlAmount;
2071 : ShiftNode = CurDAG->getMachineNode(
2072 : UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2073 : CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2074 : }
2075 :
2076 : return SDValue(ShiftNode, 0);
2077 80 : }
2078 80 :
2079 78 : /// Does this tree qualify as an attempt to move a bitfield into position,
2080 : /// essentially "(and (shl VAL, N), Mask)".
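 : /// Illustrative example (not from the original source): for i32
 : /// (and (shl x, 3), 0xf8), the known non-zero bits are 0xf8, so Src = x,
 : /// ShiftAmount = 3 and MaskWidth = 5 (the field x[4:0] moved to bits [7:3]).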
2081 2 : static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2082 : bool BiggerPattern,
2083 2 : SDValue &Src, int &ShiftAmount,
2084 2 : int &MaskWidth) {
2085 : EVT VT = Op.getValueType();
2086 : unsigned BitWidth = VT.getSizeInBits();
2087 2 : (void)BitWidth;
2088 : assert(BitWidth == 32 || BitWidth == 64);
2089 0 :
2090 : KnownBits Known;
2091 : CurDAG->computeKnownBits(Op, Known);
2092 0 :
2093 : // Non-zero in the sense that they're not provably zero, which is the key
2094 : // point if we want to use this value
2095 : uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2096 2 :
2097 2 : // Discard a constant AND mask if present. It's safe because the node will
2098 : // already have been factored into the computeKnownBits calculation above.
2099 2 : uint64_t AndImm;
2100 : if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2101 : assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2102 2 : Op = Op.getOperand(0);
2103 : }
2104 :
2105 : // Don't match if the SHL has more than one use, since then we'll end up
2106 : // generating SHL+UBFIZ instead of just keeping SHL+AND.
2107 1589 : if (!BiggerPattern && !Op.hasOneUse())
2108 : return false;
2109 :
2110 : uint64_t ShlImm;
2111 1589 : if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2112 : return false;
2113 : Op = Op.getOperand(0);
2114 :
2115 : if (!isShiftedMask_64(NonZeroBits))
2116 1589 : return false;
2117 1589 :
2118 : ShiftAmount = countTrailingZeros(NonZeroBits);
2119 : MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2120 :
2121 1589 : // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2122 : // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2123 : // amount. BiggerPattern is true when this pattern is being matched for BFI,
2124 : // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2125 : // which case it is not profitable to insert an extra shift.
2126 : if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2127 : return false;
2128 786 : Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2129 :
2130 : return true;
2131 : }
2132 :
2133 2843 : static bool isShiftedMask(uint64_t Mask, EVT VT) {
2134 : assert(VT == MVT::i32 || VT == MVT::i64);
2135 : if (VT == MVT::i32)
2136 : return isShiftedMask_32(Mask);
2137 1460 : return isShiftedMask_64(Mask);
2138 : }
2139 88 :
2140 : // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2141 : // inserted only sets known zero bits.
2142 : static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2143 : assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2144 82 :
2145 82 : EVT VT = N->getValueType(0);
2146 : if (VT != MVT::i32 && VT != MVT::i64)
2147 : return false;
2148 :
2149 : unsigned BitWidth = VT.getSizeInBits();
2150 :
2151 : uint64_t OrImm;
2152 82 : if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2153 : return false;
2154 80 :
2155 : // Skip this transformation if the ORR immediate can be encoded in the ORR.
2156 80 : // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2157 : // performance neutral.
2158 : if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2159 0 : return false;
2160 :
2161 0 : uint64_t MaskImm;
2162 27 : SDValue And = N->getOperand(0);
2163 : // Must be a single use AND with an immediate operand.
2164 : if (!And.hasOneUse() ||
2165 : !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2166 : return false;
2167 :
2168 373 : // Compute the Known Zero for the AND as this allows us to catch more general
2169 : // cases than just looking for AND with imm.
2170 : KnownBits Known;
2171 746 : CurDAG->computeKnownBits(And, Known);
2172 :
2173 : // Non-zero in the sense that they're not provably zero, which is the key
2174 : // point if we want to use this value.
2175 159 : uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2176 :
2177 : // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2178 159 : if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2179 : return false;
2180 :
2181 : // The bits being inserted must only set those bits that are known to be zero.
2182 : if ((OrImm & NotKnownZero) != 0) {
2183 : // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2184 34 : // currently handle this case.
2185 : return false;
2186 : }
2187 :
2188 9 : // BFI/BFXIL dst, src, #lsb, #width.
2189 : int LSB = countTrailingOnes(NotKnownZero);
2190 9 : int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2191 :
2192 : // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2193 : unsigned ImmR = (BitWidth - LSB) % BitWidth;
2194 : unsigned ImmS = Width - 1;
2195 :
2196 7 : // If we're creating a BFI instruction avoid cases where we need more
2197 7 : // instructions to materialize the BFI constant as compared to the original
2198 : // ORR. A BFXIL will use the same constant as the original ORR, so the code
2199 : // should be no worse in this case.
2200 : bool IsBFI = LSB != 0;
2201 7 : uint64_t BFIImm = OrImm >> LSB;
2202 : if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2203 : // We have a BFI instruction and we know the constant can't be materialized
2204 7 : // with a ORR-immediate with the zero register.
2205 : unsigned OrChunks = 0, BFIChunks = 0;
2206 : for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2207 : if (((OrImm >> Shift) & 0xFFFF) != 0)
2208 7 : ++OrChunks;
2209 : if (((BFIImm >> Shift) & 0xFFFF) != 0)
2210 : ++BFIChunks;
2211 : }
2212 : if (BFIChunks > OrChunks)
2213 : return false;
2214 : }
2215 7 :
2216 7 : // Materialize the constant to be inserted.
2217 : SDLoc DL(N);
2218 : unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2219 7 : SDNode *MOVI = CurDAG->getMachineNode(
2220 7 : MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2221 :
2222 : // Create the BFI/BFXIL instruction.
2223 : SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2224 : CurDAG->getTargetConstant(ImmR, DL, VT),
2225 : CurDAG->getTargetConstant(ImmS, DL, VT)};
2226 : unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2227 7 : CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2228 11 : return true;
2229 : }
2230 :
2231 : static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2232 16 : SelectionDAG *CurDAG) {
2233 : assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2234 6 :
2235 12 : EVT VT = N->getValueType(0);
2236 7 : if (VT != MVT::i32 && VT != MVT::i64)
2237 : return false;
2238 4 :
2239 : unsigned BitWidth = VT.getSizeInBits();
2240 :
2241 : // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2242 : // have the expected shape. Try to undo that.
2243 :
2244 : unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2245 6 : unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2246 :
2247 : // Given a OR operation, check if we have the following pattern
2248 : // ubfm c, b, imm, imm2 (or something that does the same jobs, see
2249 : // isBitfieldExtractOp)
2250 6 : // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
2251 6 : // countTrailingZeros(mask2) == imm2 - imm + 1
2252 : // f = d | c
2253 6 : // if yes, replace the OR instruction with:
2254 : // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2255 :
2256 : // OR is commutative, check all combinations of operand order and values of
2257 466 : // BiggerPattern, i.e.
2258 : // Opd0, Opd1, BiggerPattern=false
2259 : // Opd1, Opd0, BiggerPattern=false
2260 : // Opd0, Opd1, BiggerPattern=true
2261 932 : // Opd1, Opd0, BiggerPattern=true
2262 : // Several of these combinations may match, so check with BiggerPattern=false
2263 : // first since that will produce better results by matching more instructions
2264 : // and/or inserting fewer extra instructions.
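 : // Illustrative example (not from the original source): for i32
 : //   f = or (and (shl s, 8), 0x0000ff00), (and d, 0xffff00ff)
 : // the first operand positions s[7:0] at bits [15:8] (DstLSB = 8, Width = 8)
 : // and the second operand keeps the remaining bits of d, so the OR selects to
 : // BFMWri d, s, #24, #7, i.e. BFI d, s, #8, #8.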
2265 252 : for (int I = 0; I < 4; ++I) {
2266 :
2267 : SDValue Dst, Src;
2268 : unsigned ImmR, ImmS;
2269 : bool BiggerPattern = I / 2;
2270 252 : SDValue OrOpd0Val = N->getOperand(I % 2);
2271 252 : SDNode *OrOpd0 = OrOpd0Val.getNode();
2272 : SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2273 : SDNode *OrOpd1 = OrOpd1Val.getNode();
2274 :
2275 : unsigned BFXOpc;
2276 : int DstLSB, Width;
2277 : if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2278 : NumberOfIgnoredLowBits, BiggerPattern)) {
2279 : // Check that the returned opcode is compatible with the pattern,
2280 : // i.e., same type and zero extended (U and not S)
2281 : if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2282 : (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2283 : continue;
2284 :
2285 : // Compute the width of the bitfield insertion
2286 : DstLSB = 0;
2287 : Width = ImmS - ImmR + 1;
2288 : // FIXME: This constraint is to catch bitfield insertion; we may
2289 : // want to widen the pattern if we want to grab the general bitfield
2290 : // move case.
2291 998 : if (Width <= 0)
2292 : continue;
2293 831 :
2294 : // If the mask on the insertee is correct, we have a BFXIL operation. We
2295 831 : // can share the ImmR and ImmS values from the already-computed UBFM.
2296 831 : } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2297 : BiggerPattern,
2298 831 : Src, DstLSB, Width)) {
2299 : ImmR = (BitWidth - DstLSB) % BitWidth;
2300 : ImmS = Width - 1;
2301 : } else
2302 : continue;
2303 831 :
2304 : // Check the second part of the pattern
2305 : EVT VT = OrOpd1Val.getValueType();
2306 : assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2307 60 :
2308 : // Compute the Known Zero for the candidate of the first operand.
2309 746 : // This allows to catch more general case than just looking for
2310 : // AND with imm. Indeed, simplify-demanded-bits may have removed
2311 : // the AND instruction because it proves it was useless.
2312 35 : KnownBits Known;
2313 35 : CurDAG->computeKnownBits(OrOpd1Val, Known);
2314 :
2315 : // Check if there is enough room for the second operand to appear
2316 : // in the first one
2317 35 : APInt BitsToBeInserted =
2318 : APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2319 :
2320 : if ((BitsToBeInserted & ~Known.Zero) != 0)
2321 : continue;
2322 794 :
2323 : // Set the first operand
2324 : uint64_t Imm;
2325 78 : if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2326 78 : isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2327 : // In that case, we can eliminate the AND
2328 : Dst = OrOpd1->getOperand(0);
2329 : else
2330 : // Maybe the AND has been removed by simplify-demanded-bits
2331 113 : // or is useful because it discards more bits
2332 : Dst = OrOpd1Val;
2333 :
2334 : // both parts match
2335 : SDLoc DL(N);
2336 : SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2337 : CurDAG->getTargetConstant(ImmS, DL, VT)};
2338 : unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2339 113 : CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2340 : return true;
2341 : }
2342 :
2343 : // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2344 226 : // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2345 : // mask (e.g., 0x000ffff0).
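 : // Illustrative example (not from the original source): for i32
 : //   or (and X, 0xfff0000f), (and Y, 0x000ffff0)
 : // Mask1Imm = 0x000ffff0 is the shifted mask, so LSB = 4 and Width = 16; this
 : // selects to an LSR of Y by 4 followed by BFMWri X, (Y >> 4), #28, #15,
 : // which inserts Y[19:4] into bits [19:4] of X.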
2346 113 : uint64_t Mask0Imm, Mask1Imm;
2347 : SDValue And0 = N->getOperand(0);
2348 : SDValue And1 = N->getOperand(1);
2349 : if (And0.hasOneUse() && And1.hasOneUse() &&
2350 : isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2351 39 : isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2352 39 : APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2353 : (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2354 29 :
2355 : // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2356 : // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2357 : // bits to be inserted.
2358 56 : if (isShiftedMask(Mask0Imm, VT)) {
2359 : std::swap(And0, And1);
2360 : std::swap(Mask0Imm, Mask1Imm);
2361 : }
2362 170 :
2363 85 : SDValue Src = And1->getOperand(0);
2364 : SDValue Dst = And0->getOperand(0);
2365 85 : unsigned LSB = countTrailingZeros(Mask1Imm);
2366 : int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2367 :
2368 : // The BFXIL inserts the low-order bits from a source register, so right
2369 : // shift the needed bits into place.
2370 : SDLoc DL(N);
2371 : unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2372 : SDNode *LSR = CurDAG->getMachineNode(
2373 167 : ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2374 167 : CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2375 134 :
2376 : // BFXIL is an alias of BFM, so translate to BFM operands.
2377 21 : unsigned ImmR = (BitWidth - LSB) % BitWidth;
2378 364 : unsigned ImmS = Width - 1;
2379 15 :
2380 : // Create the BFXIL instruction.
2381 : SDValue Ops[] = {Dst, SDValue(LSR, 0),
2382 : CurDAG->getTargetConstant(ImmR, DL, VT),
2383 : CurDAG->getTargetConstant(ImmS, DL, VT)};
2384 8 : unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2385 : CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2386 : return true;
2387 : }
2388 :
2389 8 : return false;
2390 8 : }
2391 8 :
2392 8 : bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2393 : if (N->getOpcode() != ISD::OR)
2394 : return false;
2395 :
2396 : APInt NUsefulBits;
2397 : getUsefulBits(SDValue(N, 0), NUsefulBits);
2398 8 :
2399 : // If all bits are not useful, just return UNDEF.
2400 8 : if (!NUsefulBits) {
2401 : CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2402 : return true;
2403 8 : }
2404 8 :
2405 : if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2406 : return true;
2407 :
2408 8 : return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2409 8 : }
2410 :
2411 8 : /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2412 : /// equivalent of a left shift by a constant amount followed by an and masking
2413 : /// out a contiguous set of bits.
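 : /// Illustrative example (not from the original source): for i32
 : /// (and (shl x, 4), 0xff0), DstLSB = 4 and Width = 8, so ImmR = 28 and
 : /// ImmS = 7, and the node selects to UBFMWri x, #28, #7 (UBFIZ x, #4, #8).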
2414 : bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2415 : if (N->getOpcode() != ISD::AND)
2416 : return false;
2417 :
2418 0 : EVT VT = N->getValueType(0);
2419 0 : if (VT != MVT::i32 && VT != MVT::i64)
2420 0 : return false;
2421 :
2422 : SDValue Op0;
2423 0 : int DstLSB, Width;
2424 : if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2425 : Op0, DstLSB, Width))
2426 0 : return false;
2427 0 :
2428 0 : // ImmR is the rotate right amount.
2429 : unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2430 : // ImmS is the most significant bit of the source to be moved.
2431 0 : unsigned ImmS = Width - 1;
2432 0 :
2433 : SDLoc DL(N);
2434 0 : SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2435 : CurDAG->getTargetConstant(ImmS, DL, VT)};
2436 : unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2437 : CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2438 : return true;
2439 : }
2440 0 :
2441 0 : /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2442 0 : /// variable shift/rotate instructions.
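 : /// Illustrative example (not from the original source): for a 64-bit shift,
 : /// (shl x, (and amt, 63)) can drop the AND because LSLV only reads the low 6
 : /// bits of the amount, and (shl x, (sub 64, amt)) can shift by a NEG of amt
 : /// since 64 is congruent to 0 modulo the register size.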
2443 : bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2444 0 : EVT VT = N->getValueType(0);
2445 0 :
2446 0 : unsigned Opc;
2447 : switch (N->getOpcode()) {
2448 0 : case ISD::ROTR:
2449 : Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2450 0 : break;
2451 : case ISD::SHL:
2452 0 : Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2453 : break;
2454 : case ISD::SRL:
2455 0 : Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2456 : break;
2457 0 : case ISD::SRA:
2458 : Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2459 : break;
2460 0 : default:
2461 0 : return false;
2462 0 : }
2463 0 :
2464 : uint64_t Size;
2465 : uint64_t Bits;
2466 : if (VT == MVT::i32) {
2467 : Bits = 5;
2468 : Size = 32;
2469 0 : } else if (VT == MVT::i64) {
2470 0 : Bits = 6;
2471 : Size = 64;
2472 : } else
2473 0 : return false;
2474 :
2475 0 : SDValue ShiftAmt = N->getOperand(1);
2476 0 : SDLoc DL(N);
2477 : SDValue NewShiftAmt;
2478 0 :
2479 0 : // Skip over an extend of the shift amount.
2480 : if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2481 0 : ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2482 0 : ShiftAmt = ShiftAmt->getOperand(0);
2483 :
2484 0 : if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2485 0 : SDValue Add0 = ShiftAmt->getOperand(0);
2486 : SDValue Add1 = ShiftAmt->getOperand(1);
2487 : uint64_t Add0Imm;
2488 : uint64_t Add1Imm;
2489 : // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2490 : // to avoid the ADD/SUB.
2491 : if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2492 0 : NewShiftAmt = Add0;
2493 : // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2494 : // generate a NEG instead of a SUB of a constant.
2495 0 : else if (ShiftAmt->getOpcode() == ISD::SUB &&
2496 : isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2497 : (Add0Imm % Size == 0)) {
2498 : unsigned NegOpc;
2499 0 : unsigned ZeroReg;
2500 : EVT SubVT = ShiftAmt->getValueType(0);
2501 0 : if (SubVT == MVT::i32) {
2502 : NegOpc = AArch64::SUBWrr;
2503 : ZeroReg = AArch64::WZR;
2504 : } else {
2505 : assert(SubVT == MVT::i64);
2506 0 : NegOpc = AArch64::SUBXrr;
2507 : ZeroReg = AArch64::XZR;
2508 0 : }
2509 : SDValue Zero =
2510 0 : CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2511 0 : MachineSDNode *Neg =
2512 0 : CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2513 : NewShiftAmt = SDValue(Neg, 0);
2514 : } else
2515 : return false;
2516 : } else {
2517 0 : // If the shift amount is masked with an AND, check that the mask covers the
2518 0 : // bits that are implicitly ANDed off by the above opcodes and if so, skip
2519 : // the AND.
2520 : uint64_t MaskImm;
2521 : if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2522 0 : return false;
2523 0 :
2524 : if (countTrailingOnes(MaskImm) < Bits)
2525 : return false;
2526 0 :
2527 0 : NewShiftAmt = ShiftAmt->getOperand(0);
2528 : }
2529 :
2530 : // Narrow/widen the shift amount to match the size of the shift operation.
2531 : if (VT == MVT::i32)
2532 : NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2533 : else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2534 : SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2535 : MachineSDNode *Ext = CurDAG->getMachineNode(
2536 0 : AArch64::SUBREG_TO_REG, DL, VT,
2537 : CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2538 0 : NewShiftAmt = SDValue(Ext, 0);
2539 0 : }
2540 :
2541 0 : SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2542 : CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2543 : return true;
2544 : }
2545 :
2546 : bool
2547 0 : AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2548 0 : unsigned RegWidth) {
2549 : APFloat FVal(0.0);
2550 0 : if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2551 0 : FVal = CN->getValueAPF();
2552 : else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2553 0 : // Some otherwise illegal constants are allowed in this case.
2554 : if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2555 : !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2556 : return false;
2557 0 :
2558 0 : ConstantPoolSDNode *CN =
2559 0 : dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2560 0 : FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2561 0 : } else
2562 : return false;
2563 :
2564 0 : // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2565 : // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2566 : // x-register.
2567 0 : //
2568 0 : // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2569 0 : // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2570 : // integers.
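 : // Illustrative example (not from the original source): for
 : // (fp_to_sint (fmul x, 256.0)) with a 32-bit destination, this node is
 : // 256.0 = 2^8, so FBits = 8 and the fixed-point form FCVTZS wd, sn, #8
 : // can be used.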
2571 : bool IsExact;
2572 :
2573 0 : // fbits is between 1 and 64 in the worst-case, which means the fmul
2574 : // could have 2^64 as an actual operand. Need 65 bits of precision.
2575 0 : APSInt IntVal(65, true);
2576 : FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2577 0 :
2578 : // N.b. isPowerOf2 also checks for > 0.
2579 : if (!IsExact || !IntVal.isPowerOf2()) return false;
2580 0 : unsigned FBits = IntVal.logBase2();
2581 0 :
2582 0 : // Checks above should have guaranteed that we haven't lost information in
2583 : // finding FBits, but it must still be in range.
2584 : if (FBits == 0 || FBits > RegWidth) return false;
2585 :
2586 0 : FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2587 : return true;
2588 0 : }
2589 :
2590 : // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2591 : // integer values of its fields, and combines them into the single value used
2592 : // in the MRS/MSR instruction.
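 : // Illustrative example (not from the original source): the string "3:3:4:4:0"
 : // yields (3 << 14) | (3 << 11) | (4 << 7) | (4 << 3) | 0 = 0xDA20.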
2593 : static int getIntOperandFromRegisterString(StringRef RegString) {
2594 : SmallVector<StringRef, 5> Fields;
2595 : RegString.split(Fields, ':');
2596 :
2597 : if (Fields.size() == 1)
2598 : return -1;
2599 :
2600 : assert(Fields.size() == 5
2601 : && "Invalid number of fields in read register string");
2602 0 :
2603 : SmallVector<int, 5> Ops;
2604 : bool AllIntFields = true;
2605 0 :
2606 : for (StringRef Field : Fields) {
2607 : unsigned IntField;
2608 : AllIntFields &= !Field.getAsInteger(10, IntField);
2609 : Ops.push_back(IntField);
2610 0 : }
2611 :
2612 0 : assert(AllIntFields &&
2613 0 : "Unexpected non-integer value in special register string.");
2614 :
2615 : // Need to combine the integer fields of the string into a single value
2616 : // based on the bit encoding of MRS/MSR instruction.
2617 : return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2618 : (Ops[3] << 3) | (Ops[4]);
2619 15 : }
2620 :
2621 15 : // Lower the read_register intrinsic to an MRS instruction node if the special
2622 : // register string argument is either of the form detailed in the ACLE (the
2623 : // form described in getIntOperandFromRegisterString) or is a named register
2624 : // known by the MRS SysReg mapper.
2625 : bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2626 : const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2627 : const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2628 : SDLoc DL(N);
2629 :
2630 : int Reg = getIntOperandFromRegisterString(RegString->getString());
2631 : if (Reg != -1) {
2632 12 : ReplaceNode(N, CurDAG->getMachineNode(
2633 : AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2634 10 : CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2635 10 : N->getOperand(0)));
2636 : return true;
2637 : }
2638 :
2639 : // Use the sysreg mapper to map the remaining possible strings to the
2640 : // value for the register to be used for the instruction operand.
2641 : auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2642 : if (TheReg && TheReg->Readable &&
2643 4 : TheReg->haveFeatures(Subtarget->getFeatureBits()))
2644 2 : Reg = TheReg->Encoding;
2645 : else
2646 : Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2647 :
2648 : if (Reg != -1) {
2649 : ReplaceNode(N, CurDAG->getMachineNode(
2650 : AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2651 7 : CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2652 7 : N->getOperand(0)));
2653 7 : return true;
2654 : }
2655 :
2656 7 : return false;
2657 7 : }
2658 1 :
2659 : // Lower the write_register intrinsic to an MSR instruction node if the special
2660 : // register string argument is either of the form detailed in the ACLE (the
2661 : // form described in getIntOperandFromRegisterString) or is a named register
2662 1 : // known by the MSR SysReg mapper.
2663 : bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2664 : const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2665 : const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2666 : SDLoc DL(N);
2667 6 :
2668 6 : int Reg = getIntOperandFromRegisterString(RegString->getString());
2669 2 : if (Reg != -1) {
2670 1 : ReplaceNode(
2671 : N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2672 5 : CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2673 : N->getOperand(2), N->getOperand(0)));
2674 6 : return true;
2675 1 : }
2676 :
2677 : // Check if the register was one of those allowed as the pstatefield value in
2678 1 : // the MSR (immediate) instruction. To accept the values allowed in the
2679 1 : // pstatefield for the MSR (immediate) instruction, we also require that an
2680 : // immediate value has been provided as an argument; we know this is the
2681 1 : // case because it has been ensured by semantic checking.
2682 : auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2683 : if (PMapper) {
2684 : assert (isa<ConstantSDNode>(N->getOperand(2))
2685 : && "Expected a constant integer expression.");
2686 : unsigned Reg = PMapper->Encoding;
2687 : uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2688 : unsigned State;
2689 8 : if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2690 8 : assert(Immed < 2 && "Bad imm");
2691 8 : State = AArch64::MSRpstateImm1;
2692 : } else {
2693 : assert(Immed < 16 && "Bad imm");
2694 8 : State = AArch64::MSRpstateImm4;
2695 8 : }
2696 1 : ReplaceNode(N, CurDAG->getMachineNode(
2697 1 : State, DL, MVT::Other,
2698 : CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2699 1 : CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2700 1 : N->getOperand(0)));
2701 : return true;
2702 : }
2703 :
2704 : // Use the sysreg mapper to attempt to map the remaining possible strings
2705 : // to the value for the register to be used for the MSR (register)
2706 : // instruction operand.
2707 : auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2708 7 : if (TheReg && TheReg->Writeable &&
2709 7 : TheReg->haveFeatures(Subtarget->getFeatureBits()))
2710 : Reg = TheReg->Encoding;
2711 : else
2712 1 : Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2713 2 : if (Reg != -1) {
2714 : ReplaceNode(N, CurDAG->getMachineNode(
2715 1 : AArch64::MSR, DL, MVT::Other,
2716 : CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2717 : N->getOperand(2), N->getOperand(0)));
2718 : return true;
2719 : }
2720 :
2721 : return false;
2722 3 : }
2723 :
2724 1 : /// We've got special pseudo-instructions for these
2725 : bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2726 : unsigned Opcode;
2727 1 : EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2728 :
2729 : // Leave IR for LSE if subtarget supports it.
2730 : if (Subtarget->hasLSE()) return false;
2731 :
2732 : if (MemTy == MVT::i8)
2733 6 : Opcode = AArch64::CMP_SWAP_8;
2734 6 : else if (MemTy == MVT::i16)
2735 2 : Opcode = AArch64::CMP_SWAP_16;
2736 1 : else if (MemTy == MVT::i32)
2737 : Opcode = AArch64::CMP_SWAP_32;
2738 5 : else if (MemTy == MVT::i64)
2739 6 : Opcode = AArch64::CMP_SWAP_64;
2740 1 : else
2741 : llvm_unreachable("Unknown AtomicCmpSwap type");
2742 :
2743 1 : MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2744 1 : SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2745 : N->getOperand(0)};
2746 : SDNode *CmpSwap = CurDAG->getMachineNode(
2747 : Opcode, SDLoc(N),
2748 : CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2749 :
2750 : MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2751 76 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2752 :
2753 : ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2754 : ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2755 : CurDAG->RemoveDeadNode(N);
2756 76 :
2757 : return true;
2758 : }
2759 :
2760 : void AArch64DAGToDAGISel::Select(SDNode *Node) {
2761 : // If we have a custom node, we already have selected!
2762 : if (Node->isMachineOpcode()) {
2763 : LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2764 : Node->setNodeId(-1);
2765 : return;
2766 : }
2767 0 :
2768 : // Few custom selection stuff.
2769 : EVT VT = Node->getValueType(0);
2770 4 :
2771 4 : switch (Node->getOpcode()) {
2772 12 : default:
2773 4 : break;
2774 :
2775 : case ISD::ATOMIC_CMP_SWAP:
2776 4 : if (SelectCMP_SWAP(Node))
2777 8 : return;
2778 : break;
2779 4 :
2780 4 : case ISD::READ_REGISTER:
2781 4 : if (tryReadRegister(Node))
2782 : return;
2783 4 : break;
2784 :
2785 : case ISD::WRITE_REGISTER:
2786 194002 : if (tryWriteRegister(Node))
2787 : return;
2788 194002 : break;
2789 :
2790 : case ISD::ADD:
2791 3259 : if (tryMLAV64LaneV128(Node))
2792 : return;
2793 : break;
2794 :
2795 386966 : case ISD::LOAD: {
2796 : // Try to select as an indexed load. Fall through to normal processing
2797 193483 : // if we can't.
2798 : if (tryIndexedLoad(Node))
2799 : return;
2800 : break;
2801 76 : }
2802 76 :
2803 : case ISD::SRL:
2804 : case ISD::AND:
2805 : case ISD::SRA:
2806 7 : case ISD::SIGN_EXTEND_INREG:
2807 7 : if (tryBitfieldExtractOp(Node))
2808 : return;
2809 : if (tryBitfieldInsertInZeroOp(Node))
2810 : return;
2811 8 : LLVM_FALLTHROUGH;
2812 8 : case ISD::ROTR:
2813 : case ISD::SHL:
2814 : if (tryShiftAmountMod(Node))
2815 : return;
2816 2584 : break;
2817 2584 :
2818 : case ISD::SIGN_EXTEND:
2819 : if (tryBitfieldExtractOpFromSExt(Node))
2820 : return;
2821 6792 : break;
2822 :
2823 : case ISD::OR:
2824 6792 : if (tryBitfieldInsertOp(Node))
2825 : return;
2826 : break;
2827 :
2828 : case ISD::EXTRACT_VECTOR_ELT: {
2829 1507 : // Extracting lane zero is a special case where we can just use a plain
2830 : // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2831 : // the rest of the compiler, especially the register allocator and copy
2832 : // propagation, to reason about, so is preferred when it's possible to
2833 1507 : // use it.
2834 : ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2835 1440 : // Bail and use the default Select() for non-zero lanes.
2836 : if (LaneNode->getZExtValue() != 0)
2837 : break;
2838 : // If the element type is not the same as the result type, likewise
2839 : // bail and use the default Select(), as there's more to do than just
2840 1921 : // a cross-class COPY. This catches extracts of i8 and i16 elements
2841 : // since they will need an explicit zext.
2842 : if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2843 : break;
2844 89 : unsigned SubReg;
2845 89 : switch (Node->getOperand(0)
2846 : .getValueType()
2847 : .getVectorElementType()
2848 : .getSizeInBits()) {
2849 467 : default:
2850 467 : llvm_unreachable("Unexpected vector element type!");
2851 : case 64:
2852 : SubReg = AArch64::dsub;
2853 : break;
2854 1505 : case 32:
2855 : SubReg = AArch64::ssub;
2856 : break;
2857 : case 16:
2858 : SubReg = AArch64::hsub;
2859 : break;
2860 1505 : case 8:
2861 : llvm_unreachable("unexpected zext-requiring extract element!");
2862 3010 : }
2863 : SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2864 : Node->getOperand(0));
2865 : LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2866 : LLVM_DEBUG(Extract->dumpr(CurDAG));
2867 : LLVM_DEBUG(dbgs() << "\n");
2868 1600 : ReplaceNode(Node, Extract.getNode());
2869 : return;
2870 : }
2871 456 : case ISD::Constant: {
2872 456 : // Materialize zero constants as copies from WZR/XZR. This allows
2873 456 : // the coalescer to propagate these into other instructions.
2874 456 : ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2875 0 : if (ConstNode->isNullValue()) {
2876 0 : if (VT == MVT::i32) {
2877 : SDValue New = CurDAG->getCopyFromReg(
2878 : CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2879 : ReplaceNode(Node, New.getNode());
2880 165 : return;
2881 : } else if (VT == MVT::i64) {
2882 165 : SDValue New = CurDAG->getCopyFromReg(
2883 129 : CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2884 : ReplaceNode(Node, New.getNode());
2885 129 : return;
2886 : }
2887 : }
2888 : break;
2889 912 : }
2890 456 :
2891 : case ISD::FrameIndex: {
2892 : // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2893 : int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2894 456 : unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2895 456 : const TargetLowering *TLI = getTargetLowering();
2896 : SDValue TFI = CurDAG->getTargetFrameIndex(
2897 : FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2898 : SDLoc DL(Node);
2899 : SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2900 : CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2901 4704 : CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2902 : return;
2903 651 : }
2904 1302 : case ISD::INTRINSIC_W_CHAIN: {
2905 651 : unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2906 : switch (IntNo) {
2907 : default:
2908 240 : break;
2909 480 : case Intrinsic::aarch64_ldaxp:
2910 240 : case Intrinsic::aarch64_ldxp: {
2911 : unsigned Op =
2912 : IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2913 : SDValue MemAddr = Node->getOperand(2);
2914 : SDLoc DL(Node);
2915 : SDValue Chain = Node->getOperand(0);
2916 :
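 :       // The paired exclusive load produces two i64 results plus a chain,
 :       // matching the two result values of the intrinsic.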
2917 : SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2918 : MVT::Other, MemAddr, Chain);
2919 168 :
2920 : // Transfer memoperands.
2921 : MachineMemOperand *MemOp =
2922 168 : cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2923 336 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
2924 : ReplaceNode(Node, Ld);
2925 336 : return;
2926 168 : }
2927 336 : case Intrinsic::aarch64_stlxp:
2928 : case Intrinsic::aarch64_stxp: {
2929 : unsigned Op =
2930 400 : IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2931 1200 : SDLoc DL(Node);
2932 : SDValue Chain = Node->getOperand(0);
2933 : SDValue ValLo = Node->getOperand(2);
2934 : SDValue ValHi = Node->getOperand(3);
2935 16 : SDValue MemAddr = Node->getOperand(4);
2936 :
2937 : // Place arguments in the right order.
2938 16 : SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2939 16 :
2940 : SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2941 16 : // Transfer memoperands.
2942 : MachineMemOperand *MemOp =
2943 32 : cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2944 : CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2945 :
2946 : ReplaceNode(Node, St);
2947 : return;
2948 16 : }
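 :     // NEON structured loads: each intrinsic is dispatched on the vector type
 :     // to the matching LD1/LD2/LD3/LD4 (or lane/replicating) machine opcode,
 :     // and the SelectLoad/SelectLoadLane helpers split the resulting register
 :     // tuple back into the individual result vectors.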
2949 32 : case Intrinsic::aarch64_neon_ld1x2:
2950 16 : if (VT == MVT::v8i8) {
2951 : SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2952 : return;
2953 16 : } else if (VT == MVT::v16i8) {
2954 : SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2955 : return;
2956 16 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2957 : SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2958 16 : return;
2959 16 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2960 16 : SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2961 16 : return;
2962 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2963 : SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2964 16 : return;
2965 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2966 32 : SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2967 : return;
2968 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2969 16 : SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2970 32 : return;
2971 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2972 16 : SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2973 : return;
2974 : }
2975 : break;
2976 : case Intrinsic::aarch64_neon_ld1x3:
2977 1 : if (VT == MVT::v8i8) {
2978 1 : SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2979 : return;
2980 1 : } else if (VT == MVT::v16i8) {
2981 1 : SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2982 : return;
2983 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2984 2 : SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2985 : return;
2986 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2987 2 : SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2988 : return;
2989 2 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2990 2 : SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2991 : return;
2992 2 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2993 2 : SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2994 : return;
2995 2 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2996 2 : SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2997 : return;
2998 2 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2999 2 : SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3000 : return;
3001 : }
3002 : break;
3003 : case Intrinsic::aarch64_neon_ld1x4:
3004 1 : if (VT == MVT::v8i8) {
3005 1 : SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3006 : return;
3007 1 : } else if (VT == MVT::v16i8) {
3008 1 : SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3009 : return;
3010 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3011 2 : SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3012 : return;
3013 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3014 2 : SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3015 : return;
3016 2 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3017 2 : SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3018 : return;
3019 2 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3020 2 : SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3021 : return;
3022 2 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3023 2 : SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3024 : return;
3025 2 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3026 2 : SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3027 : return;
3028 : }
3029 : break;
3030 : case Intrinsic::aarch64_neon_ld2:
3031 1 : if (VT == MVT::v8i8) {
3032 1 : SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3033 : return;
3034 1 : } else if (VT == MVT::v16i8) {
3035 1 : SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3036 : return;
3037 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3038 2 : SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3039 : return;
3040 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3041 2 : SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3042 : return;
3043 2 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3044 2 : SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3045 : return;
3046 2 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3047 2 : SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3048 : return;
3049 2 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3050 2 : SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3051 : return;
3052 2 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3053 2 : SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3054 : return;
3055 : }
3056 : break;
3057 : case Intrinsic::aarch64_neon_ld3:
3058 5 : if (VT == MVT::v8i8) {
3059 5 : SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3060 : return;
3061 1 : } else if (VT == MVT::v16i8) {
3062 1 : SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3063 : return;
3064 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3065 2 : SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3066 : return;
3067 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3068 2 : SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3069 : return;
3070 2 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3071 2 : SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3072 : return;
3073 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3074 4 : SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3075 : return;
3076 2 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3077 2 : SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3078 : return;
3079 1 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3080 1 : SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3081 : return;
3082 : }
3083 : break;
3084 : case Intrinsic::aarch64_neon_ld4:
3085 2 : if (VT == MVT::v8i8) {
3086 2 : SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3087 : return;
3088 2 : } else if (VT == MVT::v16i8) {
3089 2 : SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3090 : return;
3091 3 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3092 3 : SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3093 : return;
3094 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3095 2 : SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3096 : return;
3097 1 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3098 1 : SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3099 : return;
3100 2 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3101 2 : SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3102 : return;
3103 2 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3104 2 : SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3105 : return;
3106 1 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3107 1 : SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3108 : return;
3109 : }
3110 : break;
3111 : case Intrinsic::aarch64_neon_ld2r:
3112 1 : if (VT == MVT::v8i8) {
3113 1 : SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3114 : return;
3115 3 : } else if (VT == MVT::v16i8) {
3116 3 : SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3117 : return;
3118 3 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3119 3 : SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3120 : return;
3121 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3122 2 : SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3123 : return;
3124 1 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3125 1 : SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3126 : return;
3127 1 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3128 1 : SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3129 : return;
3130 2 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3131 2 : SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3132 : return;
3133 1 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3134 1 : SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3135 : return;
3136 : }
3137 : break;
3138 : case Intrinsic::aarch64_neon_ld3r:
3139 1 : if (VT == MVT::v8i8) {
3140 1 : SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3141 : return;
3142 1 : } else if (VT == MVT::v16i8) {
3143 1 : SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3144 : return;
3145 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3146 2 : SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3147 : return;
3148 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3149 2 : SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3150 : return;
3151 1 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3152 1 : SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3153 : return;
3154 1 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3155 1 : SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3156 : return;
3157 1 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3158 1 : SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3159 : return;
3160 1 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3161 1 : SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3162 : return;
3163 : }
3164 : break;
3165 : case Intrinsic::aarch64_neon_ld4r:
3166 1 : if (VT == MVT::v8i8) {
3167 1 : SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3168 : return;
3169 1 : } else if (VT == MVT::v16i8) {
3170 1 : SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3171 : return;
3172 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3173 2 : SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3174 : return;
3175 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3176 2 : SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3177 : return;
3178 1 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3179 1 : SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3180 : return;
3181 1 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3182 1 : SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3183 : return;
3184 1 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3185 1 : SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3186 : return;
3187 1 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3188 1 : SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3189 : return;
3190 : }
3191 : break;
3192 : case Intrinsic::aarch64_neon_ld2lane:
3193 1 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3194 1 : SelectLoadLane(Node, 2, AArch64::LD2i8);
3195 : return;
3196 1 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3197 1 : VT == MVT::v8f16) {
3198 : SelectLoadLane(Node, 2, AArch64::LD2i16);
3199 2 : return;
3200 2 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3201 : VT == MVT::v2f32) {
3202 2 : SelectLoadLane(Node, 2, AArch64::LD2i32);
3203 2 : return;
3204 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3205 1 : VT == MVT::v1f64) {
3206 1 : SelectLoadLane(Node, 2, AArch64::LD2i64);
3207 : return;
3208 1 : }
3209 1 : break;
3210 : case Intrinsic::aarch64_neon_ld3lane:
3211 1 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3212 1 : SelectLoadLane(Node, 3, AArch64::LD3i8);
3213 : return;
3214 1 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3215 1 : VT == MVT::v8f16) {
3216 : SelectLoadLane(Node, 3, AArch64::LD3i16);
3217 : return;
3218 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3219 : VT == MVT::v2f32) {
3220 1 : SelectLoadLane(Node, 3, AArch64::LD3i32);
3221 1 : return;
3222 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3223 : VT == MVT::v1f64) {
3224 3 : SelectLoadLane(Node, 3, AArch64::LD3i64);
3225 3 : return;
3226 : }
3227 : break;
3228 3 : case Intrinsic::aarch64_neon_ld4lane:
3229 3 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3230 : SelectLoadLane(Node, 4, AArch64::LD4i8);
3231 : return;
3232 3 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3233 3 : VT == MVT::v8f16) {
3234 : SelectLoadLane(Node, 4, AArch64::LD4i16);
3235 : return;
3236 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3237 : VT == MVT::v2f32) {
3238 1 : SelectLoadLane(Node, 4, AArch64::LD4i32);
3239 1 : return;
3240 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3241 : VT == MVT::v1f64) {
3242 3 : SelectLoadLane(Node, 4, AArch64::LD4i64);
3243 3 : return;
3244 : }
3245 : break;
3246 3 : }
3247 3 : } break;
3248 : case ISD::INTRINSIC_WO_CHAIN: {
3249 : unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3250 1 : switch (IntNo) {
3251 1 : default:
3252 : break;
3253 : case Intrinsic::aarch64_neon_tbl2:
3254 : SelectTable(Node, 2,
3255 : VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3256 1 : false);
3257 1 : return;
3258 : case Intrinsic::aarch64_neon_tbl3:
3259 : SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3260 3 : : AArch64::TBLv16i8Three,
3261 3 : false);
3262 : return;
3263 : case Intrinsic::aarch64_neon_tbl4:
3264 3 : SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3265 3 : : AArch64::TBLv16i8Four,
3266 : false);
3267 : return;
3268 1 : case Intrinsic::aarch64_neon_tbx2:
3269 1 : SelectTable(Node, 2,
3270 : VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3271 : true);
3272 : return;
3273 : case Intrinsic::aarch64_neon_tbx3:
3274 1802 : SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3275 5406 : : AArch64::TBXv16i8Three,
3276 : true);
3277 : return;
3278 : case Intrinsic::aarch64_neon_tbx4:
3279 : SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3280 4 : : AArch64::TBXv16i8Four,
3281 : true);
3282 : return;
3283 4 : case Intrinsic::aarch64_neon_smull:
3284 : case Intrinsic::aarch64_neon_umull:
3285 2 : if (tryMULLV64LaneV128(IntNo, Node))
3286 : return;
3287 : break;
3288 2 : }
3289 : break;
3290 2 : }
3291 : case ISD::INTRINSIC_VOID: {
3292 : unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3293 2 : if (Node->getNumOperands() >= 3)
3294 : VT = Node->getOperand(2)->getValueType(0);
3295 2 : switch (IntNo) {
3296 : default:
3297 : break;
3298 2 : case Intrinsic::aarch64_neon_st1x2: {
3299 : if (VT == MVT::v8i8) {
3300 2 : SelectStore(Node, 2, AArch64::ST1Twov8b);
3301 : return;
3302 : } else if (VT == MVT::v16i8) {
3303 2 : SelectStore(Node, 2, AArch64::ST1Twov16b);
3304 : return;
3305 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3306 : SelectStore(Node, 2, AArch64::ST1Twov4h);
3307 : return;
3308 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3309 146 : SelectStore(Node, 2, AArch64::ST1Twov8h);
3310 : return;
3311 146 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3312 : SelectStore(Node, 2, AArch64::ST1Twov2s);
3313 : return;
3314 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3315 : SelectStore(Node, 2, AArch64::ST1Twov4s);
3316 : return;
3317 217 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3318 434 : SelectStore(Node, 2, AArch64::ST1Twov2d);
3319 217 : return;
3320 199 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3321 : SelectStore(Node, 2, AArch64::ST1Twov1d);
3322 : return;
3323 : }
3324 : break;
3325 : }
3326 2 : case Intrinsic::aarch64_neon_st1x3: {
3327 2 : if (VT == MVT::v8i8) {
3328 : SelectStore(Node, 3, AArch64::ST1Threev8b);
3329 2 : return;
3330 2 : } else if (VT == MVT::v16i8) {
3331 : SelectStore(Node, 3, AArch64::ST1Threev16b);
3332 3 : return;
3333 3 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3334 : SelectStore(Node, 3, AArch64::ST1Threev4h);
3335 3 : return;
3336 3 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3337 : SelectStore(Node, 3, AArch64::ST1Threev8h);
3338 4 : return;
3339 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3340 : SelectStore(Node, 3, AArch64::ST1Threev2s);
3341 4 : return;
3342 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3343 : SelectStore(Node, 3, AArch64::ST1Threev4s);
3344 4 : return;
3345 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3346 : SelectStore(Node, 3, AArch64::ST1Threev2d);
3347 4 : return;
3348 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3349 : SelectStore(Node, 3, AArch64::ST1Threev1d);
3350 : return;
3351 : }
3352 : break;
3353 : }
3354 2 : case Intrinsic::aarch64_neon_st1x4: {
3355 2 : if (VT == MVT::v8i8) {
3356 : SelectStore(Node, 4, AArch64::ST1Fourv8b);
3357 2 : return;
3358 2 : } else if (VT == MVT::v16i8) {
3359 : SelectStore(Node, 4, AArch64::ST1Fourv16b);
3360 3 : return;
3361 3 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3362 : SelectStore(Node, 4, AArch64::ST1Fourv4h);
3363 3 : return;
3364 3 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3365 : SelectStore(Node, 4, AArch64::ST1Fourv8h);
3366 4 : return;
3367 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3368 : SelectStore(Node, 4, AArch64::ST1Fourv2s);
3369 4 : return;
3370 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3371 : SelectStore(Node, 4, AArch64::ST1Fourv4s);
3372 4 : return;
3373 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3374 : SelectStore(Node, 4, AArch64::ST1Fourv2d);
3375 4 : return;
3376 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3377 : SelectStore(Node, 4, AArch64::ST1Fourv1d);
3378 : return;
3379 : }
3380 : break;
3381 : }
3382 2 : case Intrinsic::aarch64_neon_st2: {
3383 2 : if (VT == MVT::v8i8) {
3384 : SelectStore(Node, 2, AArch64::ST2Twov8b);
3385 2 : return;
3386 2 : } else if (VT == MVT::v16i8) {
3387 : SelectStore(Node, 2, AArch64::ST2Twov16b);
3388 3 : return;
3389 3 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3390 : SelectStore(Node, 2, AArch64::ST2Twov4h);
3391 3 : return;
3392 3 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3393 : SelectStore(Node, 2, AArch64::ST2Twov8h);
3394 4 : return;
3395 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3396 : SelectStore(Node, 2, AArch64::ST2Twov2s);
3397 4 : return;
3398 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3399 : SelectStore(Node, 2, AArch64::ST2Twov4s);
3400 4 : return;
3401 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3402 : SelectStore(Node, 2, AArch64::ST2Twov2d);
3403 4 : return;
3404 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3405 : SelectStore(Node, 2, AArch64::ST1Twov1d);
3406 : return;
3407 : }
3408 : break;
3409 : }
3410 10 : case Intrinsic::aarch64_neon_st3: {
3411 10 : if (VT == MVT::v8i8) {
3412 : SelectStore(Node, 3, AArch64::ST3Threev8b);
3413 2 : return;
3414 2 : } else if (VT == MVT::v16i8) {
3415 : SelectStore(Node, 3, AArch64::ST3Threev16b);
3416 3 : return;
3417 3 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3418 : SelectStore(Node, 3, AArch64::ST3Threev4h);
3419 3 : return;
3420 3 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3421 : SelectStore(Node, 3, AArch64::ST3Threev8h);
3422 2 : return;
3423 2 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3424 : SelectStore(Node, 3, AArch64::ST3Threev2s);
3425 4 : return;
3426 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3427 : SelectStore(Node, 3, AArch64::ST3Threev4s);
3428 2 : return;
3429 2 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3430 : SelectStore(Node, 3, AArch64::ST3Threev2d);
3431 3 : return;
3432 3 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3433 : SelectStore(Node, 3, AArch64::ST1Threev1d);
3434 : return;
3435 : }
3436 : break;
3437 : }
3438 4 : case Intrinsic::aarch64_neon_st4: {
3439 4 : if (VT == MVT::v8i8) {
3440 : SelectStore(Node, 4, AArch64::ST4Fourv8b);
3441 4 : return;
3442 4 : } else if (VT == MVT::v16i8) {
3443 : SelectStore(Node, 4, AArch64::ST4Fourv16b);
3444 3 : return;
3445 3 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3446 : SelectStore(Node, 4, AArch64::ST4Fourv4h);
3447 3 : return;
3448 3 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3449 : SelectStore(Node, 4, AArch64::ST4Fourv8h);
3450 2 : return;
3451 2 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3452 : SelectStore(Node, 4, AArch64::ST4Fourv2s);
3453 2 : return;
3454 2 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3455 : SelectStore(Node, 4, AArch64::ST4Fourv4s);
3456 2 : return;
3457 2 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3458 : SelectStore(Node, 4, AArch64::ST4Fourv2d);
3459 2 : return;
3460 2 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3461 : SelectStore(Node, 4, AArch64::ST1Fourv1d);
3462 : return;
3463 : }
3464 : break;
3465 : }
3466 2 : case Intrinsic::aarch64_neon_st2lane: {
3467 2 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3468 : SelectStoreLane(Node, 2, AArch64::ST2i8);
3469 4 : return;
3470 4 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3471 : VT == MVT::v8f16) {
3472 3 : SelectStoreLane(Node, 2, AArch64::ST2i16);
3473 3 : return;
3474 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3475 3 : VT == MVT::v2f32) {
3476 3 : SelectStoreLane(Node, 2, AArch64::ST2i32);
3477 : return;
3478 2 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3479 2 : VT == MVT::v1f64) {
3480 : SelectStoreLane(Node, 2, AArch64::ST2i64);
3481 2 : return;
3482 2 : }
3483 : break;
3484 2 : }
3485 2 : case Intrinsic::aarch64_neon_st3lane: {
3486 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3487 2 : SelectStoreLane(Node, 3, AArch64::ST3i8);
3488 2 : return;
3489 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3490 : VT == MVT::v8f16) {
3491 : SelectStoreLane(Node, 3, AArch64::ST3i16);
3492 : return;
3493 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3494 2 : VT == MVT::v2f32) {
3495 2 : SelectStoreLane(Node, 3, AArch64::ST3i32);
3496 : return;
3497 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3498 4 : VT == MVT::v1f64) {
3499 4 : SelectStoreLane(Node, 3, AArch64::ST3i64);
3500 : return;
3501 : }
3502 2 : break;
3503 2 : }
3504 : case Intrinsic::aarch64_neon_st4lane: {
3505 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3506 5 : SelectStoreLane(Node, 4, AArch64::ST4i8);
3507 5 : return;
3508 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3509 : VT == MVT::v8f16) {
3510 : SelectStoreLane(Node, 4, AArch64::ST4i16);
3511 : return;
3512 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3513 2 : VT == MVT::v2f32) {
3514 2 : SelectStoreLane(Node, 4, AArch64::ST4i32);
3515 : return;
3516 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3517 4 : VT == MVT::v1f64) {
3518 4 : SelectStoreLane(Node, 4, AArch64::ST4i64);
3519 : return;
3520 : }
3521 2 : break;
3522 2 : }
3523 : }
3524 : break;
3525 3 : }
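 :   // Post-incremented NEON loads and stores: these AArch64ISD::*post nodes are
 :   // typically formed by target DAG combines that fold a base-register update
 :   // into the structured load/store; each one selects the corresponding _POST
 :   // machine instruction, which also produces the written-back address.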
3526 3 : case AArch64ISD::LD2post: {
3527 : if (VT == MVT::v8i8) {
3528 : SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3529 : return;
3530 : } else if (VT == MVT::v16i8) {
3531 : SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3532 2 : return;
3533 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3534 : SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3535 : return;
3536 4 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3537 4 : SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3538 : return;
3539 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3540 2 : SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3541 2 : return;
3542 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3543 : SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3544 3 : return;
3545 3 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3546 : SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3547 : return;
3548 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3549 : SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3550 : return;
3551 : }
3552 : break;
3553 : }
3554 2 : case AArch64ISD::LD3post: {
3555 2 : if (VT == MVT::v8i8) {
3556 : SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3557 2 : return;
3558 2 : } else if (VT == MVT::v16i8) {
3559 : SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3560 2 : return;
3561 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3562 : SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3563 2 : return;
3564 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3565 : SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3566 4 : return;
3567 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3568 : SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3569 4 : return;
3570 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3571 : SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3572 4 : return;
3573 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3574 : SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3575 4 : return;
3576 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3577 : SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3578 : return;
3579 : }
3580 : break;
3581 : }
3582 2 : case AArch64ISD::LD4post: {
3583 2 : if (VT == MVT::v8i8) {
3584 : SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3585 2 : return;
3586 2 : } else if (VT == MVT::v16i8) {
3587 : SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3588 2 : return;
3589 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3590 : SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3591 2 : return;
3592 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3593 : SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3594 4 : return;
3595 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3596 : SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3597 4 : return;
3598 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3599 : SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3600 4 : return;
3601 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3602 : SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3603 4 : return;
3604 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3605 : SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3606 : return;
3607 : }
3608 : break;
3609 : }
3610 2 : case AArch64ISD::LD1x2post: {
3611 2 : if (VT == MVT::v8i8) {
3612 : SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3613 2 : return;
3614 2 : } else if (VT == MVT::v16i8) {
3615 : SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3616 2 : return;
3617 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3618 : SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3619 2 : return;
3620 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3621 : SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3622 4 : return;
3623 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3624 : SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3625 4 : return;
3626 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3627 : SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3628 4 : return;
3629 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3630 : SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3631 4 : return;
3632 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3633 : SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3634 : return;
3635 : }
3636 : break;
3637 : }
3638 2 : case AArch64ISD::LD1x3post: {
3639 2 : if (VT == MVT::v8i8) {
3640 : SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3641 2 : return;
3642 2 : } else if (VT == MVT::v16i8) {
3643 : SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3644 2 : return;
3645 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3646 : SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3647 2 : return;
3648 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3649 : SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3650 4 : return;
3651 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3652 : SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3653 4 : return;
3654 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3655 : SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3656 4 : return;
3657 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3658 : SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3659 4 : return;
3660 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3661 : SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3662 : return;
3663 : }
3664 : break;
3665 : }
3666 2 : case AArch64ISD::LD1x4post: {
3667 2 : if (VT == MVT::v8i8) {
3668 : SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3669 2 : return;
3670 2 : } else if (VT == MVT::v16i8) {
3671 : SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3672 2 : return;
3673 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3674 : SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3675 2 : return;
3676 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3677 : SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3678 4 : return;
3679 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3680 : SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3681 4 : return;
3682 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3683 : SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3684 4 : return;
3685 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3686 : SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3687 4 : return;
3688 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3689 : SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3690 : return;
3691 : }
3692 : break;
3693 : }
3694 2 : case AArch64ISD::LD1DUPpost: {
3695 2 : if (VT == MVT::v8i8) {
3696 : SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3697 2 : return;
3698 2 : } else if (VT == MVT::v16i8) {
3699 : SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3700 2 : return;
3701 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3702 : SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3703 2 : return;
3704 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3705 : SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3706 4 : return;
3707 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3708 : SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3709 4 : return;
3710 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3711 : SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3712 4 : return;
3713 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3714 : SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3715 4 : return;
3716 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3717 : SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3718 : return;
3719 : }
3720 : break;
3721 : }
3722 2 : case AArch64ISD::LD2DUPpost: {
3723 2 : if (VT == MVT::v8i8) {
3724 : SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3725 2 : return;
3726 2 : } else if (VT == MVT::v16i8) {
3727 : SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3728 2 : return;
3729 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3730 : SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3731 2 : return;
3732 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3733 : SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3734 4 : return;
3735 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3736 : SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3737 4 : return;
3738 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3739 : SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3740 0 : return;
3741 0 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3742 : SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3743 4 : return;
3744 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3745 : SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3746 : return;
3747 : }
3748 : break;
3749 : }
3750 2 : case AArch64ISD::LD3DUPpost: {
3751 2 : if (VT == MVT::v8i8) {
3752 : SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3753 2 : return;
3754 2 : } else if (VT == MVT::v16i8) {
3755 : SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3756 2 : return;
3757 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3758 : SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3759 2 : return;
3760 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3761 : SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3762 4 : return;
3763 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3764 : SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3765 4 : return;
3766 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3767 : SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3768 4 : return;
3769 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3770 : SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3771 4 : return;
3772 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3773 : SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3774 : return;
3775 : }
3776 : break;
3777 : }
3778 2 : case AArch64ISD::LD4DUPpost: {
3779 2 : if (VT == MVT::v8i8) {
3780 : SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3781 2 : return;
3782 2 : } else if (VT == MVT::v16i8) {
3783 : SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3784 2 : return;
3785 2 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3786 : SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3787 2 : return;
3788 2 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3789 : SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3790 4 : return;
3791 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3792 : SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3793 4 : return;
3794 4 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3795 : SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3796 4 : return;
3797 4 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3798 : SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3799 4 : return;
3800 4 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3801 : SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3802 : return;
3803 : }
3804 : break;
3805 : }
3806 2 : case AArch64ISD::LD1LANEpost: {
3807 2 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3808 : SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3809 2 : return;
3810 2 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3811 : VT == MVT::v8f16) {
3812 2 : SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3813 2 : return;
3814 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3815 2 : VT == MVT::v2f32) {
3816 2 : SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3817 : return;
3818 4 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3819 4 : VT == MVT::v1f64) {
3820 : SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3821 4 : return;
3822 4 : }
3823 : break;
3824 4 : }
3825 4 : case AArch64ISD::LD2LANEpost: {
3826 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3827 4 : SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3828 4 : return;
3829 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3830 : VT == MVT::v8f16) {
3831 : SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3832 : return;
3833 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3834 4 : VT == MVT::v2f32) {
3835 4 : SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3836 : return;
3837 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3838 5 : VT == MVT::v1f64) {
3839 5 : SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3840 : return;
3841 : }
3842 8 : break;
3843 8 : }
3844 : case AArch64ISD::LD3LANEpost: {
3845 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3846 5 : SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3847 5 : return;
3848 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3849 : VT == MVT::v8f16) {
3850 : SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3851 : return;
3852 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3853 4 : VT == MVT::v2f32) {
3854 4 : SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3855 : return;
3856 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3857 4 : VT == MVT::v1f64) {
3858 4 : SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3859 : return;
3860 : }
3861 8 : break;
3862 8 : }
3863 : case AArch64ISD::LD4LANEpost: {
3864 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3865 8 : SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3866 8 : return;
3867 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3868 : VT == MVT::v8f16) {
3869 : SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3870 : return;
3871 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3872 4 : VT == MVT::v2f32) {
3873 4 : SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3874 : return;
3875 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3876 4 : VT == MVT::v1f64) {
3877 4 : SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3878 : return;
3879 : }
3880 8 : break;
3881 8 : }
3882 : case AArch64ISD::ST2post: {
3883 : VT = Node->getOperand(1).getValueType();
3884 8 : if (VT == MVT::v8i8) {
3885 8 : SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3886 : return;
3887 : } else if (VT == MVT::v16i8) {
3888 : SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3889 : return;
3890 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3891 4 : SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3892 4 : return;
3893 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3894 : SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3895 4 : return;
3896 4 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3897 : SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3898 : return;
3899 8 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3900 8 : SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3901 : return;
3902 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3903 8 : SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3904 8 : return;
3905 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3906 : SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3907 : return;
3908 24 : }
3909 24 : break;
3910 : }
3911 2 : case AArch64ISD::ST3post: {
3912 2 : VT = Node->getOperand(1).getValueType();
3913 : if (VT == MVT::v8i8) {
3914 2 : SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3915 2 : return;
3916 : } else if (VT == MVT::v16i8) {
3917 2 : SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3918 2 : return;
3919 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3920 2 : SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3921 2 : return;
3922 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3923 4 : SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3924 4 : return;
3925 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3926 4 : SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3927 4 : return;
3928 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3929 4 : SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3930 4 : return;
3931 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3932 4 : SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3933 4 : return;
3934 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3935 : SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3936 : return;
3937 24 : }
3938 24 : break;
3939 : }
3940 2 : case AArch64ISD::ST4post: {
3941 2 : VT = Node->getOperand(1).getValueType();
3942 : if (VT == MVT::v8i8) {
3943 2 : SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3944 2 : return;
3945 : } else if (VT == MVT::v16i8) {
3946 2 : SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3947 2 : return;
3948 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3949 2 : SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3950 2 : return;
3951 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3952 4 : SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3953 4 : return;
3954 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3955 4 : SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3956 4 : return;
3957 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3958 4 : SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3959 4 : return;
3960 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3961 4 : SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3962 4 : return;
3963 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3964 : SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3965 : return;
3966 24 : }
3967 24 : break;
3968 : }
3969 2 : case AArch64ISD::ST1x2post: {
3970 2 : VT = Node->getOperand(1).getValueType();
3971 : if (VT == MVT::v8i8) {
3972 2 : SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3973 2 : return;
3974 : } else if (VT == MVT::v16i8) {
3975 2 : SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3976 2 : return;
3977 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3978 2 : SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3979 2 : return;
3980 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3981 4 : SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3982 4 : return;
3983 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3984 4 : SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3985 4 : return;
3986 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3987 4 : SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3988 4 : return;
3989 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3990 4 : SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3991 4 : return;
3992 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3993 : SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3994 : return;
3995 24 : }
3996 24 : break;
3997 : }
3998 2 : case AArch64ISD::ST1x3post: {
3999 2 : VT = Node->getOperand(1).getValueType();
4000 : if (VT == MVT::v8i8) {
4001 2 : SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4002 2 : return;
4003 : } else if (VT == MVT::v16i8) {
4004 2 : SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4005 2 : return;
4006 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4007 2 : SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4008 2 : return;
4009 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4010 4 : SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4011 4 : return;
4012 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4013 4 : SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4014 4 : return;
4015 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4016 4 : SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4017 4 : return;
4018 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4019 4 : SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4020 4 : return;
4021 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4022 : SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4023 : return;
4024 24 : }
4025 24 : break;
4026 : }
4027 2 : case AArch64ISD::ST1x4post: {
4028 2 : VT = Node->getOperand(1).getValueType();
4029 : if (VT == MVT::v8i8) {
4030 2 : SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4031 2 : return;
4032 : } else if (VT == MVT::v16i8) {
4033 2 : SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4034 2 : return;
4035 : } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4036 2 : SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4037 2 : return;
4038 : } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4039 4 : SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4040 4 : return;
4041 : } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4042 4 : SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4043 4 : return;
4044 : } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4045 4 : SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4046 4 : return;
4047 : } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4048 4 : SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4049 4 : return;
4050 : } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4051 : SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4052 : return;
4053 24 : }
4054 24 : break;
4055 : }
4056 2 : case AArch64ISD::ST2LANEpost: {
4057 2 : VT = Node->getOperand(1).getValueType();
4058 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4059 2 : SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4060 2 : return;
4061 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4062 2 : VT == MVT::v8f16) {
4063 2 : SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4064 : return;
4065 2 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4066 2 : VT == MVT::v2f32) {
4067 : SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4068 4 : return;
4069 4 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4070 : VT == MVT::v1f64) {
4071 4 : SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4072 4 : return;
4073 : }
4074 4 : break;
4075 4 : }
4076 : case AArch64ISD::ST3LANEpost: {
4077 4 : VT = Node->getOperand(1).getValueType();
4078 4 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4079 : SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4080 : return;
4081 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4082 24 : VT == MVT::v8f16) {
4083 24 : SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4084 : return;
4085 4 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4086 4 : VT == MVT::v2f32) {
4087 : SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4088 : return;
4089 4 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4090 4 : VT == MVT::v1f64) {
4091 : SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4092 : return;
4093 8 : }
4094 8 : break;
4095 : }
4096 : case AArch64ISD::ST4LANEpost: {
4097 8 : VT = Node->getOperand(1).getValueType();
4098 8 : if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4099 : SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4100 : return;
4101 : } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4102 24 : VT == MVT::v8f16) {
4103 24 : SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4104 : return;
4105 4 : } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4106 4 : VT == MVT::v2f32) {
4107 : SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4108 : return;
4109 4 : } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4110 4 : VT == MVT::v1f64) {
4111 : SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4112 : return;
4113 8 : }
4114 8 : break;
4115 : }
4116 : }
4117 8 :
4118 8 : // Select the default instruction
4119 : SelectCode(Node);
4120 : }
4121 :
4122 24 : /// createAArch64ISelDag - This pass converts a legalized DAG into an
4123 24 : /// AArch64-specific DAG, ready for instruction scheduling.
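 : /// It is normally registered from the target's pass configuration, roughly as
 : /// addPass(createAArch64ISelDag(TM, OptLevel)) in
 : /// AArch64PassConfig::addInstSelector.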
4124 : FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4125 4 : CodeGenOpt::Level OptLevel) {
4126 4 : return new AArch64DAGToDAGISel(TM, OptLevel);
4127 : }