1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64TargetMachine.h"
14 #include "MCTargetDesc/AArch64AddressingModes.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/CodeGen/SelectionDAGISel.h"
17 #include "llvm/IR/Function.h" // To access function attributes.
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/KnownBits.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "aarch64-isel"
29 
30 //===--------------------------------------------------------------------===//
31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32 /// instructions for SelectionDAG operations.
33 ///
34 namespace {
35 
36 class AArch64DAGToDAGISel : public SelectionDAGISel {
37 
38  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
39  /// make the right decision when generating code for different targets.
40  const AArch64Subtarget *Subtarget;
41 
42  bool ForCodeSize;
43 
44 public:
45  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
46  CodeGenOpt::Level OptLevel)
47  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
48  ForCodeSize(false) {}
49 
50  StringRef getPassName() const override {
51  return "AArch64 Instruction Selection";
52  }
53 
54  bool runOnMachineFunction(MachineFunction &MF) override {
55  ForCodeSize = MF.getFunction().hasOptSize();
56  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57  return SelectionDAGISel::runOnMachineFunction(MF);
58  }
59 
60  void Select(SDNode *Node) override;
61 
62  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63  /// inline asm expressions.
64  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65  unsigned ConstraintID,
66  std::vector<SDValue> &OutOps) override;
67 
68  bool tryMLAV64LaneV128(SDNode *N);
69  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
70  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74  return SelectShiftedRegister(N, false, Reg, Shift);
75  }
76  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77  return SelectShiftedRegister(N, true, Reg, Shift);
78  }
79  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81  }
82  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84  }
85  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87  }
88  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90  }
91  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93  }
94  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
95  return SelectAddrModeIndexed(N, 1, Base, OffImm);
96  }
97  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
98  return SelectAddrModeIndexed(N, 2, Base, OffImm);
99  }
100  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
101  return SelectAddrModeIndexed(N, 4, Base, OffImm);
102  }
103  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
104  return SelectAddrModeIndexed(N, 8, Base, OffImm);
105  }
106  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
107  return SelectAddrModeIndexed(N, 16, Base, OffImm);
108  }
109  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
110  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
111  }
112  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
113  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
114  }
115  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
116  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
117  }
118  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
119  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
120  }
121  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
122  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
123  }
124 
125  template<int Width>
126  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
127  SDValue &SignExtend, SDValue &DoShift) {
128  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
129  }
130 
131  template<int Width>
132  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
133  SDValue &SignExtend, SDValue &DoShift) {
134  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
135  }
136 
137 
138  /// Form sequences of consecutive 64/128-bit registers for use in NEON
139  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
140  /// between 1 and 4 elements. If it contains a single element, that element is
141  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
142  SDValue createDTuple(ArrayRef<SDValue> Vecs);
143  SDValue createQTuple(ArrayRef<SDValue> Vecs);
144 
145  /// Generic helper for the createDTuple/createQTuple
146  /// functions. Those should almost always be called instead.
147  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
148  const unsigned SubRegs[]);
149 
150  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
151 
152  bool tryIndexedLoad(SDNode *N);
153 
154  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
155  unsigned SubRegIdx);
156  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
157  unsigned SubRegIdx);
158  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
159  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160 
161  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
162  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
164  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165 
166  bool tryBitfieldExtractOp(SDNode *N);
167  bool tryBitfieldExtractOpFromSExt(SDNode *N);
168  bool tryBitfieldInsertOp(SDNode *N);
169  bool tryBitfieldInsertInZeroOp(SDNode *N);
170  bool tryShiftAmountMod(SDNode *N);
171 
172  bool tryReadRegister(SDNode *N);
173  bool tryWriteRegister(SDNode *N);
174 
175 // Include the pieces autogenerated from the target description.
176 #include "AArch64GenDAGISel.inc"
177 
178 private:
179  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
180  SDValue &Shift);
181  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
182  SDValue &OffImm);
183  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
184  SDValue &OffImm);
185  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
186  SDValue &OffImm);
187  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
188  SDValue &Offset, SDValue &SignExtend,
189  SDValue &DoShift);
190  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
191  SDValue &Offset, SDValue &SignExtend,
192  SDValue &DoShift);
193  bool isWorthFolding(SDValue V) const;
194  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
195  SDValue &Offset, SDValue &SignExtend);
196 
197  template<unsigned RegWidth>
198  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
199  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
200  }
201 
202  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
203 
204  bool SelectCMP_SWAP(SDNode *N);
205 
206 };
207 } // end anonymous namespace
208 
209 /// isIntImmediate - This method tests to see if the node is a constant
210 /// operand. If so Imm will receive the 32-bit value.
211 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
212  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
213  Imm = C->getZExtValue();
214  return true;
215  }
216  return false;
217 }
218 
219 // isIntImmediate - This method tests to see if the operand is a constant.
220 // If so, Imm will receive the value.
221 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
222  return isIntImmediate(N.getNode(), Imm);
223 }
224 
225 // isOpcWithIntImmediate - This method tests to see if the node is a specific
226 // opcode and that it has an immediate integer right operand.
227 // If so, Imm will receive the 32-bit value.
228 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
229  uint64_t &Imm) {
230  return N->getOpcode() == Opc &&
231  isIntImmediate(N->getOperand(1).getNode(), Imm);
232 }
233 
234 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
235  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
236  switch(ConstraintID) {
237  default:
238  llvm_unreachable("Unexpected asm memory constraint");
239  case InlineAsm::Constraint_i:
240  case InlineAsm::Constraint_m:
241  case InlineAsm::Constraint_Q:
242  // We need to make sure that this one operand does not end up in XZR, thus
243  // require the address to be in a PointerRegClass register.
244  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
245  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
246  SDLoc dl(Op);
247  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
248  SDValue NewOp =
249  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
250  dl, Op.getValueType(),
251  Op, RC), 0);
252  OutOps.push_back(NewOp);
253  return false;
254  }
255  return true;
256 }
257 
258 /// SelectArithImmed - Select an immediate value that can be represented as
259 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
260 /// Val set to the 12-bit value and Shift set to the shifter operand.
261 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
262  SDValue &Shift) {
263  // This function is called from the addsub_shifted_imm ComplexPattern,
264  // which lists [imm] as the list of opcodes it's interested in; however,
265  // we still need to check whether the operand is actually an immediate
266  // here because the ComplexPattern opcode list is only used in
267  // root-level opcode matching.
268  if (!isa<ConstantSDNode>(N.getNode()))
269  return false;
270 
271  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
272  unsigned ShiftAmt;
273 
274  if (Immed >> 12 == 0) {
275  ShiftAmt = 0;
276  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
277  ShiftAmt = 12;
278  Immed = Immed >> 12;
279  } else
280  return false;
281 
282  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
283  SDLoc dl(N);
284  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
285  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
286  return true;
287 }
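// Illustrative example (not in the original source): with this predicate, an
// add of #0x123 or #0x123000 folds into a single ADD (the latter as
// "ADD Xd, Xn, #0x123, LSL #12"), while #0x123456 fails both checks and must
// be materialized into a register first.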
288 
289 /// SelectNegArithImmed - As above, but negates the value before trying to
290 /// select it.
291 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
292  SDValue &Shift) {
293  // This function is called from the addsub_shifted_imm ComplexPattern,
294  // which lists [imm] as the list of opcodes it's interested in; however,
295  // we still need to check whether the operand is actually an immediate
296  // here because the ComplexPattern opcode list is only used in
297  // root-level opcode matching.
298  if (!isa<ConstantSDNode>(N.getNode()))
299  return false;
300 
301  // The immediate operand must be a 24-bit zero-extended immediate.
302  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
303 
304  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
305  // have the opposite effect on the C flag, so this pattern mustn't match under
306  // those circumstances.
307  if (Immed == 0)
308  return false;
309 
310  if (N.getValueType() == MVT::i32)
311  Immed = ~((uint32_t)Immed) + 1;
312  else
313  Immed = ~Immed + 1ULL;
314  if (Immed & 0xFFFFFFFFFF000000ULL)
315  return false;
316 
317  Immed &= 0xFFFFFFULL;
318  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
319  Shift);
320 }
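// Illustrative example (not in the original source): (add w0, #-32) is matched
// here via the negated immediate 32, so it can be selected as SUB w0, w0, #32;
// the #0 case is rejected because CMP and CMN with #0 set the C flag
// differently.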
321 
322 /// getShiftTypeForNode - Translate a shift node to the corresponding
323 /// ShiftType value.
324 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
325  switch (N.getOpcode()) {
326  default:
327  return AArch64_AM::InvalidShiftExtend;
328  case ISD::SHL:
329  return AArch64_AM::LSL;
330  case ISD::SRL:
331  return AArch64_AM::LSR;
332  case ISD::SRA:
333  return AArch64_AM::ASR;
334  case ISD::ROTR:
335  return AArch64_AM::ROR;
336  }
337 }
338 
339 /// Determine whether it is worth it to fold SHL into the addressing
340 /// mode.
341 static bool isWorthFoldingSHL(SDValue V) {
342  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
343  // It is worth folding logical shift of up to three places.
344  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
345  if (!CSD)
346  return false;
347  unsigned ShiftVal = CSD->getZExtValue();
348  if (ShiftVal > 3)
349  return false;
350 
351  // Check if this particular node is reused in any non-memory related
352  // operation. If yes, do not try to fold this node into the address
353  // computation, since the computation will be kept.
354  const SDNode *Node = V.getNode();
355  for (SDNode *UI : Node->uses())
356  if (!isa<MemSDNode>(*UI))
357  for (SDNode *UII : UI->uses())
358  if (!isa<MemSDNode>(*UII))
359  return false;
360  return true;
361 }
362 
363 /// Determine whether it is worth to fold V into an extended register.
364 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
365  // Trivial if we are optimizing for code size or if there is only
366  // one use of the value.
367  if (ForCodeSize || V.hasOneUse())
368  return true;
369  // If a subtarget has a fastpath LSL we can fold a logical shift into
370  // the addressing mode and save a cycle.
371  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
372  isWorthFoldingSHL(V))
373  return true;
374  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
375  const SDValue LHS = V.getOperand(0);
376  const SDValue RHS = V.getOperand(1);
377  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
378  return true;
379  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
380  return true;
381  }
382 
383  // It hurts otherwise, since the value will be reused.
384  return false;
385 }
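// Illustrative example (not in the original source): for a one-use
// (add x0, (shl x1, #3)) feeding a load, folding yields
// "ldr xD, [x0, x1, lsl #3]" instead of a separate ADD followed by a plain LDR.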
386 
387 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
388 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
389 /// instructions allow the shifted register to be rotated, but the arithmetic
390 /// instructions do not. The AllowROR parameter specifies whether ROR is
391 /// supported.
392 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
393  SDValue &Reg, SDValue &Shift) {
394  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
395  if (ShType == AArch64_AM::InvalidShiftExtend)
396  return false;
397  if (!AllowROR && ShType == AArch64_AM::ROR)
398  return false;
399 
400  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
401  unsigned BitSize = N.getValueSizeInBits();
402  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
403  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
404 
405  Reg = N.getOperand(0);
406  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
407  return isWorthFolding(N);
408  }
409 
410  return false;
411 }
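// Illustrative example (not in the original source): the shift operand of
// (and x0, (srl x1, #4)) is matched here so the whole node selects to
// "and xD, x0, x1, lsr #4"; ROR is only accepted when AllowROR is set, i.e.
// for the logical instructions.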
412 
413 /// getExtendTypeForNode - Translate an extend node to the corresponding
414 /// ExtendType value.
415 static AArch64_AM::ShiftExtendType
416 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
417  if (N.getOpcode() == ISD::SIGN_EXTEND ||
418  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
419  EVT SrcVT;
420  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
421  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
422  else
423  SrcVT = N.getOperand(0).getValueType();
424 
425  if (!IsLoadStore && SrcVT == MVT::i8)
426  return AArch64_AM::SXTB;
427  else if (!IsLoadStore && SrcVT == MVT::i16)
428  return AArch64_AM::SXTH;
429  else if (SrcVT == MVT::i32)
430  return AArch64_AM::SXTW;
431  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
432 
433  return AArch64_AM::InvalidShiftExtend;
434  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
435  N.getOpcode() == ISD::ANY_EXTEND) {
436  EVT SrcVT = N.getOperand(0).getValueType();
437  if (!IsLoadStore && SrcVT == MVT::i8)
438  return AArch64_AM::UXTB;
439  else if (!IsLoadStore && SrcVT == MVT::i16)
440  return AArch64_AM::UXTH;
441  else if (SrcVT == MVT::i32)
442  return AArch64_AM::UXTW;
443  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
444 
445  return AArch64_AM::InvalidShiftExtend;
446  } else if (N.getOpcode() == ISD::AND) {
447  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
448  if (!CSD)
449  return AArch64_AM::InvalidShiftExtend;
450  uint64_t AndMask = CSD->getZExtValue();
451 
452  switch (AndMask) {
453  default:
454  return AArch64_AM::InvalidShiftExtend;
455  case 0xFF:
456  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
457  case 0xFFFF:
458  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
459  case 0xFFFFFFFF:
460  return AArch64_AM::UXTW;
461  }
462  }
463 
464  return AArch64_AM::InvalidShiftExtend;
465 }
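// Illustrative examples (not in the original source): (and w0, #0xff) maps to
// UXTB, a sign_extend_inreg from i16 maps to SXTH, and a zero_extend from i32
// maps to UXTW; anything else yields InvalidShiftExtend.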
466 
467 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
468 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
469  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
470  DL->getOpcode() != AArch64ISD::DUPLANE32)
471  return false;
472 
473  SDValue SV = DL->getOperand(0);
474  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
475  return false;
476 
477  SDValue EV = SV.getOperand(1);
478  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
479  return false;
480 
481  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
482  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
483  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
484  LaneOp = EV.getOperand(0);
485 
486  return true;
487 }
488 
489 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
490 // high lane extract.
491 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
492  SDValue &LaneOp, int &LaneIdx) {
493 
494  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
495  std::swap(Op0, Op1);
496  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
497  return false;
498  }
499  StdOp = Op1;
500  return true;
501 }
502 
503 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
504 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
505 /// so that we don't emit unnecessary lane extracts.
506 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
507  SDLoc dl(N);
508  SDValue Op0 = N->getOperand(0);
509  SDValue Op1 = N->getOperand(1);
510  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
511  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
512  int LaneIdx = -1; // Will hold the lane index.
513 
514  if (Op1.getOpcode() != ISD::MUL ||
515  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
516  LaneIdx)) {
517  std::swap(Op0, Op1);
518  if (Op1.getOpcode() != ISD::MUL ||
519  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
520  LaneIdx))
521  return false;
522  }
523 
524  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
525 
526  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
527 
528  unsigned MLAOpc = ~0U;
529 
530  switch (N->getSimpleValueType(0).SimpleTy) {
531  default:
532  llvm_unreachable("Unrecognized MLA.");
533  case MVT::v4i16:
534  MLAOpc = AArch64::MLAv4i16_indexed;
535  break;
536  case MVT::v8i16:
537  MLAOpc = AArch64::MLAv8i16_indexed;
538  break;
539  case MVT::v2i32:
540  MLAOpc = AArch64::MLAv2i32_indexed;
541  break;
542  case MVT::v4i32:
543  MLAOpc = AArch64::MLAv4i32_indexed;
544  break;
545  }
546 
547  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
548  return true;
549 }
550 
551 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
552  SDLoc dl(N);
553  SDValue SMULLOp0;
554  SDValue SMULLOp1;
555  int LaneIdx;
556 
557  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
558  LaneIdx))
559  return false;
560 
561  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
562 
563  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
564 
565  unsigned SMULLOpc = ~0U;
566 
567  if (IntNo == Intrinsic::aarch64_neon_smull) {
568  switch (N->getSimpleValueType(0).SimpleTy) {
569  default:
570  llvm_unreachable("Unrecognized SMULL.");
571  case MVT::v4i32:
572  SMULLOpc = AArch64::SMULLv4i16_indexed;
573  break;
574  case MVT::v2i64:
575  SMULLOpc = AArch64::SMULLv2i32_indexed;
576  break;
577  }
578  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
579  switch (N->getSimpleValueType(0).SimpleTy) {
580  default:
581  llvm_unreachable("Unrecognized SMULL.");
582  case MVT::v4i32:
583  SMULLOpc = AArch64::UMULLv4i16_indexed;
584  break;
585  case MVT::v2i64:
586  SMULLOpc = AArch64::UMULLv2i32_indexed;
587  break;
588  }
589  } else
590  llvm_unreachable("Unrecognized intrinsic.");
591 
592  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
593  return true;
594 }
595 
596 /// Instructions that accept extend modifiers like UXTW expect the register
597 /// being extended to be a GPR32, but the incoming DAG might be acting on a
598 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
599 /// this is the case.
600 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
601  if (N.getValueType() == MVT::i32)
602  return N;
603 
604  SDLoc dl(N);
605  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
606  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
607  dl, MVT::i32, N, SubReg);
608  return SDValue(Node, 0);
609 }
610 
611 
612 /// SelectArithExtendedRegister - Select a "extended register" operand. This
613 /// operand folds in an extend followed by an optional left shift.
614 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
615  SDValue &Shift) {
616  unsigned ShiftVal = 0;
617  AArch64_AM::ShiftExtendType Ext;
618 
619  if (N.getOpcode() == ISD::SHL) {
620  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
621  if (!CSD)
622  return false;
623  ShiftVal = CSD->getZExtValue();
624  if (ShiftVal > 4)
625  return false;
626 
627  Ext = getExtendTypeForNode(N.getOperand(0));
628  if (Ext == AArch64_AM::InvalidShiftExtend)
629  return false;
630 
631  Reg = N.getOperand(0).getOperand(0);
632  } else {
633  Ext = getExtendTypeForNode(N);
634  if (Ext == AArch64_AM::InvalidShiftExtend)
635  return false;
636 
637  Reg = N.getOperand(0);
638 
639  // Don't match if free 32-bit -> 64-bit zext can be used instead.
640  if (Ext == AArch64_AM::UXTW &&
641  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
642  return false;
643  }
644 
645  // AArch64 mandates that the RHS of the operation must use the smallest
646  // register class that could contain the size being extended from. Thus,
647  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
648  // there might not be an actual 32-bit value in the program. We can
649  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
650  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
651  Reg = narrowIfNeeded(CurDAG, Reg);
652  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
653  MVT::i32);
654  return isWorthFolding(N);
655 }
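// Illustrative example (not in the original source): for
// (add x0, (shl (sext i32 w1 to i64), #2)) this folds the extend and shift
// into the operand, giving "add xD, x0, w1, sxtw #2".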
656 
657 /// If there's a use of this ADDlow that's not itself a load/store then we'll
658 /// need to create a real ADD instruction from it anyway and there's no point in
659 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
660 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
661 /// leads to duplicated ADRP instructions.
662 static bool isWorthFoldingADDlow(SDValue N) {
663  for (auto Use : N->uses()) {
664  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
665  Use->getOpcode() != ISD::ATOMIC_LOAD &&
666  Use->getOpcode() != ISD::ATOMIC_STORE)
667  return false;
668 
669  // ldar and stlr have much more restrictive addressing modes (just a
670  // register).
671  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
672  return false;
673  }
674 
675  return true;
676 }
677 
678 /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
679 /// immediate" address. The "Size" argument is the size in bytes of the memory
680 /// reference, which determines the scale.
681 bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
682  SDValue &Base,
683  SDValue &OffImm) {
684  SDLoc dl(N);
685  const DataLayout &DL = CurDAG->getDataLayout();
686  const TargetLowering *TLI = getTargetLowering();
687  if (N.getOpcode() == ISD::FrameIndex) {
688  int FI = cast<FrameIndexSDNode>(N)->getIndex();
689  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
690  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
691  return true;
692  }
693 
694  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
695  // addressing mode selected here doesn't support labels/immediates, only base+offset.
696 
697  if (CurDAG->isBaseWithConstantOffset(N)) {
698  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
699  int64_t RHSC = RHS->getSExtValue();
700  unsigned Scale = Log2_32(Size);
701  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
702  RHSC < (0x40 << Scale)) {
703  Base = N.getOperand(0);
704  if (Base.getOpcode() == ISD::FrameIndex) {
705  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
706  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
707  }
708  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
709  return true;
710  }
711  }
712  }
713 
714  // Base only. The address will be materialized into a register before
715  // the memory is accessed.
716  // add x0, Xbase, #offset
717  // stp x1, x2, [x0]
718  Base = N;
719  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
720  return true;
721 }
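// Illustrative example (not in the original source): this is the form used by
// LDP/STP. For an 8-byte access the accepted offsets are multiples of 8 in
// [-512, 504], e.g. "stp x1, x2, [x0, #-16]"; anything else falls back to the
// base-only case handled at the end of the function.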
722 
723 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
724 /// immediate" address. The "Size" argument is the size in bytes of the memory
725 /// reference, which determines the scale.
726 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
727  SDValue &Base, SDValue &OffImm) {
728  SDLoc dl(N);
729  const DataLayout &DL = CurDAG->getDataLayout();
730  const TargetLowering *TLI = getTargetLowering();
731  if (N.getOpcode() == ISD::FrameIndex) {
732  int FI = cast<FrameIndexSDNode>(N)->getIndex();
733  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
734  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
735  return true;
736  }
737 
738  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
739  GlobalAddressSDNode *GAN =
740  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
741  Base = N.getOperand(0);
742  OffImm = N.getOperand(1);
743  if (!GAN)
744  return true;
745 
746  if (GAN->getOffset() % Size == 0) {
747  const GlobalValue *GV = GAN->getGlobal();
748  unsigned Alignment = GV->getAlignment();
749  Type *Ty = GV->getValueType();
750  if (Alignment == 0 && Ty->isSized())
751  Alignment = DL.getABITypeAlignment(Ty);
752 
753  if (Alignment >= Size)
754  return true;
755  }
756  }
757 
758  if (CurDAG->isBaseWithConstantOffset(N)) {
759  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
760  int64_t RHSC = (int64_t)RHS->getZExtValue();
761  unsigned Scale = Log2_32(Size);
762  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
763  Base = N.getOperand(0);
764  if (Base.getOpcode() == ISD::FrameIndex) {
765  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
766  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
767  }
768  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
769  return true;
770  }
771  }
772  }
773 
774  // Before falling back to our general case, check if the unscaled
775  // instructions can handle this. If so, that's preferable.
776  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
777  return false;
778 
779  // Base only. The address will be materialized into a register before
780  // the memory is accessed.
781  // add x0, Xbase, #offset
782  // ldr x0, [x0]
783  Base = N;
784  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
785  return true;
786 }
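// Illustrative example (not in the original source): for an 8-byte load the
// accepted offsets are multiples of 8 in [0, 32760], so "ldr x2, [x0, #32]"
// encodes the scaled immediate 4; negative or unaligned offsets are left to
// the unscaled (LDUR/STUR) form instead.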
787 
788 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
789 /// immediate" address. This should only match when there is an offset that
790 /// is not valid for a scaled immediate addressing mode. The "Size" argument
791 /// is the size in bytes of the memory reference, which is needed here to know
792 /// what is valid for a scaled immediate.
793 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
794  SDValue &Base,
795  SDValue &OffImm) {
796  if (!CurDAG->isBaseWithConstantOffset(N))
797  return false;
798  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
799  int64_t RHSC = RHS->getSExtValue();
800  // If the offset is valid as a scaled immediate, don't match here.
801  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
802  RHSC < (0x1000 << Log2_32(Size)))
803  return false;
804  if (RHSC >= -256 && RHSC < 256) {
805  Base = N.getOperand(0);
806  if (Base.getOpcode() == ISD::FrameIndex) {
807  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
808  const TargetLowering *TLI = getTargetLowering();
809  Base = CurDAG->getTargetFrameIndex(
810  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
811  }
812  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
813  return true;
814  }
815  }
816  return false;
817 }
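// Illustrative example (not in the original source): an offset such as #-8 or
// #3 on an 8-byte access is invalid for the scaled form above, so it is
// selected here as "ldur x2, [x0, #-8]" / "ldur x2, [x0, #3]", provided the
// offset lies in [-256, 255].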
818 
819 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
820  SDLoc dl(N);
821  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
822  SDValue ImpDef = SDValue(
823  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
824  MachineSDNode *Node = CurDAG->getMachineNode(
825  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
826  return SDValue(Node, 0);
827 }
828 
829 /// Check if the given SHL node (\p N), can be used to form an
830 /// extended register for an addressing mode.
831 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
832  bool WantExtend, SDValue &Offset,
833  SDValue &SignExtend) {
834  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
835  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
836  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
837  return false;
838 
839  SDLoc dl(N);
840  if (WantExtend) {
841  AArch64_AM::ShiftExtendType Ext =
842  getExtendTypeForNode(N.getOperand(0), true);
843  if (Ext == AArch64_AM::InvalidShiftExtend)
844  return false;
845 
846  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
847  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
848  MVT::i32);
849  } else {
850  Offset = N.getOperand(0);
851  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
852  }
853 
854  unsigned LegalShiftVal = Log2_32(Size);
855  unsigned ShiftVal = CSD->getZExtValue();
856 
857  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
858  return false;
859 
860  return isWorthFolding(N);
861 }
862 
863 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
864  SDValue &Base, SDValue &Offset,
865  SDValue &SignExtend,
866  SDValue &DoShift) {
867  if (N.getOpcode() != ISD::ADD)
868  return false;
869  SDValue LHS = N.getOperand(0);
870  SDValue RHS = N.getOperand(1);
871  SDLoc dl(N);
872 
873  // We don't want to match immediate adds here, because they are better lowered
874  // to the register-immediate addressing modes.
875  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
876  return false;
877 
878  // Check if this particular node is reused in any non-memory related
879  // operation. If yes, do not try to fold this node into the address
880  // computation, since the computation will be kept.
881  const SDNode *Node = N.getNode();
882  for (SDNode *UI : Node->uses()) {
883  if (!isa<MemSDNode>(*UI))
884  return false;
885  }
886 
887  // Remember if it is worth folding N when it produces extended register.
888  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
889 
890  // Try to match a shifted extend on the RHS.
891  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
892  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
893  Base = LHS;
894  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
895  return true;
896  }
897 
898  // Try to match a shifted extend on the LHS.
899  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
900  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
901  Base = RHS;
902  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
903  return true;
904  }
905 
906  // There was no shift, whatever else we find.
907  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
908 
909  AArch64_AM::ShiftExtendType Ext;
910  // Try to match an unshifted extend on the LHS.
911  if (IsExtendedRegisterWorthFolding &&
912  (Ext = getExtendTypeForNode(LHS, true)) !=
913  AArch64_AM::InvalidShiftExtend) {
914  Base = RHS;
915  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
916  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
917  MVT::i32);
918  if (isWorthFolding(LHS))
919  return true;
920  }
921 
922  // Try to match an unshifted extend on the RHS.
923  if (IsExtendedRegisterWorthFolding &&
924  (Ext = getExtendTypeForNode(RHS, true)) !=
925  AArch64_AM::InvalidShiftExtend) {
926  Base = LHS;
927  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
928  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
929  MVT::i32);
930  if (isWorthFolding(RHS))
931  return true;
932  }
933 
934  return false;
935 }
936 
937 // Check if the given immediate is preferred by ADD. If an immediate can be
938 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
939 // encoded by a single MOVZ, return true.
940 static bool isPreferredADD(int64_t ImmOff) {
941  // Constant in [0x0, 0xfff] can be encoded in ADD.
942  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
943  return true;
944  // Check if it can be encoded in an "ADD LSL #12".
945  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
946  // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
947  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
948  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
949  return false;
950 }
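// Illustrative example (not in the original source): 0x123000 is preferred
// (encodable as "ADD ..., #0x123, LSL #12" but not as one MOVZ), whereas
// 0xab0000 is not, since "MOVZ xN, #0xab, LSL #16" already materializes it in
// a single instruction.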
951 
952 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
953  SDValue &Base, SDValue &Offset,
954  SDValue &SignExtend,
955  SDValue &DoShift) {
956  if (N.getOpcode() != ISD::ADD)
957  return false;
958  SDValue LHS = N.getOperand(0);
959  SDValue RHS = N.getOperand(1);
960  SDLoc DL(N);
961 
962  // Check if this particular node is reused in any non-memory related
963  // operation. If yes, do not try to fold this node into the address
964  // computation, since the computation will be kept.
965  const SDNode *Node = N.getNode();
966  for (SDNode *UI : Node->uses()) {
967  if (!isa<MemSDNode>(*UI))
968  return false;
969  }
970 
971  // Watch out if RHS is a wide immediate: it cannot be selected into the
972  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
973  // either. Instead it will use [BaseReg + 0] address mode and generate
974  // instructions like:
975  // MOV X0, WideImmediate
976  // ADD X1, BaseReg, X0
977  // LDR X2, [X1, 0]
978  // For such situation, using [BaseReg, XReg] addressing mode can save one
979  // ADD/SUB:
980  // MOV X0, WideImmediate
981  // LDR X2, [BaseReg, X0]
982  if (isa<ConstantSDNode>(RHS)) {
983  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
984  unsigned Scale = Log2_32(Size);
985  // Skip immediates that can be selected in the load/store addressing mode.
986  // Also skip immediates that can be encoded by a single ADD (SUB is also
987  // checked by using -ImmOff).
988  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
989  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
990  return false;
991 
992  SDValue Ops[] = { RHS };
993  SDNode *MOVI =
994  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
995  SDValue MOVIV = SDValue(MOVI, 0);
996  // This ADD of two X register will be selected into [Reg+Reg] mode.
997  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
998  }
999 
1000  // Remember if it is worth folding N when it produces extended register.
1001  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1002 
1003  // Try to match a shifted extend on the RHS.
1004  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1005  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1006  Base = LHS;
1007  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1008  return true;
1009  }
1010 
1011  // Try to match a shifted extend on the LHS.
1012  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1013  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1014  Base = RHS;
1015  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1016  return true;
1017  }
1018 
1019  // Match any non-shifted, non-extend, non-immediate add expression.
1020  Base = LHS;
1021  Offset = RHS;
1022  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1023  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1024  // Reg1 + Reg2 is free: no check needed.
1025  return true;
1026 }
1027 
1028 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1029  static const unsigned RegClassIDs[] = {
1030  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1031  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1032  AArch64::dsub2, AArch64::dsub3};
1033 
1034  return createTuple(Regs, RegClassIDs, SubRegs);
1035 }
1036 
1037 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1038  static const unsigned RegClassIDs[] = {
1039  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1040  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1041  AArch64::qsub2, AArch64::qsub3};
1042 
1043  return createTuple(Regs, RegClassIDs, SubRegs);
1044 }
1045 
1046 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1047  const unsigned RegClassIDs[],
1048  const unsigned SubRegs[]) {
1049  // There's no special register-class for a vector-list of 1 element: it's just
1050  // a vector.
1051  if (Regs.size() == 1)
1052  return Regs[0];
1053 
1054  assert(Regs.size() >= 2 && Regs.size() <= 4);
1055 
1056  SDLoc DL(Regs[0]);
1057 
1058  SmallVector<SDValue, 4> Ops;
1059 
1060  // First operand of REG_SEQUENCE is the desired RegClass.
1061  Ops.push_back(
1062  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1063 
1064  // Then we get pairs of source & subregister-position for the components.
1065  for (unsigned i = 0; i < Regs.size(); ++i) {
1066  Ops.push_back(Regs[i]);
1067  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1068  }
1069 
1070  SDNode *N =
1071  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1072  return SDValue(N, 0);
1073 }
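// Illustrative example (not in the original source): createQTuple on two
// vectors produces REG_SEQUENCE(QQRegClassID, V0, qsub0, V1, qsub1), i.e. a
// single untyped value that the LD2/ST2-style patterns can consume.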
1074 
1075 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1076  bool isExt) {
1077  SDLoc dl(N);
1078  EVT VT = N->getValueType(0);
1079 
1080  unsigned ExtOff = isExt;
1081 
1082  // Form a REG_SEQUENCE to force register allocation.
1083  unsigned Vec0Off = ExtOff + 1;
1084  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1085  N->op_begin() + Vec0Off + NumVecs);
1086  SDValue RegSeq = createQTuple(Regs);
1087 
1088  SmallVector<SDValue, 6> Ops;
1089  if (isExt)
1090  Ops.push_back(N->getOperand(1));
1091  Ops.push_back(RegSeq);
1092  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1093  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1094 }
1095 
1096 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1097  LoadSDNode *LD = cast<LoadSDNode>(N);
1098  if (LD->isUnindexed())
1099  return false;
1100  EVT VT = LD->getMemoryVT();
1101  EVT DstVT = N->getValueType(0);
1102  ISD::MemIndexedMode AM = LD->getAddressingMode();
1103  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1104 
1105  // We're not doing validity checking here. That was done when checking
1106  // if we should mark the load as indexed or not. We're just selecting
1107  // the right instruction.
1108  unsigned Opcode = 0;
1109 
1110  ISD::LoadExtType ExtType = LD->getExtensionType();
1111  bool InsertTo64 = false;
1112  if (VT == MVT::i64)
1113  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1114  else if (VT == MVT::i32) {
1115  if (ExtType == ISD::NON_EXTLOAD)
1116  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1117  else if (ExtType == ISD::SEXTLOAD)
1118  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1119  else {
1120  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1121  InsertTo64 = true;
1122  // The result of the load is only i32. It's the subreg_to_reg that makes
1123  // it into an i64.
1124  DstVT = MVT::i32;
1125  }
1126  } else if (VT == MVT::i16) {
1127  if (ExtType == ISD::SEXTLOAD) {
1128  if (DstVT == MVT::i64)
1129  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1130  else
1131  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1132  } else {
1133  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1134  InsertTo64 = DstVT == MVT::i64;
1135  // The result of the load is only i32. It's the subreg_to_reg that makes
1136  // it into an i64.
1137  DstVT = MVT::i32;
1138  }
1139  } else if (VT == MVT::i8) {
1140  if (ExtType == ISD::SEXTLOAD) {
1141  if (DstVT == MVT::i64)
1142  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1143  else
1144  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1145  } else {
1146  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1147  InsertTo64 = DstVT == MVT::i64;
1148  // The result of the load is only i32. It's the subreg_to_reg that makes
1149  // it into an i64.
1150  DstVT = MVT::i32;
1151  }
1152  } else if (VT == MVT::f16) {
1153  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1154  } else if (VT == MVT::f32) {
1155  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1156  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1157  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1158  } else if (VT.is128BitVector()) {
1159  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1160  } else
1161  return false;
1162  SDValue Chain = LD->getChain();
1163  SDValue Base = LD->getBasePtr();
1164  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1165  int OffsetVal = (int)OffsetOp->getZExtValue();
1166  SDLoc dl(N);
1167  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1168  SDValue Ops[] = { Base, Offset, Chain };
1169  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1170  MVT::Other, Ops);
1171  // Either way, we're replacing the node, so tell the caller that.
1172  SDValue LoadedVal = SDValue(Res, 1);
1173  if (InsertTo64) {
1174  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1175  LoadedVal =
1176  SDValue(CurDAG->getMachineNode(
1177  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1178  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1179  SubReg),
1180  0);
1181  }
1182 
1183  ReplaceUses(SDValue(N, 0), LoadedVal);
1184  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1185  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1186  CurDAG->RemoveDeadNode(N);
1187  return true;
1188 }
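// Illustrative example (not in the original source): a post-incremented 64-bit
// load such as "ldr x0, [x1], #8" reaches here as an indexed LoadSDNode and is
// selected to LDRXpost, whose first result is the updated base register and
// whose second result is the loaded value.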
1189 
1190 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1191  unsigned SubRegIdx) {
1192  SDLoc dl(N);
1193  EVT VT = N->getValueType(0);
1194  SDValue Chain = N->getOperand(0);
1195 
1196  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1197  Chain};
1198 
1199  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1200 
1201  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1202  SDValue SuperReg = SDValue(Ld, 0);
1203  for (unsigned i = 0; i < NumVecs; ++i)
1204  ReplaceUses(SDValue(N, i),
1205  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1206 
1207  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1208 
1209  // Transfer memoperands.
1210  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1211  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1212 
1213  CurDAG->RemoveDeadNode(N);
1214 }
1215 
1216 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1217  unsigned Opc, unsigned SubRegIdx) {
1218  SDLoc dl(N);
1219  EVT VT = N->getValueType(0);
1220  SDValue Chain = N->getOperand(0);
1221 
1222  SDValue Ops[] = {N->getOperand(1), // Mem operand
1223  N->getOperand(2), // Incremental
1224  Chain};
1225 
1226  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1227  MVT::Untyped, MVT::Other};
1228 
1229  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1230 
1231  // Update uses of write back register
1232  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1233 
1234  // Update uses of vector list
1235  SDValue SuperReg = SDValue(Ld, 1);
1236  if (NumVecs == 1)
1237  ReplaceUses(SDValue(N, 0), SuperReg);
1238  else
1239  for (unsigned i = 0; i < NumVecs; ++i)
1240  ReplaceUses(SDValue(N, i),
1241  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1242 
1243  // Update the chain
1244  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1245  CurDAG->RemoveDeadNode(N);
1246 }
1247 
1248 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1249  unsigned Opc) {
1250  SDLoc dl(N);
1251  EVT VT = N->getOperand(2)->getValueType(0);
1252 
1253  // Form a REG_SEQUENCE to force register allocation.
1254  bool Is128Bit = VT.getSizeInBits() == 128;
1255  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1256  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1257 
1258  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1259  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1260 
1261  // Transfer memoperands.
1262  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1263  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1264 
1265  ReplaceNode(N, St);
1266 }
1267 
1268 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1269  unsigned Opc) {
1270  SDLoc dl(N);
1271  EVT VT = N->getOperand(2)->getValueType(0);
1272  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1273  MVT::Other}; // Type for the Chain
1274 
1275  // Form a REG_SEQUENCE to force register allocation.
1276  bool Is128Bit = VT.getSizeInBits() == 128;
1277  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1278  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1279 
1280  SDValue Ops[] = {RegSeq,
1281  N->getOperand(NumVecs + 1), // base register
1282  N->getOperand(NumVecs + 2), // Incremental
1283  N->getOperand(0)}; // Chain
1284  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1285 
1286  ReplaceNode(N, St);
1287 }
1288 
1289 namespace {
1290 /// WidenVector - Given a value in the V64 register class, produce the
1291 /// equivalent value in the V128 register class.
1292 class WidenVector {
1293  SelectionDAG &DAG;
1294 
1295 public:
1296  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1297 
1298  SDValue operator()(SDValue V64Reg) {
1299  EVT VT = V64Reg.getValueType();
1300  unsigned NarrowSize = VT.getVectorNumElements();
1301  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1302  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1303  SDLoc DL(V64Reg);
1304 
1305  SDValue Undef =
1306  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1307  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1308  }
1309 };
1310 } // namespace
1311 
1312 /// NarrowVector - Given a value in the V128 register class, produce the
1313 /// equivalent value in the V64 register class.
1314 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1315  EVT VT = V128Reg.getValueType();
1316  unsigned WideSize = VT.getVectorNumElements();
1317  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1318  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1319 
1320  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1321  V128Reg);
1322 }
1323 
1324 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1325  unsigned Opc) {
1326  SDLoc dl(N);
1327  EVT VT = N->getValueType(0);
1328  bool Narrow = VT.getSizeInBits() == 64;
1329 
1330  // Form a REG_SEQUENCE to force register allocation.
1331  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1332 
1333  if (Narrow)
1334  transform(Regs, Regs.begin(),
1335  WidenVector(*CurDAG));
1336 
1337  SDValue RegSeq = createQTuple(Regs);
1338 
1339  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1340 
1341  unsigned LaneNo =
1342  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1343 
1344  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1345  N->getOperand(NumVecs + 3), N->getOperand(0)};
1346  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1347  SDValue SuperReg = SDValue(Ld, 0);
1348 
1349  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1350  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1351  AArch64::qsub2, AArch64::qsub3 };
1352  for (unsigned i = 0; i < NumVecs; ++i) {
1353  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1354  if (Narrow)
1355  NV = NarrowVector(NV, *CurDAG);
1356  ReplaceUses(SDValue(N, i), NV);
1357  }
1358 
1359  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1360  CurDAG->RemoveDeadNode(N);
1361 }
1362 
1363 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1364  unsigned Opc) {
1365  SDLoc dl(N);
1366  EVT VT = N->getValueType(0);
1367  bool Narrow = VT.getSizeInBits() == 64;
1368 
1369  // Form a REG_SEQUENCE to force register allocation.
1370  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1371 
1372  if (Narrow)
1373  transform(Regs, Regs.begin(),
1374  WidenVector(*CurDAG));
1375 
1376  SDValue RegSeq = createQTuple(Regs);
1377 
1378  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1379  RegSeq->getValueType(0), MVT::Other};
1380 
1381  unsigned LaneNo =
1382  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1383 
1384  SDValue Ops[] = {RegSeq,
1385  CurDAG->getTargetConstant(LaneNo, dl,
1386  MVT::i64), // Lane Number
1387  N->getOperand(NumVecs + 2), // Base register
1388  N->getOperand(NumVecs + 3), // Incremental
1389  N->getOperand(0)};
1390  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1391 
1392  // Update uses of the write back register
1393  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1394 
1395  // Update uses of the vector list
1396  SDValue SuperReg = SDValue(Ld, 1);
1397  if (NumVecs == 1) {
1398  ReplaceUses(SDValue(N, 0),
1399  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1400  } else {
1401  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1402  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1403  AArch64::qsub2, AArch64::qsub3 };
1404  for (unsigned i = 0; i < NumVecs; ++i) {
1405  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1406  SuperReg);
1407  if (Narrow)
1408  NV = NarrowVector(NV, *CurDAG);
1409  ReplaceUses(SDValue(N, i), NV);
1410  }
1411  }
1412 
1413  // Update the Chain
1414  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1415  CurDAG->RemoveDeadNode(N);
1416 }
1417 
1418 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1419  unsigned Opc) {
1420  SDLoc dl(N);
1421  EVT VT = N->getOperand(2)->getValueType(0);
1422  bool Narrow = VT.getSizeInBits() == 64;
1423 
1424  // Form a REG_SEQUENCE to force register allocation.
1425  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1426 
1427  if (Narrow)
1428  transform(Regs, Regs.begin(),
1429  WidenVector(*CurDAG));
1430 
1431  SDValue RegSeq = createQTuple(Regs);
1432 
1433  unsigned LaneNo =
1434  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1435 
1436  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1437  N->getOperand(NumVecs + 3), N->getOperand(0)};
1438  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1439 
1440  // Transfer memoperands.
1441  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1442  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1443 
1444  ReplaceNode(N, St);
1445 }
1446 
1447 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1448  unsigned Opc) {
1449  SDLoc dl(N);
1450  EVT VT = N->getOperand(2)->getValueType(0);
1451  bool Narrow = VT.getSizeInBits() == 64;
1452 
1453  // Form a REG_SEQUENCE to force register allocation.
1454  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1455 
1456  if (Narrow)
1457  transform(Regs, Regs.begin(),
1458  WidenVector(*CurDAG));
1459 
1460  SDValue RegSeq = createQTuple(Regs);
1461 
1462  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1463  MVT::Other};
1464 
1465  unsigned LaneNo =
1466  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1467 
1468  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1469  N->getOperand(NumVecs + 2), // Base Register
1470  N->getOperand(NumVecs + 3), // Incremental
1471  N->getOperand(0)};
1472  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1473 
1474  // Transfer memoperands.
1475  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1476  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1477 
1478  ReplaceNode(N, St);
1479 }
1480 
1481 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1482  unsigned &Opc, SDValue &Opd0,
1483  unsigned &LSB, unsigned &MSB,
1484  unsigned NumberOfIgnoredLowBits,
1485  bool BiggerPattern) {
1486  assert(N->getOpcode() == ISD::AND &&
1487  "N must be a AND operation to call this function");
1488 
1489  EVT VT = N->getValueType(0);
1490 
1491  // Here we can test the type of VT and return false when the type does not
1492  // match, but since it is done prior to that call in the current context
1493  // we turned that into an assert to avoid redundant code.
1494  assert((VT == MVT::i32 || VT == MVT::i64) &&
1495  "Type checking must have been done before calling this function");
1496 
1497  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1498  // changed the AND node to a 32-bit mask operation. We'll have to
1499  // undo that as part of the transform here if we want to catch all
1500  // the opportunities.
1501  // Currently the NumberOfIgnoredLowBits argument helps to recover
1502  // from these situations when matching the bigger pattern (bitfield insert).
1503 
1504  // For unsigned extracts, check for a shift right and mask
1505  uint64_t AndImm = 0;
1506  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1507  return false;
1508 
1509  const SDNode *Op0 = N->getOperand(0).getNode();
1510 
1511  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1512  // simplified. Try to undo that
1513  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1514 
1515  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1516  if (AndImm & (AndImm + 1))
1517  return false;
1518 
1519  bool ClampMSB = false;
1520  uint64_t SrlImm = 0;
1521  // Handle the SRL + ANY_EXTEND case.
1522  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1523  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1524  // Extend the incoming operand of the SRL to 64-bit.
1525  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1526  // Make sure to clamp the MSB so that we preserve the semantics of the
1527  // original operations.
1528  ClampMSB = true;
1529  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1530  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1531  SrlImm)) {
1532  // If the shift result was truncated, we can still combine them.
1533  Opd0 = Op0->getOperand(0).getOperand(0);
1534 
1535  // Use the type of SRL node.
1536  VT = Opd0->getValueType(0);
1537  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1538  Opd0 = Op0->getOperand(0);
1539  } else if (BiggerPattern) {
1540  // Let's pretend a 0 shift right has been performed.
1541  // The resulting code will be at least as good as the original one
1542  // plus it may expose more opportunities for bitfield insert pattern.
1543  // FIXME: Currently we limit this to the bigger pattern, because
1544  // some optimizations expect AND and not UBFM.
1545  Opd0 = N->getOperand(0);
1546  } else
1547  return false;
1548 
1549  // Bail out on large immediates. This happens when no proper
1550  // combining/constant folding was performed.
1551  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1552  LLVM_DEBUG(
1553  (dbgs() << N
1554  << ": Found large shift immediate, this should not happen\n"));
1555  return false;
1556  }
1557 
1558  LSB = SrlImm;
1559  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1560  : countTrailingOnes<uint64_t>(AndImm)) -
1561  1;
1562  if (ClampMSB)
1563  // Since we're moving the extend before the right shift operation, we need
1564  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1565  // the zeros which would get shifted in with the original right shift
1566  // operation.
1567  MSB = MSB > 31 ? 31 : MSB;
1568 
1569  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1570  return true;
1571 }
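// As an illustrative sketch (operand values assumed for the example, not taken
// from a particular test): for i32 (and (srl x, 3), 0x1f), AndImm = 0x1f has
// countTrailingOnes == 5 and SrlImm == 3, so LSB = 3 and MSB = 3 + 5 - 1 = 7,
// and the node selects to UBFMWri x, #3, #7, i.e. the UBFX w, w, #3, #5 alias.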
1572 
1573 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1574  SDValue &Opd0, unsigned &Immr,
1575  unsigned &Imms) {
1576  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1577 
1578  EVT VT = N->getValueType(0);
1579  unsigned BitWidth = VT.getSizeInBits();
1580  assert((VT == MVT::i32 || VT == MVT::i64) &&
1581  "Type checking must have been done before calling this function");
1582 
1583  SDValue Op = N->getOperand(0);
1584  if (Op->getOpcode() == ISD::TRUNCATE) {
1585  Op = Op->getOperand(0);
1586  VT = Op->getValueType(0);
1587  BitWidth = VT.getSizeInBits();
1588  }
1589 
1590  uint64_t ShiftImm;
1591  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1592  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1593  return false;
1594 
1595  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1596  if (ShiftImm + Width > BitWidth)
1597  return false;
1598 
1599  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1600  Opd0 = Op.getOperand(0);
1601  Immr = ShiftImm;
1602  Imms = ShiftImm + Width - 1;
1603  return true;
1604 }
1605 
1606 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1607  SDValue &Opd0, unsigned &LSB,
1608  unsigned &MSB) {
1609  // We are looking for the following pattern which basically extracts several
1610  // contiguous bits from the source value and places them at the LSB of the
1611  // destination value; all other bits of the destination value are set to zero:
1612  //
1613  // Value2 = AND Value, MaskImm
1614  // SRL Value2, ShiftImm
1615  //
1616  // where MaskImm >> ShiftImm gives the bit width to extract.
1617  //
1618  // This gets selected into a single UBFM:
1619  //
1620  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1621  //
1622 
1623  if (N->getOpcode() != ISD::SRL)
1624  return false;
1625 
1626  uint64_t AndMask = 0;
1627  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1628  return false;
1629 
1630  Opd0 = N->getOperand(0).getOperand(0);
1631 
1632  uint64_t SrlImm = 0;
1633  if (!isIntImmediate(N->getOperand(1), SrlImm))
1634  return false;
1635 
1636  // Check whether we really have several bits extract here.
1637  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1638  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1639  if (N->getValueType(0) == MVT::i32)
1640  Opc = AArch64::UBFMWri;
1641  else
1642  Opc = AArch64::UBFMXri;
1643 
1644  LSB = SrlImm;
1645  MSB = BitWide + SrlImm - 1;
1646  return true;
1647  }
1648 
1649  return false;
1650 }
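// A worked instance of the pattern above (values assumed for illustration):
// for (srl (and x, 0xff0), 4), AndMask >> SrlImm == 0xff is a mask, so
// BitWide == 8, LSB = 4 and MSB = 8 + 4 - 1 = 11, giving UBFM x, #4, #11
// (the UBFX x, x, #4, #8 alias).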
1651 
1652 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1653  unsigned &Immr, unsigned &Imms,
1654  bool BiggerPattern) {
1655  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1656  "N must be a SHR/SRA operation to call this function");
1657 
1658  EVT VT = N->getValueType(0);
1659 
1660  // We could test the type of VT here and return false when it does not
1661  // match, but since that check is done before this call in the current
1662  // context, we turn it into an assert to avoid redundant code.
1663  assert((VT == MVT::i32 || VT == MVT::i64) &&
1664  "Type checking must have been done before calling this function");
1665 
1666  // Check for AND + SRL doing several bits extract.
1667  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1668  return true;
1669 
1670  // We're looking for a shift of a shift.
1671  uint64_t ShlImm = 0;
1672  uint64_t TruncBits = 0;
1673  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1674  Opd0 = N->getOperand(0).getOperand(0);
1675  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1676  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1677  // We are looking for a shift of a truncate. A truncate from i64 to i32
1678  // can be considered as setting the high 32 bits to zero. Our strategy here
1679  // is to always generate a 64-bit UBFM. This consistency will help the CSE
1680  // pass later find more redundancy.
1681  Opd0 = N->getOperand(0).getOperand(0);
1682  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1683  VT = Opd0.getValueType();
1684  assert(VT == MVT::i64 && "the promoted type should be i64");
1685  } else if (BiggerPattern) {
1686  // Let's pretend a 0 shift left has been performed.
1687  // FIXME: Currently we limit this to the bigger pattern case,
1688  // because some optimizations expect AND and not UBFM
1689  Opd0 = N->getOperand(0);
1690  } else
1691  return false;
1692 
1693  // Missing combines/constant folding may have left us with strange
1694  // constants.
1695  if (ShlImm >= VT.getSizeInBits()) {
1696  LLVM_DEBUG(
1697  (dbgs() << N
1698  << ": Found large shift immediate, this should not happen\n"));
1699  return false;
1700  }
1701 
1702  uint64_t SrlImm = 0;
1703  if (!isIntImmediate(N->getOperand(1), SrlImm))
1704  return false;
1705 
1706  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1707  "bad amount in shift node!");
1708  int immr = SrlImm - ShlImm;
1709  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1710  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1711  // SRA requires a signed extraction
1712  if (VT == MVT::i32)
1713  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1714  else
1715  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1716  return true;
1717 }
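// Example of the shift-of-shift case (assumed values, for illustration only):
// for i32 (sra (shl x, 24), 24), ShlImm == SrlImm == 24, so
// Immr = 24 - 24 = 0 and Imms = 32 - 24 - 0 - 1 = 7, selecting
// SBFMWri x, #0, #7, i.e. a byte sign-extension (SXTB).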
1718 
1719 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1720  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1721 
1722  EVT VT = N->getValueType(0);
1723  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1724  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1725  return false;
1726 
1727  uint64_t ShiftImm;
1728  SDValue Op = N->getOperand(0);
1729  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1730  return false;
1731 
1732  SDLoc dl(N);
1733  // Extend the incoming operand of the shift to 64-bits.
1734  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1735  unsigned Immr = ShiftImm;
1736  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1737  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1738  CurDAG->getTargetConstant(Imms, dl, VT)};
1739  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1740  return true;
1741 }
1742 
1743 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1744  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1745  unsigned NumberOfIgnoredLowBits = 0,
1746  bool BiggerPattern = false) {
1747  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1748  return false;
1749 
1750  switch (N->getOpcode()) {
1751  default:
1752  if (!N->isMachineOpcode())
1753  return false;
1754  break;
1755  case ISD::AND:
1756  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1757  NumberOfIgnoredLowBits, BiggerPattern);
1758  case ISD::SRL:
1759  case ISD::SRA:
1760  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1761 
1762  case ISD::SIGN_EXTEND_INREG:
1763  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1764  }
1765 
1766  unsigned NOpc = N->getMachineOpcode();
1767  switch (NOpc) {
1768  default:
1769  return false;
1770  case AArch64::SBFMWri:
1771  case AArch64::UBFMWri:
1772  case AArch64::SBFMXri:
1773  case AArch64::UBFMXri:
1774  Opc = NOpc;
1775  Opd0 = N->getOperand(0);
1776  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1777  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1778  return true;
1779  }
1780  // Unreachable
1781  return false;
1782 }
1783 
1784 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1785  unsigned Opc, Immr, Imms;
1786  SDValue Opd0;
1787  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1788  return false;
1789 
1790  EVT VT = N->getValueType(0);
1791  SDLoc dl(N);
1792 
1793  // If the bit extract operation is 64bit but the original type is 32bit, we
1794  // need to add one EXTRACT_SUBREG.
1795  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1796  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1797  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1798 
1799  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1800  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1801  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1802  MVT::i32, SDValue(BFM, 0), SubReg));
1803  return true;
1804  }
1805 
1806  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1807  CurDAG->getTargetConstant(Imms, dl, VT)};
1808  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1809  return true;
1810 }
1811 
1812 /// Does DstMask form a complementary pair with the mask provided by
1813 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
1814 /// this asks whether DstMask zeroes precisely those bits that will be set by
1815 /// the other half.
1816 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1817  unsigned NumberOfIgnoredHighBits, EVT VT) {
1818  assert((VT == MVT::i32 || VT == MVT::i64) &&
1819  "i32 or i64 mask type expected!");
1820  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1821 
1822  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1823  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1824 
1825  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1826  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1827 }
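// For instance (an assumed i32 example), DstMask = 0xffff0000 paired with
// BitsToBeInserted = 0x0000ffff gives a zero intersection and an all-ones
// union, so the two halves are complementary and a BFI/BFM can be formed.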
1828 
1829 // Look for bits that will be useful for later uses.
1830 // A bit is considered useless as soon as it is dropped and never used
1831 // before it has been dropped.
1832 // E.g., looking for the useful bits of x:
1833 // 1. y = x & 0x7
1834 // 2. z = y >> 2
1835 // After #1, the useful bits of x are 0x7; these useful bits live through
1836 // y.
1837 // After #2, the useful bits of x are 0x4.
1838 // However, if x is used by an unpredictable instruction, then all its bits
1839 // are useful.
1840 // E.g.
1841 // 1. y = x & 0x7
1842 // 2. z = y >> 2
1843 // 3. str x, [@x]
1844 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1845 
1846 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1847  unsigned Depth) {
1848  uint64_t Imm =
1849  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1850  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1851  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1852  getUsefulBits(Op, UsefulBits, Depth + 1);
1853 }
1854 
1855 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1856  uint64_t Imm, uint64_t MSB,
1857  unsigned Depth) {
1858  // inherit the bitwidth value
1859  APInt OpUsefulBits(UsefulBits);
1860  OpUsefulBits = 1;
1861 
1862  if (MSB >= Imm) {
1863  OpUsefulBits <<= MSB - Imm + 1;
1864  --OpUsefulBits;
1865  // The interesting part will be in the lower part of the result
1866  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1867  // The interesting part was starting at Imm in the argument
1868  OpUsefulBits <<= Imm;
1869  } else {
1870  OpUsefulBits <<= MSB + 1;
1871  --OpUsefulBits;
1872  // The interesting part will be shifted in the result
1873  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1874  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1875  // The interesting part was at zero in the argument
1876  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1877  }
1878 
1879  UsefulBits &= OpUsefulBits;
1880 }
1881 
1882 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1883  unsigned Depth) {
1884  uint64_t Imm =
1885  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1886  uint64_t MSB =
1887  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1888 
1889  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1890 }
1891 
1892 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1893  unsigned Depth) {
1894  uint64_t ShiftTypeAndValue =
1895  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1896  APInt Mask(UsefulBits);
1897  Mask.clearAllBits();
1898  Mask.flipAllBits();
1899 
1900  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1901  // Shift Left
1902  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1903  Mask <<= ShiftAmt;
1904  getUsefulBits(Op, Mask, Depth + 1);
1905  Mask.lshrInPlace(ShiftAmt);
1906  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1907  // Shift Right
1908  // We do not handle AArch64_AM::ASR, because the sign will change the
1909  // number of useful bits
1910  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1911  Mask.lshrInPlace(ShiftAmt);
1912  getUsefulBits(Op, Mask, Depth + 1);
1913  Mask <<= ShiftAmt;
1914  } else
1915  return;
1916 
1917  UsefulBits &= Mask;
1918 }
1919 
1920 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1921  unsigned Depth) {
1922  uint64_t Imm =
1923  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1924  uint64_t MSB =
1925  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1926 
1927  APInt OpUsefulBits(UsefulBits);
1928  OpUsefulBits = 1;
1929 
1930  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1931  ResultUsefulBits.flipAllBits();
1932  APInt Mask(UsefulBits.getBitWidth(), 0);
1933 
1934  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1935 
1936  if (MSB >= Imm) {
1937  // The instruction is a BFXIL.
1938  uint64_t Width = MSB - Imm + 1;
1939  uint64_t LSB = Imm;
1940 
1941  OpUsefulBits <<= Width;
1942  --OpUsefulBits;
1943 
1944  if (Op.getOperand(1) == Orig) {
1945  // Copy the low bits from the result to bits starting from LSB.
1946  Mask = ResultUsefulBits & OpUsefulBits;
1947  Mask <<= LSB;
1948  }
1949 
1950  if (Op.getOperand(0) == Orig)
1951  // Bits starting from LSB in the input contribute to the result.
1952  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1953  } else {
1954  // The instruction is a BFI.
1955  uint64_t Width = MSB + 1;
1956  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1957 
1958  OpUsefulBits <<= Width;
1959  --OpUsefulBits;
1960  OpUsefulBits <<= LSB;
1961 
1962  if (Op.getOperand(1) == Orig) {
1963  // Copy the bits from the result to the zero bits.
1964  Mask = ResultUsefulBits & OpUsefulBits;
1965  Mask.lshrInPlace(LSB);
1966  }
1967 
1968  if (Op.getOperand(0) == Orig)
1969  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1970  }
1971 
1972  UsefulBits &= Mask;
1973 }
1974 
1975 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1976  SDValue Orig, unsigned Depth) {
1977 
1978  // Users of this node should have already been instruction selected
1979  // FIXME: Can we turn that into an assert?
1980  if (!UserNode->isMachineOpcode())
1981  return;
1982 
1983  switch (UserNode->getMachineOpcode()) {
1984  default:
1985  return;
1986  case AArch64::ANDSWri:
1987  case AArch64::ANDSXri:
1988  case AArch64::ANDWri:
1989  case AArch64::ANDXri:
1990  // We increment Depth only when we call getUsefulBits.
1991  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1992  Depth);
1993  case AArch64::UBFMWri:
1994  case AArch64::UBFMXri:
1995  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1996 
1997  case AArch64::ORRWrs:
1998  case AArch64::ORRXrs:
1999  if (UserNode->getOperand(1) != Orig)
2000  return;
2001  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2002  Depth);
2003  case AArch64::BFMWri:
2004  case AArch64::BFMXri:
2005  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2006 
2007  case AArch64::STRBBui:
2008  case AArch64::STURBBi:
2009  if (UserNode->getOperand(0) != Orig)
2010  return;
2011  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2012  return;
2013 
2014  case AArch64::STRHHui:
2015  case AArch64::STURHHi:
2016  if (UserNode->getOperand(0) != Orig)
2017  return;
2018  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2019  return;
2020  }
2021 }
2022 
2023 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2024  if (Depth >= 6)
2025  return;
2026  // Initialize UsefulBits
2027  if (!Depth) {
2028  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2029  // At the beginning, assume every produced bit is useful
2030  UsefulBits = APInt(Bitwidth, 0);
2031  UsefulBits.flipAllBits();
2032  }
2033  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2034 
2035  for (SDNode *Node : Op.getNode()->uses()) {
2036  // A use cannot produce useful bits
2037  APInt UsefulBitsForUse = APInt(UsefulBits);
2038  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2039  UsersUsefulBits |= UsefulBitsForUse;
2040  }
2041  // UsefulBits contains the produced bits that are meaningful for the
2042  // current definition, thus a user cannot make a bit meaningful at
2043  // this point
2044  UsefulBits &= UsersUsefulBits;
2045 }
2046 
2047 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2048 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2049 /// 0, return Op unchanged.
2050 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2051  if (ShlAmount == 0)
2052  return Op;
2053 
2054  EVT VT = Op.getValueType();
2055  SDLoc dl(Op);
2056  unsigned BitWidth = VT.getSizeInBits();
2057  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2058 
2059  SDNode *ShiftNode;
2060  if (ShlAmount > 0) {
2061  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2062  ShiftNode = CurDAG->getMachineNode(
2063  UBFMOpc, dl, VT, Op,
2064  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2065  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2066  } else {
2067  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2068  assert(ShlAmount < 0 && "expected right shift");
2069  int ShrAmount = -ShlAmount;
2070  ShiftNode = CurDAG->getMachineNode(
2071  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2072  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2073  }
2074 
2075  return SDValue(ShiftNode, 0);
2076 }
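// For a concrete instance of the aliases used above (numbers assumed for
// illustration): on a 32-bit value, LSL #4 is UBFM #28, #27 and LSR #4 is
// UBFM #4, #31.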
2077 
2078 /// Does this tree qualify as an attempt to move a bitfield into position,
2079 /// essentially "(and (shl VAL, N), Mask)".
2080 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2081  bool BiggerPattern,
2082  SDValue &Src, int &ShiftAmount,
2083  int &MaskWidth) {
2084  EVT VT = Op.getValueType();
2085  unsigned BitWidth = VT.getSizeInBits();
2086  (void)BitWidth;
2087  assert(BitWidth == 32 || BitWidth == 64);
2088 
2089  KnownBits Known = CurDAG->computeKnownBits(Op);
2090 
2091  // Non-zero in the sense that they're not provably zero, which is the key
2092  // point if we want to use this value
2093  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2094 
2095  // Discard a constant AND mask if present. It's safe because the node will
2096  // already have been factored into the computeKnownBits calculation above.
2097  uint64_t AndImm;
2098  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2099  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2100  Op = Op.getOperand(0);
2101  }
2102 
2103  // Don't match if the SHL has more than one use, since then we'll end up
2104  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2105  if (!BiggerPattern && !Op.hasOneUse())
2106  return false;
2107 
2108  uint64_t ShlImm;
2109  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2110  return false;
2111  Op = Op.getOperand(0);
2112 
2113  if (!isShiftedMask_64(NonZeroBits))
2114  return false;
2115 
2116  ShiftAmount = countTrailingZeros(NonZeroBits);
2117  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2118 
2119  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2120  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2121  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2122  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2123  // which case it is not profitable to insert an extra shift.
2124  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2125  return false;
2126  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2127 
2128  return true;
2129 }
2130 
2131 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2132  assert(VT == MVT::i32 || VT == MVT::i64);
2133  if (VT == MVT::i32)
2134  return isShiftedMask_32(Mask);
2135  return isShiftedMask_64(Mask);
2136 }
2137 
2138 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2139 // inserted only sets known zero bits.
2140 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2141  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2142 
2143  EVT VT = N->getValueType(0);
2144  if (VT != MVT::i32 && VT != MVT::i64)
2145  return false;
2146 
2147  unsigned BitWidth = VT.getSizeInBits();
2148 
2149  uint64_t OrImm;
2150  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2151  return false;
2152 
2153  // Skip this transformation if the ORR immediate can be encoded directly in
2154  // the ORR; in that case we would only trade an AND+ORR for an ORR+BFI/BFXIL,
2155  // which is most likely performance neutral.
2156  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2157  return false;
2158 
2159  uint64_t MaskImm;
2160  SDValue And = N->getOperand(0);
2161  // Must be a single use AND with an immediate operand.
2162  if (!And.hasOneUse() ||
2163  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2164  return false;
2165 
2166  // Compute the Known Zero for the AND as this allows us to catch more general
2167  // cases than just looking for AND with imm.
2168  KnownBits Known = CurDAG->computeKnownBits(And);
2169 
2170  // Non-zero in the sense that they're not provably zero, which is the key
2171  // point if we want to use this value.
2172  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2173 
2174  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2175  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2176  return false;
2177 
2178  // The bits being inserted must only set those bits that are known to be zero.
2179  if ((OrImm & NotKnownZero) != 0) {
2180  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2181  // currently handle this case.
2182  return false;
2183  }
2184 
2185  // BFI/BFXIL dst, src, #lsb, #width.
2186  int LSB = countTrailingOnes(NotKnownZero);
2187  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2188 
2189  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2190  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2191  unsigned ImmS = Width - 1;
2192 
2193  // If we're creating a BFI instruction, avoid cases where we need more
2194  // instructions to materialize the BFI constant as compared to the original
2195  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2196  // should be no worse in this case.
2197  bool IsBFI = LSB != 0;
2198  uint64_t BFIImm = OrImm >> LSB;
2199  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2200  // We have a BFI instruction and we know the constant can't be materialized
2201  // with a ORR-immediate with the zero register.
2202  unsigned OrChunks = 0, BFIChunks = 0;
2203  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2204  if (((OrImm >> Shift) & 0xFFFF) != 0)
2205  ++OrChunks;
2206  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2207  ++BFIChunks;
2208  }
2209  if (BFIChunks > OrChunks)
2210  return false;
2211  }
2212 
2213  // Materialize the constant to be inserted.
2214  SDLoc DL(N);
2215  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2216  SDNode *MOVI = CurDAG->getMachineNode(
2217  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2218 
2219  // Create the BFI/BFXIL instruction.
2220  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2221  CurDAG->getTargetConstant(ImmR, DL, VT),
2222  CurDAG->getTargetConstant(ImmS, DL, VT)};
2223  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2224  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2225  return true;
2226 }
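// For reference, the BFI/BFXIL -> BFM translation used above (operands assumed
// for illustration): BFI w0, w1, #8, #4 corresponds to ImmR = (32 - 8) % 32 = 24
// and ImmS = 4 - 1 = 3, i.e. BFMWri w0, w1, #24, #3; the BFXIL case (LSB == 0)
// keeps ImmR = 0.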
2227 
2228 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2229  SelectionDAG *CurDAG) {
2230  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2231 
2232  EVT VT = N->getValueType(0);
2233  if (VT != MVT::i32 && VT != MVT::i64)
2234  return false;
2235 
2236  unsigned BitWidth = VT.getSizeInBits();
2237 
2238  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2239  // have the expected shape. Try to undo that.
2240 
2241  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2242  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2243 
2244  // Given an OR operation, check if we have the following pattern:
2245  // ubfm c, b, imm, imm2 (or something that does the same job, see
2246  // isBitfieldExtractOp)
2247  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2248  // countTrailingZeros(mask2) == imm2 - imm + 1
2249  // f = d | c
2250  // If yes, replace the OR instruction with:
2251  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2252 
2253  // OR is commutative, check all combinations of operand order and values of
2254  // BiggerPattern, i.e.
2255  // Opd0, Opd1, BiggerPattern=false
2256  // Opd1, Opd0, BiggerPattern=false
2257  // Opd0, Opd1, BiggerPattern=true
2258  // Opd1, Opd0, BiggerPattern=true
2259  // Several of these combinations may match, so check with BiggerPattern=false
2260  // first since that will produce better results by matching more instructions
2261  // and/or inserting fewer extra instructions.
2262  for (int I = 0; I < 4; ++I) {
2263 
2264  SDValue Dst, Src;
2265  unsigned ImmR, ImmS;
2266  bool BiggerPattern = I / 2;
2267  SDValue OrOpd0Val = N->getOperand(I % 2);
2268  SDNode *OrOpd0 = OrOpd0Val.getNode();
2269  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2270  SDNode *OrOpd1 = OrOpd1Val.getNode();
2271 
2272  unsigned BFXOpc;
2273  int DstLSB, Width;
2274  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2275  NumberOfIgnoredLowBits, BiggerPattern)) {
2276  // Check that the returned opcode is compatible with the pattern,
2277  // i.e., same type and zero extended (U and not S)
2278  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2279  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2280  continue;
2281 
2282  // Compute the width of the bitfield insertion
2283  DstLSB = 0;
2284  Width = ImmS - ImmR + 1;
2285  // FIXME: This constraint is to catch bitfield insertion; we may
2286  // want to widen the pattern if we want to grab the general bitfield
2287  // move case.
2288  if (Width <= 0)
2289  continue;
2290 
2291  // If the mask on the insertee is correct, we have a BFXIL operation. We
2292  // can share the ImmR and ImmS values from the already-computed UBFM.
2293  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2294  BiggerPattern,
2295  Src, DstLSB, Width)) {
2296  ImmR = (BitWidth - DstLSB) % BitWidth;
2297  ImmS = Width - 1;
2298  } else
2299  continue;
2300 
2301  // Check the second part of the pattern
2302  EVT VT = OrOpd1Val.getValueType();
2303  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2304 
2305  // Compute the Known Zero for the candidate of the first operand.
2306  // This allows us to catch more general cases than just looking for
2307  // an AND with an immediate. Indeed, simplify-demanded-bits may have removed
2308  // the AND instruction because it proved it was useless.
2309  KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2310 
2311  // Check if there is enough room for the second operand to appear
2312  // in the first one
2313  APInt BitsToBeInserted =
2314  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2315 
2316  if ((BitsToBeInserted & ~Known.Zero) != 0)
2317  continue;
2318 
2319  // Set the first operand
2320  uint64_t Imm;
2321  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2322  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2323  // In that case, we can eliminate the AND
2324  Dst = OrOpd1->getOperand(0);
2325  else
2326  // Maybe the AND has been removed by simplify-demanded-bits
2327  // or is useful because it discards more bits
2328  Dst = OrOpd1Val;
2329 
2330  // both parts match
2331  SDLoc DL(N);
2332  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2333  CurDAG->getTargetConstant(ImmS, DL, VT)};
2334  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2335  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2336  return true;
2337  }
2338 
2339  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2340  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2341  // mask (e.g., 0x000ffff0).
2342  uint64_t Mask0Imm, Mask1Imm;
2343  SDValue And0 = N->getOperand(0);
2344  SDValue And1 = N->getOperand(1);
2345  if (And0.hasOneUse() && And1.hasOneUse() &&
2346  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2347  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2348  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2349  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2350 
2351  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2352  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2353  // bits to be inserted.
2354  if (isShiftedMask(Mask0Imm, VT)) {
2355  std::swap(And0, And1);
2356  std::swap(Mask0Imm, Mask1Imm);
2357  }
2358 
2359  SDValue Src = And1->getOperand(0);
2360  SDValue Dst = And0->getOperand(0);
2361  unsigned LSB = countTrailingZeros(Mask1Imm);
2362  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2363 
2364  // The BFXIL inserts the low-order bits from a source register, so right
2365  // shift the needed bits into place.
2366  SDLoc DL(N);
2367  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2368  SDNode *LSR = CurDAG->getMachineNode(
2369  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2370  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2371 
2372  // BFXIL is an alias of BFM, so translate to BFM operands.
2373  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2374  unsigned ImmS = Width - 1;
2375 
2376  // Create the BFXIL instruction.
2377  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2378  CurDAG->getTargetConstant(ImmR, DL, VT),
2379  CurDAG->getTargetConstant(ImmS, DL, VT)};
2380  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2381  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2382  return true;
2383  }
2384 
2385  return false;
2386 }
2387 
2388 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2389  if (N->getOpcode() != ISD::OR)
2390  return false;
2391 
2392  APInt NUsefulBits;
2393  getUsefulBits(SDValue(N, 0), NUsefulBits);
2394 
2395  // If all bits are not useful, just return UNDEF.
2396  if (!NUsefulBits) {
2397  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2398  return true;
2399  }
2400 
2401  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2402  return true;
2403 
2404  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2405 }
2406 
2407 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2408 /// equivalent of a left shift by a constant amount followed by an and masking
2409 /// out a contiguous set of bits.
2410 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2411  if (N->getOpcode() != ISD::AND)
2412  return false;
2413 
2414  EVT VT = N->getValueType(0);
2415  if (VT != MVT::i32 && VT != MVT::i64)
2416  return false;
2417 
2418  SDValue Op0;
2419  int DstLSB, Width;
2420  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2421  Op0, DstLSB, Width))
2422  return false;
2423 
2424  // ImmR is the rotate right amount.
2425  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2426  // ImmS is the most significant bit of the source to be moved.
2427  unsigned ImmS = Width - 1;
2428 
2429  SDLoc DL(N);
2430  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2431  CurDAG->getTargetConstant(ImmS, DL, VT)};
2432  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2433  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2434  return true;
2435 }
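// A small worked case (values assumed for illustration): for i32
// (and (shl x, 2), 0x3c), the positioning places a 4-bit field at bit 2, so
// DstLSB = 2 and Width = 4 give ImmR = (32 - 2) % 32 = 30 and ImmS = 3,
// i.e. UBFMWri x, #30, #3, the UBFIZ w, w, #2, #4 alias.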
2436 
2437 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2438 /// variable shift/rotate instructions.
2439 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2440  EVT VT = N->getValueType(0);
2441 
2442  unsigned Opc;
2443  switch (N->getOpcode()) {
2444  case ISD::ROTR:
2445  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2446  break;
2447  case ISD::SHL:
2448  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2449  break;
2450  case ISD::SRL:
2451  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2452  break;
2453  case ISD::SRA:
2454  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2455  break;
2456  default:
2457  return false;
2458  }
2459 
2460  uint64_t Size;
2461  uint64_t Bits;
2462  if (VT == MVT::i32) {
2463  Bits = 5;
2464  Size = 32;
2465  } else if (VT == MVT::i64) {
2466  Bits = 6;
2467  Size = 64;
2468  } else
2469  return false;
2470 
2471  SDValue ShiftAmt = N->getOperand(1);
2472  SDLoc DL(N);
2473  SDValue NewShiftAmt;
2474 
2475  // Skip over an extend of the shift amount.
2476  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2477  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2478  ShiftAmt = ShiftAmt->getOperand(0);
2479 
2480  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2481  SDValue Add0 = ShiftAmt->getOperand(0);
2482  SDValue Add1 = ShiftAmt->getOperand(1);
2483  uint64_t Add0Imm;
2484  uint64_t Add1Imm;
2485  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2486  // to avoid the ADD/SUB.
2487  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2488  NewShiftAmt = Add0;
2489  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2490  // generate a NEG instead of a SUB of a constant.
2491  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2492  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2493  (Add0Imm % Size == 0)) {
2494  unsigned NegOpc;
2495  unsigned ZeroReg;
2496  EVT SubVT = ShiftAmt->getValueType(0);
2497  if (SubVT == MVT::i32) {
2498  NegOpc = AArch64::SUBWrr;
2499  ZeroReg = AArch64::WZR;
2500  } else {
2501  assert(SubVT == MVT::i64);
2502  NegOpc = AArch64::SUBXrr;
2503  ZeroReg = AArch64::XZR;
2504  }
2505  SDValue Zero =
2506  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2507  MachineSDNode *Neg =
2508  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2509  NewShiftAmt = SDValue(Neg, 0);
2510  } else
2511  return false;
2512  } else {
2513  // If the shift amount is masked with an AND, check that the mask covers the
2514  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2515  // the AND.
2516  uint64_t MaskImm;
2517  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2518  return false;
2519 
2520  if (countTrailingOnes(MaskImm) < Bits)
2521  return false;
2522 
2523  NewShiftAmt = ShiftAmt->getOperand(0);
2524  }
2525 
2526  // Narrow/widen the shift amount to match the size of the shift operation.
2527  if (VT == MVT::i32)
2528  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2529  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2530  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2531  MachineSDNode *Ext = CurDAG->getMachineNode(
2532  AArch64::SUBREG_TO_REG, DL, VT,
2533  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2534  NewShiftAmt = SDValue(Ext, 0);
2535  }
2536 
2537  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2538  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2539  return true;
2540 }
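// For example (an assumed i64 case), (shl x, (and y, 63)) needs no explicit
// AND: LSLVXr already uses only the low 6 bits of the shift amount, so the
// mask is skipped and LSLV x, x, y is emitted directly.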
2541 
2542 bool
2543 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2544  unsigned RegWidth) {
2545  APFloat FVal(0.0);
2546  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2547  FVal = CN->getValueAPF();
2548  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2549  // Some otherwise illegal constants are allowed in this case.
2550  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2551  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2552  return false;
2553 
2554  ConstantPoolSDNode *CN =
2555  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2556  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2557  } else
2558  return false;
2559 
2560  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2561  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2562  // x-register.
2563  //
2564  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2565  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2566  // integers.
2567  bool IsExact;
2568 
2569  // fbits is between 1 and 64 in the worst-case, which means the fmul
2570  // could have 2^64 as an actual operand. Need 65 bits of precision.
2571  APSInt IntVal(65, true);
2572  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2573 
2574  // N.b. isPowerOf2 also checks for > 0.
2575  if (!IsExact || !IntVal.isPowerOf2()) return false;
2576  unsigned FBits = IntVal.logBase2();
2577 
2578  // Checks above should have guaranteed that we haven't lost information in
2579  // finding FBits, but it must still be in range.
2580  if (FBits == 0 || FBits > RegWidth) return false;
2581 
2582  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2583  return true;
2584 }
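// Illustration (assumed operands): for (fp_to_sint (fmul Val, 16.0)) with a
// w-register destination, IntVal is 16 and FBits = log2(16) = 4, so the
// fixed-point form FCVTZS w, s, #4 computes convertToInt(Val * 2^4).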
2585 
2586 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2587 // fields of the string, obtains the integer values from them, and combines
2588 // these into a single value to be used in the MRS/MSR instruction.
2589 static int getIntOperandFromRegisterString(StringRef RegString) {
2590  SmallVector<StringRef, 5> Fields;
2591  RegString.split(Fields, ':');
2592 
2593  if (Fields.size() == 1)
2594  return -1;
2595 
2596  assert(Fields.size() == 5
2597  && "Invalid number of fields in read register string");
2598 
2599  SmallVector<int, 5> Ops;
2600  bool AllIntFields = true;
2601 
2602  for (StringRef Field : Fields) {
2603  unsigned IntField;
2604  AllIntFields &= !Field.getAsInteger(10, IntField);
2605  Ops.push_back(IntField);
2606  }
2607 
2608  assert(AllIntFields &&
2609  "Unexpected non-integer value in special register string.");
2610 
2611  // Need to combine the integer fields of the string into a single value
2612  // based on the bit encoding of MRS/MSR instruction.
2613  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2614  (Ops[3] << 3) | (Ops[4]);
2615 }
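// For example (an assumed register string), "1:2:7:4:5" yields
// (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 == 21413, matching the
// o0:op1:CRn:CRm:op2 bit layout of the MRS/MSR system-register operand.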
2616 
2617 // Lower the read_register intrinsic to an MRS instruction node if the special
2618 // register string argument is either of the form detailed in the ALCE (the
2619 // form described in getIntOperandFromRegisterString) or is a named register
2620 // known by the MRS SysReg mapper.
2621 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2622  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2623  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2624  SDLoc DL(N);
2625 
2626  int Reg = getIntOperandFromRegisterString(RegString->getString());
2627  if (Reg != -1) {
2628  ReplaceNode(N, CurDAG->getMachineNode(
2629  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2630  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2631  N->getOperand(0)));
2632  return true;
2633  }
2634 
2635  // Use the sysreg mapper to map the remaining possible strings to the
2636  // value for the register to be used for the instruction operand.
2637  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2638  if (TheReg && TheReg->Readable &&
2639  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2640  Reg = TheReg->Encoding;
2641  else
2642  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2643 
2644  if (Reg != -1) {
2645  ReplaceNode(N, CurDAG->getMachineNode(
2646  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2647  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2648  N->getOperand(0)));
2649  return true;
2650  }
2651 
2652  return false;
2653 }
2654 
2655 // Lower the write_register intrinsic to an MSR instruction node if the special
2656 // register string argument is either of the form detailed in the ALCE (the
2657 // form described in getIntOperandFromRegisterString) or is a named register
2658 // known by the MSR SysReg mapper.
2659 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2660  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2661  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2662  SDLoc DL(N);
2663 
2664  int Reg = getIntOperandFromRegisterString(RegString->getString());
2665  if (Reg != -1) {
2666  ReplaceNode(
2667  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2668  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2669  N->getOperand(2), N->getOperand(0)));
2670  return true;
2671  }
2672 
2673  // Check if the register was one of those allowed as the pstatefield value in
2674  // the MSR (immediate) instruction. To accept the values allowed in the
2675  // pstatefield for the MSR (immediate) instruction, we also require that an
2676  // immediate value has been provided as an argument; we know that this is
2677  // the case as it has been ensured by semantic checking.
2678  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2679  if (PMapper) {
2680  assert (isa<ConstantSDNode>(N->getOperand(2))
2681  && "Expected a constant integer expression.");
2682  unsigned Reg = PMapper->Encoding;
2683  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2684  unsigned State;
2685  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2686  assert(Immed < 2 && "Bad imm");
2687  State = AArch64::MSRpstateImm1;
2688  } else {
2689  assert(Immed < 16 && "Bad imm");
2690  State = AArch64::MSRpstateImm4;
2691  }
2692  ReplaceNode(N, CurDAG->getMachineNode(
2693  State, DL, MVT::Other,
2694  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2695  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2696  N->getOperand(0)));
2697  return true;
2698  }
2699 
2700  // Use the sysreg mapper to attempt to map the remaining possible strings
2701  // to the value for the register to be used for the MSR (register)
2702  // instruction operand.
2703  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2704  if (TheReg && TheReg->Writeable &&
2705  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2706  Reg = TheReg->Encoding;
2707  else
2708  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2709  if (Reg != -1) {
2710  ReplaceNode(N, CurDAG->getMachineNode(
2711  AArch64::MSR, DL, MVT::Other,
2712  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2713  N->getOperand(2), N->getOperand(0)));
2714  return true;
2715  }
2716 
2717  return false;
2718 }
2719 
2720 /// We've got special pseudo-instructions for these
2721 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2722  unsigned Opcode;
2723  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2724 
2725  // Leave IR for LSE if subtarget supports it.
2726  if (Subtarget->hasLSE()) return false;
2727 
2728  if (MemTy == MVT::i8)
2729  Opcode = AArch64::CMP_SWAP_8;
2730  else if (MemTy == MVT::i16)
2731  Opcode = AArch64::CMP_SWAP_16;
2732  else if (MemTy == MVT::i32)
2733  Opcode = AArch64::CMP_SWAP_32;
2734  else if (MemTy == MVT::i64)
2735  Opcode = AArch64::CMP_SWAP_64;
2736  else
2737  llvm_unreachable("Unknown AtomicCmpSwap type");
2738 
2739  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2740  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2741  N->getOperand(0)};
2742  SDNode *CmpSwap = CurDAG->getMachineNode(
2743  Opcode, SDLoc(N),
2744  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2745 
2746  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2747  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2748 
2749  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2750  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2751  CurDAG->RemoveDeadNode(N);
2752 
2753  return true;
2754 }
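// Note (behaviour assumed from the CMP_SWAP pseudo definitions, for
// illustration): on subtargets without LSE, an i32 cmpxchg reaches here and
// becomes CMP_SWAP_32, which is expected to be expanded later into an
// exclusive load/store (LDAXR/STLXR) retry loop.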
2755 
2756 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2757  // If we have a custom node, we already have selected!
2758  if (Node->isMachineOpcode()) {
2759  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2760  Node->setNodeId(-1);
2761  return;
2762  }
2763 
2764  // A few custom selection cases.
2765  EVT VT = Node->getValueType(0);
2766 
2767  switch (Node->getOpcode()) {
2768  default:
2769  break;
2770 
2771  case ISD::ATOMIC_CMP_SWAP:
2772  if (SelectCMP_SWAP(Node))
2773  return;
2774  break;
2775 
2776  case ISD::READ_REGISTER:
2777  if (tryReadRegister(Node))
2778  return;
2779  break;
2780 
2781  case ISD::WRITE_REGISTER:
2782  if (tryWriteRegister(Node))
2783  return;
2784  break;
2785 
2786  case ISD::ADD:
2787  if (tryMLAV64LaneV128(Node))
2788  return;
2789  break;
2790 
2791  case ISD::LOAD: {
2792  // Try to select as an indexed load. Fall through to normal processing
2793  // if we can't.
2794  if (tryIndexedLoad(Node))
2795  return;
2796  break;
2797  }
2798 
2799  case ISD::SRL:
2800  case ISD::AND:
2801  case ISD::SRA:
2802  case ISD::SIGN_EXTEND_INREG:
2803  if (tryBitfieldExtractOp(Node))
2804  return;
2805  if (tryBitfieldInsertInZeroOp(Node))
2806  return;
2807  LLVM_FALLTHROUGH;
2808  case ISD::ROTR:
2809  case ISD::SHL:
2810  if (tryShiftAmountMod(Node))
2811  return;
2812  break;
2813 
2814  case ISD::SIGN_EXTEND:
2815  if (tryBitfieldExtractOpFromSExt(Node))
2816  return;
2817  break;
2818 
2819  case ISD::OR:
2820  if (tryBitfieldInsertOp(Node))
2821  return;
2822  break;
2823 
2824  case ISD::EXTRACT_VECTOR_ELT: {
2825  // Extracting lane zero is a special case where we can just use a plain
2826  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2827  // the rest of the compiler, especially the register allocator and copy
2828  // propagation, to reason about, so is preferred when it's possible to
2829  // use it.
2830  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2831  // Bail and use the default Select() for non-zero lanes.
2832  if (LaneNode->getZExtValue() != 0)
2833  break;
2834  // If the element type is not the same as the result type, likewise
2835  // bail and use the default Select(), as there's more to do than just
2836  // a cross-class COPY. This catches extracts of i8 and i16 elements
2837  // since they will need an explicit zext.
2838  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2839  break;
2840  unsigned SubReg;
2841  switch (Node->getOperand(0)
2842  .getValueType()
2843  .getVectorElementType()
2844  .getSizeInBits()) {
2845  default:
2846  llvm_unreachable("Unexpected vector element type!");
2847  case 64:
2848  SubReg = AArch64::dsub;
2849  break;
2850  case 32:
2851  SubReg = AArch64::ssub;
2852  break;
2853  case 16:
2854  SubReg = AArch64::hsub;
2855  break;
2856  case 8:
2857  llvm_unreachable("unexpected zext-requiring extract element!");
2858  }
2859  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2860  Node->getOperand(0));
2861  LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2862  LLVM_DEBUG(Extract->dumpr(CurDAG));
2863  LLVM_DEBUG(dbgs() << "\n");
2864  ReplaceNode(Node, Extract.getNode());
2865  return;
2866  }
2867  case ISD::Constant: {
2868  // Materialize zero constants as copies from WZR/XZR. This allows
2869  // the coalescer to propagate these into other instructions.
2870  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2871  if (ConstNode->isNullValue()) {
2872  if (VT == MVT::i32) {
2873  SDValue New = CurDAG->getCopyFromReg(
2874  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2875  ReplaceNode(Node, New.getNode());
2876  return;
2877  } else if (VT == MVT::i64) {
2878  SDValue New = CurDAG->getCopyFromReg(
2879  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2880  ReplaceNode(Node, New.getNode());
2881  return;
2882  }
2883  }
2884  break;
2885  }
2886 
2887  case ISD::FrameIndex: {
2888  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2889  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2890  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2891  const TargetLowering *TLI = getTargetLowering();
2892  SDValue TFI = CurDAG->getTargetFrameIndex(
2893  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2894  SDLoc DL(Node);
2895  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2896  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2897  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2898  return;
2899  }
2900  case ISD::INTRINSIC_W_CHAIN: {
2901  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2902  switch (IntNo) {
2903  default:
2904  break;
2905  case Intrinsic::aarch64_ldaxp:
2906  case Intrinsic::aarch64_ldxp: {
2907  unsigned Op =
2908  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2909  SDValue MemAddr = Node->getOperand(2);
2910  SDLoc DL(Node);
2911  SDValue Chain = Node->getOperand(0);
2912 
2913  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2914  MVT::Other, MemAddr, Chain);
2915 
2916  // Transfer memoperands.
2917  MachineMemOperand *MemOp =
2918  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2919  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
2920  ReplaceNode(Node, Ld);
2921  return;
2922  }
2923  case Intrinsic::aarch64_stlxp:
2924  case Intrinsic::aarch64_stxp: {
2925  unsigned Op =
2926  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2927  SDLoc DL(Node);
2928  SDValue Chain = Node->getOperand(0);
2929  SDValue ValLo = Node->getOperand(2);
2930  SDValue ValHi = Node->getOperand(3);
2931  SDValue MemAddr = Node->getOperand(4);
2932 
2933  // Place arguments in the right order.
2934  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2935 
2936  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2937  // Transfer memoperands.
2938  MachineMemOperand *MemOp =
2939  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2940  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2941 
2942  ReplaceNode(Node, St);
2943  return;
2944  }
2945  case Intrinsic::aarch64_neon_ld1x2:
2946  if (VT == MVT::v8i8) {
2947  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2948  return;
2949  } else if (VT == MVT::v16i8) {
2950  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2951  return;
2952  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2953  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2954  return;
2955  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2956  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2957  return;
2958  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2959  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2960  return;
2961  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2962  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2963  return;
2964  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2965  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2966  return;
2967  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2968  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2969  return;
2970  }
2971  break;
2972  case Intrinsic::aarch64_neon_ld1x3:
2973  if (VT == MVT::v8i8) {
2974  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2975  return;
2976  } else if (VT == MVT::v16i8) {
2977  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2978  return;
2979  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2980  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2981  return;
2982  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2983  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2984  return;
2985  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2986  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2987  return;
2988  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2989  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2990  return;
2991  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2992  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2993  return;
2994  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2995  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2996  return;
2997  }
2998  break;
2999  case Intrinsic::aarch64_neon_ld1x4:
3000  if (VT == MVT::v8i8) {
3001  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3002  return;
3003  } else if (VT == MVT::v16i8) {
3004  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3005  return;
3006  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3007  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3008  return;
3009  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3010  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3011  return;
3012  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3013  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3014  return;
3015  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3016  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3017  return;
3018  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3019  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3020  return;
3021  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3022  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3023  return;
3024  }
3025  break;
3026  case Intrinsic::aarch64_neon_ld2:
3027  if (VT == MVT::v8i8) {
3028  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3029  return;
3030  } else if (VT == MVT::v16i8) {
3031  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3032  return;
3033  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3034  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3035  return;
3036  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3037  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3038  return;
3039  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3040  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3041  return;
3042  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3043  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3044  return;
3045  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3046  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3047  return;
3048  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3049  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3050  return;
3051  }
3052  break;
3053  case Intrinsic::aarch64_neon_ld3:
3054  if (VT == MVT::v8i8) {
3055  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3056  return;
3057  } else if (VT == MVT::v16i8) {
3058  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3059  return;
3060  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3061  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3062  return;
3063  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3064  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3065  return;
3066  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3067  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3068  return;
3069  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3070  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3071  return;
3072  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3073  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3074  return;
3075  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3076  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3077  return;
3078  }
3079  break;
3080  case Intrinsic::aarch64_neon_ld4:
3081  if (VT == MVT::v8i8) {
3082  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3083  return;
3084  } else if (VT == MVT::v16i8) {
3085  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3086  return;
3087  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3088  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3089  return;
3090  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3091  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3092  return;
3093  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3094  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3095  return;
3096  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3097  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3098  return;
3099  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3100  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3101  return;
3102  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3103  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3104  return;
3105  }
3106  break;
3107  case Intrinsic::aarch64_neon_ld2r:
3108  if (VT == MVT::v8i8) {
3109  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3110  return;
3111  } else if (VT == MVT::v16i8) {
3112  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3113  return;
3114  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3115  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3116  return;
3117  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3118  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3119  return;
3120  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3121  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3122  return;
3123  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3124  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3125  return;
3126  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3127  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3128  return;
3129  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3130  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3131  return;
3132  }
3133  break;
3134  case Intrinsic::aarch64_neon_ld3r:
3135  if (VT == MVT::v8i8) {
3136  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3137  return;
3138  } else if (VT == MVT::v16i8) {
3139  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3140  return;
3141  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3142  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3143  return;
3144  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3145  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3146  return;
3147  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3148  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3149  return;
3150  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3151  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3152  return;
3153  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3154  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3155  return;
3156  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3157  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3158  return;
3159  }
3160  break;
3161  case Intrinsic::aarch64_neon_ld4r:
3162  if (VT == MVT::v8i8) {
3163  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3164  return;
3165  } else if (VT == MVT::v16i8) {
3166  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3167  return;
3168  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3169  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3170  return;
3171  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3172  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3173  return;
3174  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3175  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3176  return;
3177  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3178  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3179  return;
3180  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3181  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3182  return;
3183  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3184  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3185  return;
3186  }
3187  break;
3188  case Intrinsic::aarch64_neon_ld2lane:
3189  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3190  SelectLoadLane(Node, 2, AArch64::LD2i8);
3191  return;
3192  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3193  VT == MVT::v8f16) {
3194  SelectLoadLane(Node, 2, AArch64::LD2i16);
3195  return;
3196  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3197  VT == MVT::v2f32) {
3198  SelectLoadLane(Node, 2, AArch64::LD2i32);
3199  return;
3200  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3201  VT == MVT::v1f64) {
3202  SelectLoadLane(Node, 2, AArch64::LD2i64);
3203  return;
3204  }
3205  break;
3206  case Intrinsic::aarch64_neon_ld3lane:
3207  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3208  SelectLoadLane(Node, 3, AArch64::LD3i8);
3209  return;
3210  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3211  VT == MVT::v8f16) {
3212  SelectLoadLane(Node, 3, AArch64::LD3i16);
3213  return;
3214  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3215  VT == MVT::v2f32) {
3216  SelectLoadLane(Node, 3, AArch64::LD3i32);
3217  return;
3218  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3219  VT == MVT::v1f64) {
3220  SelectLoadLane(Node, 3, AArch64::LD3i64);
3221  return;
3222  }
3223  break;
3224  case Intrinsic::aarch64_neon_ld4lane:
3225  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3226  SelectLoadLane(Node, 4, AArch64::LD4i8);
3227  return;
3228  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3229  VT == MVT::v8f16) {
3230  SelectLoadLane(Node, 4, AArch64::LD4i16);
3231  return;
3232  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3233  VT == MVT::v2f32) {
3234  SelectLoadLane(Node, 4, AArch64::LD4i32);
3235  return;
3236  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3237  VT == MVT::v1f64) {
3238  SelectLoadLane(Node, 4, AArch64::LD4i64);
3239  return;
3240  }
3241  break;
3242  }
3243  } break;
3244  case ISD::INTRINSIC_WO_CHAIN: {
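    // INTRINSIC_WO_CHAIN nodes carry no chain, so the intrinsic ID is
    // operand 0 rather than operand 1.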
3245  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3246  switch (IntNo) {
3247  default:
3248  break;
3249  case Intrinsic::aarch64_neon_tbl2:
3250  SelectTable(Node, 2,
3251  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3252  false);
3253  return;
3254  case Intrinsic::aarch64_neon_tbl3:
3255  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3256  : AArch64::TBLv16i8Three,
3257  false);
3258  return;
3259  case Intrinsic::aarch64_neon_tbl4:
3260  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3261  : AArch64::TBLv16i8Four,
3262  false);
3263  return;
3264  case Intrinsic::aarch64_neon_tbx2:
3265  SelectTable(Node, 2,
3266  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3267  true);
3268  return;
3269  case Intrinsic::aarch64_neon_tbx3:
3270  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3271  : AArch64::TBXv16i8Three,
3272  true);
3273  return;
3274  case Intrinsic::aarch64_neon_tbx4:
3275  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3276  : AArch64::TBXv16i8Four,
3277  true);
3278  return;
3279  case Intrinsic::aarch64_neon_smull:
3280  case Intrinsic::aarch64_neon_umull:
3281  if (tryMULLV64LaneV128(IntNo, Node))
3282  return;
3283  break;
3284  }
3285  break;
3286  }
3287  case ISD::INTRINSIC_VOID: {
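    // Void intrinsics keep the chain in operand 0 and the intrinsic ID in
    // operand 1; for the store intrinsics handled below, the vector type is
    // taken from the first data operand, if present.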
3288  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3289  if (Node->getNumOperands() >= 3)
3290  VT = Node->getOperand(2)->getValueType(0);
3291  switch (IntNo) {
3292  default:
3293  break;
3294  case Intrinsic::aarch64_neon_st1x2: {
3295  if (VT == MVT::v8i8) {
3296  SelectStore(Node, 2, AArch64::ST1Twov8b);
3297  return;
3298  } else if (VT == MVT::v16i8) {
3299  SelectStore(Node, 2, AArch64::ST1Twov16b);
3300  return;
3301  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3302  SelectStore(Node, 2, AArch64::ST1Twov4h);
3303  return;
3304  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3305  SelectStore(Node, 2, AArch64::ST1Twov8h);
3306  return;
3307  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3308  SelectStore(Node, 2, AArch64::ST1Twov2s);
3309  return;
3310  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3311  SelectStore(Node, 2, AArch64::ST1Twov4s);
3312  return;
3313  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3314  SelectStore(Node, 2, AArch64::ST1Twov2d);
3315  return;
3316  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3317  SelectStore(Node, 2, AArch64::ST1Twov1d);
3318  return;
3319  }
3320  break;
3321  }
3322  case Intrinsic::aarch64_neon_st1x3: {
3323  if (VT == MVT::v8i8) {
3324  SelectStore(Node, 3, AArch64::ST1Threev8b);
3325  return;
3326  } else if (VT == MVT::v16i8) {
3327  SelectStore(Node, 3, AArch64::ST1Threev16b);
3328  return;
3329  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3330  SelectStore(Node, 3, AArch64::ST1Threev4h);
3331  return;
3332  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3333  SelectStore(Node, 3, AArch64::ST1Threev8h);
3334  return;
3335  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3336  SelectStore(Node, 3, AArch64::ST1Threev2s);
3337  return;
3338  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3339  SelectStore(Node, 3, AArch64::ST1Threev4s);
3340  return;
3341  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3342  SelectStore(Node, 3, AArch64::ST1Threev2d);
3343  return;
3344  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3345  SelectStore(Node, 3, AArch64::ST1Threev1d);
3346  return;
3347  }
3348  break;
3349  }
3350  case Intrinsic::aarch64_neon_st1x4: {
3351  if (VT == MVT::v8i8) {
3352  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3353  return;
3354  } else if (VT == MVT::v16i8) {
3355  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3356  return;
3357  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3358  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3359  return;
3360  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3361  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3362  return;
3363  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3364  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3365  return;
3366  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3367  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3368  return;
3369  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3370  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3371  return;
3372  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3373  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3374  return;
3375  }
3376  break;
3377  }
3378  case Intrinsic::aarch64_neon_st2: {
3379  if (VT == MVT::v8i8) {
3380  SelectStore(Node, 2, AArch64::ST2Twov8b);
3381  return;
3382  } else if (VT == MVT::v16i8) {
3383  SelectStore(Node, 2, AArch64::ST2Twov16b);
3384  return;
3385  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3386  SelectStore(Node, 2, AArch64::ST2Twov4h);
3387  return;
3388  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3389  SelectStore(Node, 2, AArch64::ST2Twov8h);
3390  return;
3391  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3392  SelectStore(Node, 2, AArch64::ST2Twov2s);
3393  return;
3394  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3395  SelectStore(Node, 2, AArch64::ST2Twov4s);
3396  return;
3397  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3398  SelectStore(Node, 2, AArch64::ST2Twov2d);
3399  return;
3400  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
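      // As with the loads above, ST2 has no .1d form; storing two v1i64
      // registers back-to-back with ST1 produces the same memory image.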
3401  SelectStore(Node, 2, AArch64::ST1Twov1d);
3402  return;
3403  }
3404  break;
3405  }
3406  case Intrinsic::aarch64_neon_st3: {
3407  if (VT == MVT::v8i8) {
3408  SelectStore(Node, 3, AArch64::ST3Threev8b);
3409  return;
3410  } else if (VT == MVT::v16i8) {
3411  SelectStore(Node, 3, AArch64::ST3Threev16b);
3412  return;
3413  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3414  SelectStore(Node, 3, AArch64::ST3Threev4h);
3415  return;
3416  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3417  SelectStore(Node, 3, AArch64::ST3Threev8h);
3418  return;
3419  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3420  SelectStore(Node, 3, AArch64::ST3Threev2s);
3421  return;
3422  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3423  SelectStore(Node, 3, AArch64::ST3Threev4s);
3424  return;
3425  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3426  SelectStore(Node, 3, AArch64::ST3Threev2d);
3427  return;
3428  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3429  SelectStore(Node, 3, AArch64::ST1Threev1d);
3430  return;
3431  }
3432  break;
3433  }
3434  case Intrinsic::aarch64_neon_st4: {
3435  if (VT == MVT::v8i8) {
3436  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3437  return;
3438  } else if (VT == MVT::v16i8) {
3439  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3440  return;
3441  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3442  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3443  return;
3444  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3445  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3446  return;
3447  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3448  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3449  return;
3450  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3451  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3452  return;
3453  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3454  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3455  return;
3456  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3457  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3458  return;
3459  }
3460  break;
3461  }
3462  case Intrinsic::aarch64_neon_st2lane: {
3463  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3464  SelectStoreLane(Node, 2, AArch64::ST2i8);
3465  return;
3466  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3467  VT == MVT::v8f16) {
3468  SelectStoreLane(Node, 2, AArch64::ST2i16);
3469  return;
3470  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3471  VT == MVT::v2f32) {
3472  SelectStoreLane(Node, 2, AArch64::ST2i32);
3473  return;
3474  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3475  VT == MVT::v1f64) {
3476  SelectStoreLane(Node, 2, AArch64::ST2i64);
3477  return;
3478  }
3479  break;
3480  }
3481  case Intrinsic::aarch64_neon_st3lane: {
3482  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3483  SelectStoreLane(Node, 3, AArch64::ST3i8);
3484  return;
3485  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3486  VT == MVT::v8f16) {
3487  SelectStoreLane(Node, 3, AArch64::ST3i16);
3488  return;
3489  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3490  VT == MVT::v2f32) {
3491  SelectStoreLane(Node, 3, AArch64::ST3i32);
3492  return;
3493  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3494  VT == MVT::v1f64) {
3495  SelectStoreLane(Node, 3, AArch64::ST3i64);
3496  return;
3497  }
3498  break;
3499  }
3500  case Intrinsic::aarch64_neon_st4lane: {
3501  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3502  SelectStoreLane(Node, 4, AArch64::ST4i8);
3503  return;
3504  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3505  VT == MVT::v8f16) {
3506  SelectStoreLane(Node, 4, AArch64::ST4i16);
3507  return;
3508  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3509  VT == MVT::v2f32) {
3510  SelectStoreLane(Node, 4, AArch64::ST4i32);
3511  return;
3512  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3513  VT == MVT::v1f64) {
3514  SelectStoreLane(Node, 4, AArch64::ST4i64);
3515  return;
3516  }
3517  break;
3518  }
3519  }
3520  break;
3521  }
3522  case AArch64ISD::LD2post: {
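    // The *_POST instructions selected below also write the incremented base
    // address back, which SelectPostLoad exposes as an additional result.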
3523  if (VT == MVT::v8i8) {
3524  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3525  return;
3526  } else if (VT == MVT::v16i8) {
3527  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3528  return;
3529  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3530  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3531  return;
3532  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3533  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3534  return;
3535  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3536  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3537  return;
3538  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3539  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3540  return;
3541  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3542  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3543  return;
3544  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3545  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3546  return;
3547  }
3548  break;
3549  }
3550  case AArch64ISD::LD3post: {
3551  if (VT == MVT::v8i8) {
3552  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3553  return;
3554  } else if (VT == MVT::v16i8) {
3555  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3556  return;
3557  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3558  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3559  return;
3560  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3561  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3562  return;
3563  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3564  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3565  return;
3566  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3567  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3568  return;
3569  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3570  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3571  return;
3572  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3573  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3574  return;
3575  }
3576  break;
3577  }
3578  case AArch64ISD::LD4post: {
3579  if (VT == MVT::v8i8) {
3580  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3581  return;
3582  } else if (VT == MVT::v16i8) {
3583  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3584  return;
3585  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3586  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3587  return;
3588  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3589  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3590  return;
3591  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3592  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3593  return;
3594  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3595  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3596  return;
3597  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3598  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3599  return;
3600  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3601  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3602  return;
3603  }
3604  break;
3605  }
3606  case AArch64ISD::LD1x2post: {
3607  if (VT == MVT::v8i8) {
3608  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3609  return;
3610  } else if (VT == MVT::v16i8) {
3611  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3612  return;
3613  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3614  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3615  return;
3616  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3617  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3618  return;
3619  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3620  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3621  return;
3622  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3623  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3624  return;
3625  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3626  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3627  return;
3628  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3629  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3630  return;
3631  }
3632  break;
3633  }
3634  case AArch64ISD::LD1x3post: {
3635  if (VT == MVT::v8i8) {
3636  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3637  return;
3638  } else if (VT == MVT::v16i8) {
3639  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3640  return;
3641  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3642  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3643  return;
3644  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3645  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3646  return;
3647  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3648  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3649  return;
3650  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3651  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3652  return;
3653  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3654  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3655  return;
3656  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3657  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3658  return;
3659  }
3660  break;
3661  }
3662  case AArch64ISD::LD1x4post: {
3663  if (VT == MVT::v8i8) {
3664  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3665  return;
3666  } else if (VT == MVT::v16i8) {
3667  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3668  return;
3669  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3670  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3671  return;
3672  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3673  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3674  return;
3675  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3676  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3677  return;
3678  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3679  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3680  return;
3681  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3682  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3683  return;
3684  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3685  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3686  return;
3687  }
3688  break;
3689  }
3690  case AArch64ISD::LD1DUPpost: {
3691  if (VT == MVT::v8i8) {
3692  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3693  return;
3694  } else if (VT == MVT::v16i8) {
3695  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3696  return;
3697  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3698  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3699  return;
3700  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3701  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3702  return;
3703  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3704  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3705  return;
3706  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3707  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3708  return;
3709  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3710  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3711  return;
3712  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3713  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3714  return;
3715  }
3716  break;
3717  }
3718  case AArch64ISD::LD2DUPpost: {
3719  if (VT == MVT::v8i8) {
3720  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3721  return;
3722  } else if (VT == MVT::v16i8) {
3723  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3724  return;
3725  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3726  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3727  return;
3728  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3729  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3730  return;
3731  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3732  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3733  return;
3734  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3735  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3736  return;
3737  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3738  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3739  return;
3740  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3741  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3742  return;
3743  }
3744  break;
3745  }
3746  case AArch64ISD::LD3DUPpost: {
3747  if (VT == MVT::v8i8) {
3748  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3749  return;
3750  } else if (VT == MVT::v16i8) {
3751  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3752  return;
3753  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3754  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3755  return;
3756  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3757  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3758  return;
3759  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3760  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3761  return;
3762  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3763  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3764  return;
3765  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3766  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3767  return;
3768  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3769  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3770  return;
3771  }
3772  break;
3773  }
3774  case AArch64ISD::LD4DUPpost: {
3775  if (VT == MVT::v8i8) {
3776  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3777  return;
3778  } else if (VT == MVT::v16i8) {
3779  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3780  return;
3781  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3782  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3783  return;
3784  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3785  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3786  return;
3787  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3788  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3789  return;
3790  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3791  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3792  return;
3793  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3794  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3795  return;
3796  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3797  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3798  return;
3799  }
3800  break;
3801  }
3802  case AArch64ISD::LD1LANEpost: {
3803  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3804  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3805  return;
3806  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3807  VT == MVT::v8f16) {
3808  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3809  return;
3810  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3811  VT == MVT::v2f32) {
3812  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3813  return;
3814  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3815  VT == MVT::v1f64) {
3816  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3817  return;
3818  }
3819  break;
3820  }
3821  case AArch64ISD::LD2LANEpost: {
3822  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3823  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3824  return;
3825  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3826  VT == MVT::v8f16) {
3827  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3828  return;
3829  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3830  VT == MVT::v2f32) {
3831  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3832  return;
3833  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3834  VT == MVT::v1f64) {
3835  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3836  return;
3837  }
3838  break;
3839  }
3840  case AArch64ISD::LD3LANEpost: {
3841  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3842  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3843  return;
3844  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3845  VT == MVT::v8f16) {
3846  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3847  return;
3848  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3849  VT == MVT::v2f32) {
3850  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3851  return;
3852  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3853  VT == MVT::v1f64) {
3854  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3855  return;
3856  }
3857  break;
3858  }
3859  case AArch64ISD::LD4LANEpost: {
3860  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3861  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3862  return;
3863  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3864  VT == MVT::v8f16) {
3865  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3866  return;
3867  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3868  VT == MVT::v2f32) {
3869  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3870  return;
3871  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3872  VT == MVT::v1f64) {
3873  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3874  return;
3875  }
3876  break;
3877  }
3878  case AArch64ISD::ST2post: {
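    // Post-indexed stores keep the chain in operand 0 and the registers to
    // store starting at operand 1, so the vector type comes from the first
    // stored operand.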
3879  VT = Node->getOperand(1).getValueType();
3880  if (VT == MVT::v8i8) {
3881  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3882  return;
3883  } else if (VT == MVT::v16i8) {
3884  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3885  return;
3886  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3887  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3888  return;
3889  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3890  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3891  return;
3892  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3893  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3894  return;
3895  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3896  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3897  return;
3898  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3899  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3900  return;
3901  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3902  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3903  return;
3904  }
3905  break;
3906  }
3907  case AArch64ISD::ST3post: {
3908  VT = Node->getOperand(1).getValueType();
3909  if (VT == MVT::v8i8) {
3910  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3911  return;
3912  } else if (VT == MVT::v16i8) {
3913  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3914  return;
3915  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3916  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3917  return;
3918  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3919  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3920  return;
3921  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3922  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3923  return;
3924  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3925  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3926  return;
3927  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3928  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3929  return;
3930  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3931  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3932  return;
3933  }
3934  break;
3935  }
3936  case AArch64ISD::ST4post: {
3937  VT = Node->getOperand(1).getValueType();
3938  if (VT == MVT::v8i8) {
3939  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3940  return;
3941  } else if (VT == MVT::v16i8) {
3942  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3943  return;
3944  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3945  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3946  return;
3947  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3948  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3949  return;
3950  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3951  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3952  return;
3953  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3954  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3955  return;
3956  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3957  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3958  return;
3959  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3960  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3961  return;
3962  }
3963  break;
3964  }
3965  case AArch64ISD::ST1x2post: {
3966  VT = Node->getOperand(1).getValueType();
3967  if (VT == MVT::v8i8) {
3968  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3969  return;
3970  } else if (VT == MVT::v16i8) {
3971  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3972  return;
3973  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3974  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3975  return;
3976  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3977  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3978  return;
3979  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3980  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3981  return;
3982  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3983  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3984  return;
3985  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3986  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3987  return;
3988  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3989  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3990  return;
3991  }
3992  break;
3993  }
3994  case AArch64ISD::ST1x3post: {
3995  VT = Node->getOperand(1).getValueType();
3996  if (VT == MVT::v8i8) {
3997  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
3998  return;
3999  } else if (VT == MVT::v16i8) {
4000  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4001  return;
4002  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4003  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4004  return;
4005  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4006  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4007  return;
4008  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4009  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4010  return;
4011  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4012  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4013  return;
4014  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4015  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4016  return;
4017  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4018  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4019  return;
4020  }
4021  break;
4022  }
4023  case AArch64ISD::ST1x4post: {
4024  VT = Node->getOperand(1).getValueType();
4025  if (VT == MVT::v8i8) {
4026  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4027  return;
4028  } else if (VT == MVT::v16i8) {
4029  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4030  return;
4031  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4032  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4033  return;
4034  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4035  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4036  return;
4037  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4038  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4039  return;
4040  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4041  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4042  return;
4043  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4044  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4045  return;
4046  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4047  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4048  return;
4049  }
4050  break;
4051  }
4052  case AArch64ISD::ST2LANEpost: {
4053  VT = Node->getOperand(1).getValueType();
4054  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4055  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4056  return;
4057  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4058  VT == MVT::v8f16) {
4059  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4060  return;
4061  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4062  VT == MVT::v2f32) {
4063  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4064  return;
4065  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4066  VT == MVT::v1f64) {
4067  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4068  return;
4069  }
4070  break;
4071  }
4072  case AArch64ISD::ST3LANEpost: {
4073  VT = Node->getOperand(1).getValueType();
4074  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4075  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4076  return;
4077  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4078  VT == MVT::v8f16) {
4079  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4080  return;
4081  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4082  VT == MVT::v2f32) {
4083  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4084  return;
4085  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4086  VT == MVT::v1f64) {
4087  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4088  return;
4089  }
4090  break;
4091  }
4092  case AArch64ISD::ST4LANEpost: {
4093  VT = Node->getOperand(1).getValueType();
4094  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4095  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4096  return;
4097  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4098  VT == MVT::v8f16) {
4099  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4100  return;
4101  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4102  VT == MVT::v2f32) {
4103  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4104  return;
4105  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4106  VT == MVT::v1f64) {
4107  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4108  return;
4109  }
4110  break;
4111  }
4112  }
4113 
4114  // Select the default instruction
4115  SelectCode(Node);
4116 }
4117 
4118 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4119 /// AArch64-specific DAG, ready for instruction scheduling.
4120 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4121                                          CodeGenOpt::Level OptLevel) {
4122  return new AArch64DAGToDAGISel(TM, OptLevel);
4123 }
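
// Typical use of this factory (a sketch only, assuming the usual
// TargetPassConfig hooks: AArch64PassConfig, addInstSelector, addPass and
// getOptLevel, none of which are defined in this file). The real wiring lives
// in AArch64TargetMachine.cpp and may add further passes around it.
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }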