LLVM  7.0.0svn
AArch64ISelDAGToDAG.cpp
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the AArch64 target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/KnownBits.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "aarch64-isel"
30 
31 //===--------------------------------------------------------------------===//
32 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
33 /// instructions for SelectionDAG operations.
34 ///
35 namespace {
36 
37 class AArch64DAGToDAGISel : public SelectionDAGISel {
38 
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42 
43  bool ForCodeSize;
44 
45 public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47  CodeGenOpt::Level OptLevel)
48  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
49  ForCodeSize(false) {}
50 
51  StringRef getPassName() const override {
52  return "AArch64 Instruction Selection";
53  }
54 
55  bool runOnMachineFunction(MachineFunction &MF) override {
56  ForCodeSize = MF.getFunction().optForSize();
57  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
58    return SelectionDAGISel::runOnMachineFunction(MF);
59  }
60 
61  void Select(SDNode *Node) override;
62 
63  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
64  /// inline asm expressions.
65  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
66  unsigned ConstraintID,
67  std::vector<SDValue> &OutOps) override;
68 
69  bool tryMLAV64LaneV128(SDNode *N);
70  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75  return SelectShiftedRegister(N, false, Reg, Shift);
76  }
77  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78  return SelectShiftedRegister(N, true, Reg, Shift);
79  }
80  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82  }
83  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85  }
86  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88  }
89  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91  }
92  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94  }
95  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
96  return SelectAddrModeIndexed(N, 1, Base, OffImm);
97  }
98  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
99  return SelectAddrModeIndexed(N, 2, Base, OffImm);
100  }
101  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
102  return SelectAddrModeIndexed(N, 4, Base, OffImm);
103  }
104  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
105  return SelectAddrModeIndexed(N, 8, Base, OffImm);
106  }
107  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
108  return SelectAddrModeIndexed(N, 16, Base, OffImm);
109  }
110  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
111  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
112  }
113  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
114  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
115  }
116  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
117  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
118  }
119  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
120  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
121  }
122  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
123  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
124  }
125 
126  template<int Width>
127  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
128  SDValue &SignExtend, SDValue &DoShift) {
129  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
130  }
131 
132  template<int Width>
133  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
134  SDValue &SignExtend, SDValue &DoShift) {
135  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
136  }
137 
138 
139  /// Form sequences of consecutive 64/128-bit registers for use in NEON
140  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
141  /// between 1 and 4 elements. If it contains a single element, that is returned
142  /// unchanged; otherwise a REG_SEQUENCE value is returned.
143  SDValue createDTuple(ArrayRef<SDValue> Vecs);
144  SDValue createQTuple(ArrayRef<SDValue> Vecs);
145 
146  /// Generic helper for the createDTuple/createQTuple
147  /// functions. Those should almost always be called instead.
148  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
149  const unsigned SubRegs[]);
150 
151  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
152 
153  bool tryIndexedLoad(SDNode *N);
154 
155  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
156  unsigned SubRegIdx);
157  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
158  unsigned SubRegIdx);
159  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
161 
162  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
164  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
166 
167  bool tryBitfieldExtractOp(SDNode *N);
168  bool tryBitfieldExtractOpFromSExt(SDNode *N);
169  bool tryBitfieldInsertOp(SDNode *N);
170  bool tryBitfieldInsertInZeroOp(SDNode *N);
171 
172  bool tryReadRegister(SDNode *N);
173  bool tryWriteRegister(SDNode *N);
174 
175 // Include the pieces autogenerated from the target description.
176 #include "AArch64GenDAGISel.inc"
177 
178 private:
179  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
180  SDValue &Shift);
181  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
182  SDValue &OffImm);
183  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
184  SDValue &OffImm);
185  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
186  SDValue &OffImm);
187  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
188  SDValue &Offset, SDValue &SignExtend,
189  SDValue &DoShift);
190  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
191  SDValue &Offset, SDValue &SignExtend,
192  SDValue &DoShift);
193  bool isWorthFolding(SDValue V) const;
194  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
195  SDValue &Offset, SDValue &SignExtend);
196 
197  template<unsigned RegWidth>
198  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
199  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
200  }
201 
202  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
203 
204  bool SelectCMP_SWAP(SDNode *N);
205 
206 };
207 } // end anonymous namespace
208 
209 /// isIntImmediate - This method tests to see if the node is a constant
210 /// operand. If so, Imm will receive the 32-bit value.
211 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
212  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
213  Imm = C->getZExtValue();
214  return true;
215  }
216  return false;
217 }
218 
219 // isIntImmediate - This method tests to see if the operand is a constant.
220 // If so, Imm will receive the value.
221 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
222  return isIntImmediate(N.getNode(), Imm);
223 }
224 
225 // isOpcWithIntImmediate - This method tests to see if the node is a specific
226 // opcode and that it has an immediate integer right operand.
227 // If so, Imm will receive the 32-bit value.
228 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
229  uint64_t &Imm) {
230  return N->getOpcode() == Opc &&
231  isIntImmediate(N->getOperand(1).getNode(), Imm);
232 }
233 
234 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
235  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
236  switch(ConstraintID) {
237  default:
238  llvm_unreachable("Unexpected asm memory constraint");
239  case InlineAsm::Constraint_i:
240  case InlineAsm::Constraint_m:
241  case InlineAsm::Constraint_Q:
242  // We need to make sure that this one operand does not end up in XZR, thus
243  // require the address to be in a PointerRegClass register.
244  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
245  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
246  SDLoc dl(Op);
247  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
248  SDValue NewOp =
249  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
250  dl, Op.getValueType(),
251  Op, RC), 0);
252  OutOps.push_back(NewOp);
253  return false;
254  }
255  return true;
256 }
257 
258 /// SelectArithImmed - Select an immediate value that can be represented as
259 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
260 /// Val set to the 12-bit value and Shift set to the shifter operand.
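// For example, add w0, w1, #4095 uses Val = 4095 with LSL #0, and
// add w0, w1, #0xABC000 uses Val = 0xABC with LSL #12; a value such as
// 0x1001 fits neither form and is rejected.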
261 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
262  SDValue &Shift) {
263  // This function is called from the addsub_shifted_imm ComplexPattern,
264  // which lists [imm] as the list of opcodes it's interested in; however,
265  // we still need to check whether the operand is actually an immediate
266  // here because the ComplexPattern opcode list is only used in
267  // root-level opcode matching.
268  if (!isa<ConstantSDNode>(N.getNode()))
269  return false;
270 
271  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
272  unsigned ShiftAmt;
273 
274  if (Immed >> 12 == 0) {
275  ShiftAmt = 0;
276  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
277  ShiftAmt = 12;
278  Immed = Immed >> 12;
279  } else
280  return false;
281 
282  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
283  SDLoc dl(N);
284  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
285  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
286  return true;
287 }
288 
289 /// SelectNegArithImmed - As above, but negates the value before trying to
290 /// select it.
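// Negating the immediate lets a node like (add x0, #-16) be selected as
// "sub x0, x0, #16", since the negated value 16 is a valid arithmetic immediate.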
291 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
292  SDValue &Shift) {
293  // This function is called from the addsub_shifted_imm ComplexPattern,
294  // which lists [imm] as the list of opcodes it's interested in; however,
295  // we still need to check whether the operand is actually an immediate
296  // here because the ComplexPattern opcode list is only used in
297  // root-level opcode matching.
298  if (!isa<ConstantSDNode>(N.getNode()))
299  return false;
300 
301  // The immediate operand must be a 24-bit zero-extended immediate.
302  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
303 
304  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
305  // have the opposite effect on the C flag, so this pattern mustn't match under
306  // those circumstances.
307  if (Immed == 0)
308  return false;
309 
310  if (N.getValueType() == MVT::i32)
311  Immed = ~((uint32_t)Immed) + 1;
312  else
313  Immed = ~Immed + 1ULL;
314  if (Immed & 0xFFFFFFFFFF000000ULL)
315  return false;
316 
317  Immed &= 0xFFFFFFULL;
318  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
319  Shift);
320 }
321 
322 /// getShiftTypeForNode - Translate a shift node to the corresponding
323 /// ShiftType value.
324 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
325   switch (N.getOpcode()) {
326   default:
327     return AArch64_AM::InvalidShiftExtend;
328   case ISD::SHL:
329  return AArch64_AM::LSL;
330  case ISD::SRL:
331  return AArch64_AM::LSR;
332  case ISD::SRA:
333  return AArch64_AM::ASR;
334  case ISD::ROTR:
335  return AArch64_AM::ROR;
336  }
337 }
338 
339 /// \brief Determine whether it is worth it to fold SHL into the addressing
340 /// mode.
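// For example, (load (add x0, (shl x1, #3))) can use the register-offset form
// ldr x2, [x0, x1, lsl #3] for an 8-byte access; shift amounts above 3 are
// never folded.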
341 static bool isWorthFoldingSHL(SDValue V) {
342  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
343  // It is worth folding logical shift of up to three places.
344  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
345  if (!CSD)
346  return false;
347  unsigned ShiftVal = CSD->getZExtValue();
348  if (ShiftVal > 3)
349  return false;
350 
351  // Check if this particular node is reused in any non-memory related
352  // operation. If yes, do not try to fold this node into the address
353  // computation, since the computation will be kept.
354  const SDNode *Node = V.getNode();
355  for (SDNode *UI : Node->uses())
356  if (!isa<MemSDNode>(*UI))
357  for (SDNode *UII : UI->uses())
358  if (!isa<MemSDNode>(*UII))
359  return false;
360  return true;
361 }
362 
363 /// \brief Determine whether it is worth folding V into an extended register.
364 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
365  // Trivial if we are optimizing for code size or if there is only
366  // one use of the value.
367  if (ForCodeSize || V.hasOneUse())
368  return true;
369  // If a subtarget has a fastpath LSL we can fold a logical shift into
370  // the addressing mode and save a cycle.
371  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
372       isWorthFoldingSHL(V))
373     return true;
374  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
375  const SDValue LHS = V.getOperand(0);
376  const SDValue RHS = V.getOperand(1);
377  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
378  return true;
379  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
380  return true;
381  }
382 
383  // It hurts otherwise, since the value will be reused.
384  return false;
385 }
386 
387 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
388 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
389 /// instructions allow the shifted register to be rotated, but the arithmetic
390 /// instructions do not. The AllowROR parameter specifies whether ROR is
391 /// supported.
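// For example, (or w0, (shl w1, #7)) becomes orr w0, w0, w1, lsl #7; because ORR
// is a logical instruction, a rotate (ROR) would also be accepted, whereas
// add/sub only allow LSL/LSR/ASR.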
392 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
393  SDValue &Reg, SDValue &Shift) {
394   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
395   if (ShType == AArch64_AM::InvalidShiftExtend)
396  return false;
397  if (!AllowROR && ShType == AArch64_AM::ROR)
398  return false;
399 
400  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
401  unsigned BitSize = N.getValueSizeInBits();
402  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
403  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
404 
405  Reg = N.getOperand(0);
406  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
407  return isWorthFolding(N);
408  }
409 
410  return false;
411 }
412 
413 /// getExtendTypeForNode - Translate an extend node to the corresponding
414 /// ExtendType value.
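// For example, (sign_extend_inreg x, i8) maps to SXTB, a zero_extend from i32
// maps to UXTW, and (and x, 0xffff) is treated as UXTH for arithmetic operands;
// anything else yields InvalidShiftExtend.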
415 static AArch64_AM::ShiftExtendType
416 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
417   if (N.getOpcode() == ISD::SIGN_EXTEND ||
418       N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
419     EVT SrcVT;
420     if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
421       SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
422  else
423  SrcVT = N.getOperand(0).getValueType();
424 
425  if (!IsLoadStore && SrcVT == MVT::i8)
426  return AArch64_AM::SXTB;
427  else if (!IsLoadStore && SrcVT == MVT::i16)
428  return AArch64_AM::SXTH;
429  else if (SrcVT == MVT::i32)
430  return AArch64_AM::SXTW;
431  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
432 
433     return AArch64_AM::InvalidShiftExtend;
434   } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
435  N.getOpcode() == ISD::ANY_EXTEND) {
436  EVT SrcVT = N.getOperand(0).getValueType();
437  if (!IsLoadStore && SrcVT == MVT::i8)
438  return AArch64_AM::UXTB;
439  else if (!IsLoadStore && SrcVT == MVT::i16)
440  return AArch64_AM::UXTH;
441  else if (SrcVT == MVT::i32)
442  return AArch64_AM::UXTW;
443  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
444 
445     return AArch64_AM::InvalidShiftExtend;
446   } else if (N.getOpcode() == ISD::AND) {
447     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
448     if (!CSD)
449       return AArch64_AM::InvalidShiftExtend;
450     uint64_t AndMask = CSD->getZExtValue();
451 
452  switch (AndMask) {
453  default:
454       return AArch64_AM::InvalidShiftExtend;
455     case 0xFF:
456  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
457  case 0xFFFF:
458  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
459  case 0xFFFFFFFF:
460  return AArch64_AM::UXTW;
461  }
462  }
463 
464   return AArch64_AM::InvalidShiftExtend;
465 }
466 
467 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
468 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
469  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
470       DL->getOpcode() != AArch64ISD::DUPLANE32)
471     return false;
472 
473  SDValue SV = DL->getOperand(0);
474  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
475  return false;
476 
477  SDValue EV = SV.getOperand(1);
478  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
479  return false;
480 
481  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
482  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
483  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
484  LaneOp = EV.getOperand(0);
485 
486  return true;
487 }
488 
489 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
490 // high lane extract.
491 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
492  SDValue &LaneOp, int &LaneIdx) {
493 
494  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
495  std::swap(Op0, Op1);
496  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
497  return false;
498  }
499  StdOp = Op1;
500  return true;
501 }
502 
503 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
504 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
505 /// so that we don't emit unnecessary lane extracts.
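// For example, a v4i16 MLA whose per-lane multiplicand is taken from the high
// half of a v8i16 value can be selected directly as MLAv4i16_indexed instead of
// first extracting the high half into a separate register.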
506 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
507  SDLoc dl(N);
508  SDValue Op0 = N->getOperand(0);
509  SDValue Op1 = N->getOperand(1);
510  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
511  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
512  int LaneIdx = -1; // Will hold the lane index.
513 
514  if (Op1.getOpcode() != ISD::MUL ||
515  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
516  LaneIdx)) {
517  std::swap(Op0, Op1);
518  if (Op1.getOpcode() != ISD::MUL ||
519  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
520  LaneIdx))
521  return false;
522  }
523 
524  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
525 
526  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
527 
528  unsigned MLAOpc = ~0U;
529 
530  switch (N->getSimpleValueType(0).SimpleTy) {
531  default:
532  llvm_unreachable("Unrecognized MLA.");
533  case MVT::v4i16:
534  MLAOpc = AArch64::MLAv4i16_indexed;
535  break;
536  case MVT::v8i16:
537  MLAOpc = AArch64::MLAv8i16_indexed;
538  break;
539  case MVT::v2i32:
540  MLAOpc = AArch64::MLAv2i32_indexed;
541  break;
542  case MVT::v4i32:
543  MLAOpc = AArch64::MLAv4i32_indexed;
544  break;
545  }
546 
547  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
548  return true;
549 }
550 
551 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
552  SDLoc dl(N);
553  SDValue SMULLOp0;
554  SDValue SMULLOp1;
555  int LaneIdx;
556 
557  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
558  LaneIdx))
559  return false;
560 
561  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
562 
563  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
564 
565  unsigned SMULLOpc = ~0U;
566 
567  if (IntNo == Intrinsic::aarch64_neon_smull) {
568  switch (N->getSimpleValueType(0).SimpleTy) {
569  default:
570  llvm_unreachable("Unrecognized SMULL.");
571  case MVT::v4i32:
572  SMULLOpc = AArch64::SMULLv4i16_indexed;
573  break;
574  case MVT::v2i64:
575  SMULLOpc = AArch64::SMULLv2i32_indexed;
576  break;
577  }
578  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
579  switch (N->getSimpleValueType(0).SimpleTy) {
580  default:
581  llvm_unreachable("Unrecognized SMULL.");
582  case MVT::v4i32:
583  SMULLOpc = AArch64::UMULLv4i16_indexed;
584  break;
585  case MVT::v2i64:
586  SMULLOpc = AArch64::UMULLv2i32_indexed;
587  break;
588  }
589  } else
590  llvm_unreachable("Unrecognized intrinsic.");
591 
592  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
593  return true;
594 }
595 
596 /// Instructions that accept extend modifiers like UXTW expect the register
597 /// being extended to be a GPR32, but the incoming DAG might be acting on a
598 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
599 /// this is the case.
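// For example, a GPR64 value feeding a UXTB/SXTH operand is narrowed with an
// EXTRACT_SUBREG of sub_32 so the extend sees the expected GPR32 register.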
600 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
601   if (N.getValueType() == MVT::i32)
602  return N;
603 
604  SDLoc dl(N);
605  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
606  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
607  dl, MVT::i32, N, SubReg);
608  return SDValue(Node, 0);
609 }
610 
611 
612 /// SelectArithExtendedRegister - Select an "extended register" operand. This
613 /// operand folds in an extend followed by an optional left shift.
614 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
615  SDValue &Shift) {
616   unsigned ShiftVal = 0;
617   AArch64_AM::ShiftExtendType Ext;
618 
619   if (N.getOpcode() == ISD::SHL) {
620     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
621     if (!CSD)
622  return false;
623  ShiftVal = CSD->getZExtValue();
624  if (ShiftVal > 4)
625  return false;
626 
627  Ext = getExtendTypeForNode(N.getOperand(0));
628     if (Ext == AArch64_AM::InvalidShiftExtend)
629       return false;
630 
631  Reg = N.getOperand(0).getOperand(0);
632  } else {
633  Ext = getExtendTypeForNode(N);
634     if (Ext == AArch64_AM::InvalidShiftExtend)
635       return false;
636 
637  Reg = N.getOperand(0);
638 
639  // Don't match if free 32-bit -> 64-bit zext can be used instead.
640  if (Ext == AArch64_AM::UXTW &&
641  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
642  return false;
643  }
644 
645  // AArch64 mandates that the RHS of the operation must use the smallest
646  // register class that could contain the size being extended from. Thus,
647  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
648  // there might not be an actual 32-bit value in the program. We can
649  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
650  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
651  Reg = narrowIfNeeded(CurDAG, Reg);
652  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
653  MVT::i32);
654  return isWorthFolding(N);
655 }
656 
657 /// If there's a use of this ADDlow that's not itself a load/store then we'll
658 /// need to create a real ADD instruction from it anyway and there's no point in
659 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
660 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
661 /// leads to duplicated ADRP instructions.
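// For example, if the ADDlow result also feeds a plain arithmetic node, folding
// it into an ldr would still leave the ADRP/ADD pair behind, so the ADRP ends up
// duplicated.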
662 static bool isWorthFoldingADDlow(SDValue N) {
663   for (auto Use : N->uses()) {
664  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
665  Use->getOpcode() != ISD::ATOMIC_LOAD &&
666  Use->getOpcode() != ISD::ATOMIC_STORE)
667  return false;
668 
669  // ldar and stlr have much more restrictive addressing modes (just a
670  // register).
671  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
672  return false;
673  }
674 
675  return true;
676 }
677 
678 /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
679 /// immediate" address. The "Size" argument is the size in bytes of the memory
680 /// reference, which determines the scale.
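// These addressing modes feed the load/store-pair instructions; e.g. for
// Size == 8 the offset must be a multiple of 8 in [-512, 504], as in
// stp x1, x2, [x0, #-16].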
681 bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
682  SDValue &Base,
683  SDValue &OffImm) {
684  SDLoc dl(N);
685  const DataLayout &DL = CurDAG->getDataLayout();
686  const TargetLowering *TLI = getTargetLowering();
687  if (N.getOpcode() == ISD::FrameIndex) {
688  int FI = cast<FrameIndexSDNode>(N)->getIndex();
689  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
690  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
691  return true;
692  }
693 
694  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed mode
695  // selected here doesn't support labels/immediates, only base+offset.
696 
697  if (CurDAG->isBaseWithConstantOffset(N)) {
698  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
699  int64_t RHSC = RHS->getSExtValue();
700  unsigned Scale = Log2_32(Size);
701  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
702  RHSC < (0x40 << Scale)) {
703  Base = N.getOperand(0);
704  if (Base.getOpcode() == ISD::FrameIndex) {
705  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
706  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
707  }
708  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
709  return true;
710  }
711  }
712  }
713 
714  // Base only. The address will be materialized into a register before
715  // the memory is accessed.
716  // add x0, Xbase, #offset
717  // stp x1, x2, [x0]
718  Base = N;
719  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
720  return true;
721 }
722 
723 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
724 /// immediate" address. The "Size" argument is the size in bytes of the memory
725 /// reference, which determines the scale.
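// For example, with Size == 4 the unsigned scaled offset covers [0, 16380] in
// steps of 4, so (load (add x0, #8)) becomes ldr w1, [x0, #8].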
726 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
727  SDValue &Base, SDValue &OffImm) {
728  SDLoc dl(N);
729  const DataLayout &DL = CurDAG->getDataLayout();
730  const TargetLowering *TLI = getTargetLowering();
731  if (N.getOpcode() == ISD::FrameIndex) {
732  int FI = cast<FrameIndexSDNode>(N)->getIndex();
733  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
734  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
735  return true;
736  }
737 
738   if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
739     GlobalAddressSDNode *GAN =
740         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
741     Base = N.getOperand(0);
742  OffImm = N.getOperand(1);
743  if (!GAN)
744  return true;
745 
746  const GlobalValue *GV = GAN->getGlobal();
747  unsigned Alignment = GV->getAlignment();
748  Type *Ty = GV->getValueType();
749  if (Alignment == 0 && Ty->isSized())
750  Alignment = DL.getABITypeAlignment(Ty);
751 
752  if (Alignment >= Size)
753  return true;
754  }
755 
756  if (CurDAG->isBaseWithConstantOffset(N)) {
757  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
758  int64_t RHSC = (int64_t)RHS->getZExtValue();
759  unsigned Scale = Log2_32(Size);
760  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
761  Base = N.getOperand(0);
762  if (Base.getOpcode() == ISD::FrameIndex) {
763  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
764  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
765  }
766  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
767  return true;
768  }
769  }
770  }
771 
772  // Before falling back to our general case, check if the unscaled
773  // instructions can handle this. If so, that's preferable.
774  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
775  return false;
776 
777  // Base only. The address will be materialized into a register before
778  // the memory is accessed.
779  // add x0, Xbase, #offset
780  // ldr x0, [x0]
781  Base = N;
782  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
783  return true;
784 }
785 
786 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
787 /// immediate" address. This should only match when there is an offset that
788 /// is not valid for a scaled immediate addressing mode. The "Size" argument
789 /// is the size in bytes of the memory reference, which is needed here to know
790 /// what is valid for a scaled immediate.
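// For example, an offset of -4 on a 4-byte access does not fit the scaled form,
// but it is within the unscaled 9-bit range [-256, 255] and selects
// ldur w1, [x0, #-4].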
791 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
792  SDValue &Base,
793  SDValue &OffImm) {
794  if (!CurDAG->isBaseWithConstantOffset(N))
795  return false;
796  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
797  int64_t RHSC = RHS->getSExtValue();
798  // If the offset is valid as a scaled immediate, don't match here.
799  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
800  RHSC < (0x1000 << Log2_32(Size)))
801  return false;
802  if (RHSC >= -256 && RHSC < 256) {
803  Base = N.getOperand(0);
804  if (Base.getOpcode() == ISD::FrameIndex) {
805  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
806  const TargetLowering *TLI = getTargetLowering();
807  Base = CurDAG->getTargetFrameIndex(
808  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
809  }
810  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
811  return true;
812  }
813  }
814  return false;
815 }
816 
817 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
818  SDLoc dl(N);
819  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
820  SDValue ImpDef = SDValue(
821  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
822  MachineSDNode *Node = CurDAG->getMachineNode(
823  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
824  return SDValue(Node, 0);
825 }
826 
827 /// \brief Check if the given SHL node (\p N), can be used to form an
828 /// extended register for an addressing mode.
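// For example, (shl (sext_inreg x, i32), #2) used as the offset of a 4-byte
// access can be folded into the extended-register form ldr w0, [x1, w2, sxtw #2].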
829 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
830  bool WantExtend, SDValue &Offset,
831  SDValue &SignExtend) {
832  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
833   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
834   if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
835  return false;
836 
837  SDLoc dl(N);
838  if (WantExtend) {
839     AArch64_AM::ShiftExtendType Ext =
840         getExtendTypeForNode(N.getOperand(0), true);
841     if (Ext == AArch64_AM::InvalidShiftExtend)
842       return false;
843 
844  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
845  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
846  MVT::i32);
847  } else {
848  Offset = N.getOperand(0);
849  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
850  }
851 
852  unsigned LegalShiftVal = Log2_32(Size);
853  unsigned ShiftVal = CSD->getZExtValue();
854 
855  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
856  return false;
857 
858  return isWorthFolding(N);
859 }
860 
861 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
862  SDValue &Base, SDValue &Offset,
863  SDValue &SignExtend,
864  SDValue &DoShift) {
865  if (N.getOpcode() != ISD::ADD)
866  return false;
867  SDValue LHS = N.getOperand(0);
868  SDValue RHS = N.getOperand(1);
869  SDLoc dl(N);
870 
871  // We don't want to match immediate adds here, because they are better lowered
872  // to the register-immediate addressing modes.
873  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
874  return false;
875 
876  // Check if this particular node is reused in any non-memory related
877  // operation. If yes, do not try to fold this node into the address
878  // computation, since the computation will be kept.
879  const SDNode *Node = N.getNode();
880  for (SDNode *UI : Node->uses()) {
881  if (!isa<MemSDNode>(*UI))
882  return false;
883  }
884 
885  // Remember if it is worth folding N when it produces an extended register.
886  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
887 
888  // Try to match a shifted extend on the RHS.
889  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
890  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
891  Base = LHS;
892  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
893  return true;
894  }
895 
896  // Try to match a shifted extend on the LHS.
897  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
898  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
899  Base = RHS;
900  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
901  return true;
902  }
903 
904  // There was no shift, whatever else we find.
905  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
906 
907   AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
908   // Try to match an unshifted extend on the LHS.
909   if (IsExtendedRegisterWorthFolding &&
910       (Ext = getExtendTypeForNode(LHS, true)) !=
911           AArch64_AM::InvalidShiftExtend) {
912     Base = RHS;
913  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
914  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
915  MVT::i32);
916  if (isWorthFolding(LHS))
917  return true;
918  }
919 
920  // Try to match an unshifted extend on the RHS.
921  if (IsExtendedRegisterWorthFolding &&
922  (Ext = getExtendTypeForNode(RHS, true)) !=
923           AArch64_AM::InvalidShiftExtend) {
924     Base = LHS;
925  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
926  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
927  MVT::i32);
928  if (isWorthFolding(RHS))
929  return true;
930  }
931 
932  return false;
933 }
934 
935 // Check if the given immediate is preferred by ADD. If an immediate can be
936 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
937 // encoded by one MOVZ, return true.
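// For example, 0x123000 is preferred (a single "add x0, x1, #0x123, lsl #12"),
// while 0xAB0000 is not, because one MOVZ already materializes it.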
938 static bool isPreferredADD(int64_t ImmOff) {
939  // Constant in [0x0, 0xfff] can be encoded in ADD.
940  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
941  return true;
942  // Check if it can be encoded in an "ADD LSL #12".
943  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
944  // As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
945  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
946  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
947  return false;
948 }
949 
950 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
951  SDValue &Base, SDValue &Offset,
952  SDValue &SignExtend,
953  SDValue &DoShift) {
954  if (N.getOpcode() != ISD::ADD)
955  return false;
956  SDValue LHS = N.getOperand(0);
957  SDValue RHS = N.getOperand(1);
958  SDLoc DL(N);
959 
960  // Check if this particular node is reused in any non-memory related
961  // operation. If yes, do not try to fold this node into the address
962  // computation, since the computation will be kept.
963  const SDNode *Node = N.getNode();
964  for (SDNode *UI : Node->uses()) {
965  if (!isa<MemSDNode>(*UI))
966  return false;
967  }
968 
969  // Watch out if RHS is a wide immediate: it cannot be selected into the
970  // [BaseReg+Imm] addressing mode, and it may not be encodable in ADD/SUB
971  // either. Instead it will use the [BaseReg + 0] address mode and generate
972  // instructions like:
973  // MOV X0, WideImmediate
974  // ADD X1, BaseReg, X0
975  // LDR X2, [X1, 0]
976  // For such situation, using [BaseReg, XReg] addressing mode can save one
977  // ADD/SUB:
978  // MOV X0, WideImmediate
979  // LDR X2, [BaseReg, X0]
980  if (isa<ConstantSDNode>(RHS)) {
981  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
982  unsigned Scale = Log2_32(Size);
983  // Skip if the immediate can be selected by the load/store addressing mode.
984  // Also skip if the immediate can be encoded by a single ADD (SUB is also
985  // checked by using -ImmOff).
986  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
987  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
988  return false;
989 
990  SDValue Ops[] = { RHS };
991  SDNode *MOVI =
992  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
993  SDValue MOVIV = SDValue(MOVI, 0);
994  // This ADD of two X registers will be selected into [Reg+Reg] mode.
995  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
996  }
997 
998  // Remember if it is worth folding N when it produces an extended register.
999  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1000 
1001  // Try to match a shifted extend on the RHS.
1002  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1003  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1004  Base = LHS;
1005  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1006  return true;
1007  }
1008 
1009  // Try to match a shifted extend on the LHS.
1010  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1011  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1012  Base = RHS;
1013  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1014  return true;
1015  }
1016 
1017  // Match any non-shifted, non-extend, non-immediate add expression.
1018  Base = LHS;
1019  Offset = RHS;
1020  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1021  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1022  // Reg1 + Reg2 is free: no check needed.
1023  return true;
1024 }
1025 
1026 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1027  static const unsigned RegClassIDs[] = {
1028  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1029  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1030  AArch64::dsub2, AArch64::dsub3};
1031 
1032  return createTuple(Regs, RegClassIDs, SubRegs);
1033 }
1034 
1035 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1036  static const unsigned RegClassIDs[] = {
1037  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1038  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1039  AArch64::qsub2, AArch64::qsub3};
1040 
1041  return createTuple(Regs, RegClassIDs, SubRegs);
1042 }
1043 
1044 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1045  const unsigned RegClassIDs[],
1046  const unsigned SubRegs[]) {
1047  // There's no special register-class for a vector-list of 1 element: it's just
1048  // a vector.
1049  if (Regs.size() == 1)
1050  return Regs[0];
1051 
1052  assert(Regs.size() >= 2 && Regs.size() <= 4);
1053 
1054  SDLoc DL(Regs[0]);
1055 
1055 
1056   SmallVector<SDValue, 4> Ops;
1057 
1058  // First operand of REG_SEQUENCE is the desired RegClass.
1059  Ops.push_back(
1060  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1061 
1062  // Then we get pairs of source & subregister-position for the components.
1063  for (unsigned i = 0; i < Regs.size(); ++i) {
1064  Ops.push_back(Regs[i]);
1065  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1066  }
1067 
1068  SDNode *N =
1069  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1070  return SDValue(N, 0);
1071 }
1072 
1073 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1074  bool isExt) {
1075  SDLoc dl(N);
1076  EVT VT = N->getValueType(0);
1077 
1078  unsigned ExtOff = isExt;
1079 
1080  // Form a REG_SEQUENCE to force register allocation.
1081  unsigned Vec0Off = ExtOff + 1;
1082  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1083  N->op_begin() + Vec0Off + NumVecs);
1084  SDValue RegSeq = createQTuple(Regs);
1085 
1086   SmallVector<SDValue, 6> Ops;
1087   if (isExt)
1088  Ops.push_back(N->getOperand(1));
1089  Ops.push_back(RegSeq);
1090  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1091  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1092 }
1093 
1094 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1095  LoadSDNode *LD = cast<LoadSDNode>(N);
1096  if (LD->isUnindexed())
1097  return false;
1098  EVT VT = LD->getMemoryVT();
1099  EVT DstVT = N->getValueType(0);
1100   ISD::MemIndexedMode AM = LD->getAddressingMode();
1101   bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1102 
1103  // We're not doing validity checking here. That was done when checking
1104  // if we should mark the load as indexed or not. We're just selecting
1105  // the right instruction.
1106  unsigned Opcode = 0;
1107 
1108  ISD::LoadExtType ExtType = LD->getExtensionType();
1109  bool InsertTo64 = false;
1110  if (VT == MVT::i64)
1111  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1112  else if (VT == MVT::i32) {
1113  if (ExtType == ISD::NON_EXTLOAD)
1114  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1115  else if (ExtType == ISD::SEXTLOAD)
1116  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1117  else {
1118  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1119  InsertTo64 = true;
1120  // The result of the load is only i32. It's the subreg_to_reg that makes
1121  // it into an i64.
1122  DstVT = MVT::i32;
1123  }
1124  } else if (VT == MVT::i16) {
1125  if (ExtType == ISD::SEXTLOAD) {
1126  if (DstVT == MVT::i64)
1127  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1128  else
1129  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1130  } else {
1131  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1132  InsertTo64 = DstVT == MVT::i64;
1133  // The result of the load is only i32. It's the subreg_to_reg that makes
1134  // it into an i64.
1135  DstVT = MVT::i32;
1136  }
1137  } else if (VT == MVT::i8) {
1138  if (ExtType == ISD::SEXTLOAD) {
1139  if (DstVT == MVT::i64)
1140  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1141  else
1142  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1143  } else {
1144  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1145  InsertTo64 = DstVT == MVT::i64;
1146  // The result of the load is only i32. It's the subreg_to_reg that makes
1147  // it into an i64.
1148  DstVT = MVT::i32;
1149  }
1150  } else if (VT == MVT::f16) {
1151  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1152  } else if (VT == MVT::f32) {
1153  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1154  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1155  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1156  } else if (VT.is128BitVector()) {
1157  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1158  } else
1159  return false;
1160  SDValue Chain = LD->getChain();
1161  SDValue Base = LD->getBasePtr();
1162  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1163  int OffsetVal = (int)OffsetOp->getZExtValue();
1164  SDLoc dl(N);
1165  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1166  SDValue Ops[] = { Base, Offset, Chain };
1167  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1168  MVT::Other, Ops);
1169  // Either way, we're replacing the node, so tell the caller that.
1170  SDValue LoadedVal = SDValue(Res, 1);
1171  if (InsertTo64) {
1172  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1173  LoadedVal =
1174  SDValue(CurDAG->getMachineNode(
1175  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1176  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1177  SubReg),
1178  0);
1179  }
1180 
1181  ReplaceUses(SDValue(N, 0), LoadedVal);
1182  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1183  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1184  CurDAG->RemoveDeadNode(N);
1185  return true;
1186 }
1187 
1188 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1189  unsigned SubRegIdx) {
1190  SDLoc dl(N);
1191  EVT VT = N->getValueType(0);
1192  SDValue Chain = N->getOperand(0);
1193 
1194  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1195  Chain};
1196 
1197  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1198 
1199  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1200  SDValue SuperReg = SDValue(Ld, 0);
1201  for (unsigned i = 0; i < NumVecs; ++i)
1202  ReplaceUses(SDValue(N, i),
1203  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1204 
1205  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1206 
1207  // Transfer memoperands.
1208  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1209  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1210  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
1211 
1212  CurDAG->RemoveDeadNode(N);
1213 }
1214 
1215 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1216  unsigned Opc, unsigned SubRegIdx) {
1217  SDLoc dl(N);
1218  EVT VT = N->getValueType(0);
1219  SDValue Chain = N->getOperand(0);
1220 
1221  SDValue Ops[] = {N->getOperand(1), // Mem operand
1222  N->getOperand(2), // Incremental
1223  Chain};
1224 
1225  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1226                         MVT::Untyped, MVT::Other};
1227 
1228  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1229 
1230  // Update uses of write back register
1231  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1232 
1233  // Update uses of vector list
1234  SDValue SuperReg = SDValue(Ld, 1);
1235  if (NumVecs == 1)
1236  ReplaceUses(SDValue(N, 0), SuperReg);
1237  else
1238  for (unsigned i = 0; i < NumVecs; ++i)
1239  ReplaceUses(SDValue(N, i),
1240  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1241 
1242  // Update the chain
1243  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1244  CurDAG->RemoveDeadNode(N);
1245 }
1246 
1247 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1248  unsigned Opc) {
1249  SDLoc dl(N);
1250  EVT VT = N->getOperand(2)->getValueType(0);
1251 
1252  // Form a REG_SEQUENCE to force register allocation.
1253  bool Is128Bit = VT.getSizeInBits() == 128;
1254  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1255  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1256 
1257  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1258  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1259 
1260  // Transfer memoperands.
1261  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1262  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1263  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1264 
1265  ReplaceNode(N, St);
1266 }
1267 
1268 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1269  unsigned Opc) {
1270  SDLoc dl(N);
1271  EVT VT = N->getOperand(2)->getValueType(0);
1272  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1273  MVT::Other}; // Type for the Chain
1274 
1275  // Form a REG_SEQUENCE to force register allocation.
1276  bool Is128Bit = VT.getSizeInBits() == 128;
1277  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1278  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1279 
1280  SDValue Ops[] = {RegSeq,
1281  N->getOperand(NumVecs + 1), // base register
1282  N->getOperand(NumVecs + 2), // Incremental
1283  N->getOperand(0)}; // Chain
1284  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1285 
1286  ReplaceNode(N, St);
1287 }
1288 
1289 namespace {
1290 /// WidenVector - Given a value in the V64 register class, produce the
1291 /// equivalent value in the V128 register class.
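// For example, a v2i32 value in a D register is inserted into the dsub subregister
// of an IMPLICIT_DEF Q register, producing a v4i32 whose upper lanes are undefined.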
1292 class WidenVector {
1293  SelectionDAG &DAG;
1294 
1295 public:
1296  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1297 
1298  SDValue operator()(SDValue V64Reg) {
1299  EVT VT = V64Reg.getValueType();
1300  unsigned NarrowSize = VT.getVectorNumElements();
1301  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1302  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1303  SDLoc DL(V64Reg);
1304 
1305  SDValue Undef =
1306  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1307  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1308  }
1309 };
1310 } // namespace
1311 
1312 /// NarrowVector - Given a value in the V128 register class, produce the
1313 /// equivalent value in the V64 register class.
1314 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1315  EVT VT = V128Reg.getValueType();
1316  unsigned WideSize = VT.getVectorNumElements();
1317  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1318  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1319 
1320  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1321  V128Reg);
1322 }
1323 
1324 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1325  unsigned Opc) {
1326  SDLoc dl(N);
1327  EVT VT = N->getValueType(0);
1328  bool Narrow = VT.getSizeInBits() == 64;
1329 
1330  // Form a REG_SEQUENCE to force register allocation.
1331  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1332 
1333  if (Narrow)
1334  transform(Regs, Regs.begin(),
1335  WidenVector(*CurDAG));
1336 
1337  SDValue RegSeq = createQTuple(Regs);
1338 
1339  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1340 
1341  unsigned LaneNo =
1342  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1343 
1344  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1345  N->getOperand(NumVecs + 3), N->getOperand(0)};
1346  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1347  SDValue SuperReg = SDValue(Ld, 0);
1348 
1349  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1350  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1351  AArch64::qsub2, AArch64::qsub3 };
1352  for (unsigned i = 0; i < NumVecs; ++i) {
1353  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1354  if (Narrow)
1355  NV = NarrowVector(NV, *CurDAG);
1356  ReplaceUses(SDValue(N, i), NV);
1357  }
1358 
1359  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1360  CurDAG->RemoveDeadNode(N);
1361 }
1362 
1363 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1364  unsigned Opc) {
1365  SDLoc dl(N);
1366  EVT VT = N->getValueType(0);
1367  bool Narrow = VT.getSizeInBits() == 64;
1368 
1369  // Form a REG_SEQUENCE to force register allocation.
1370  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1371 
1372  if (Narrow)
1373  transform(Regs, Regs.begin(),
1374  WidenVector(*CurDAG));
1375 
1376  SDValue RegSeq = createQTuple(Regs);
1377 
1378  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1379  RegSeq->getValueType(0), MVT::Other};
1380 
1381  unsigned LaneNo =
1382  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1383 
1384  SDValue Ops[] = {RegSeq,
1385  CurDAG->getTargetConstant(LaneNo, dl,
1386  MVT::i64), // Lane Number
1387  N->getOperand(NumVecs + 2), // Base register
1388  N->getOperand(NumVecs + 3), // Incremental
1389  N->getOperand(0)};
1390  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1391 
1392  // Update uses of the write back register
1393  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1394 
1395  // Update uses of the vector list
1396  SDValue SuperReg = SDValue(Ld, 1);
1397  if (NumVecs == 1) {
1398  ReplaceUses(SDValue(N, 0),
1399  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1400  } else {
1401  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1402  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1403  AArch64::qsub2, AArch64::qsub3 };
1404  for (unsigned i = 0; i < NumVecs; ++i) {
1405  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1406  SuperReg);
1407  if (Narrow)
1408  NV = NarrowVector(NV, *CurDAG);
1409  ReplaceUses(SDValue(N, i), NV);
1410  }
1411  }
1412 
1413  // Update the Chain
1414  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1415  CurDAG->RemoveDeadNode(N);
1416 }
1417 
1418 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1419  unsigned Opc) {
1420  SDLoc dl(N);
1421  EVT VT = N->getOperand(2)->getValueType(0);
1422  bool Narrow = VT.getSizeInBits() == 64;
1423 
1424  // Form a REG_SEQUENCE to force register allocation.
1425  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1426 
1427  if (Narrow)
1428  transform(Regs, Regs.begin(),
1429  WidenVector(*CurDAG));
1430 
1431  SDValue RegSeq = createQTuple(Regs);
1432 
1433  unsigned LaneNo =
1434  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1435 
1436  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1437  N->getOperand(NumVecs + 3), N->getOperand(0)};
1438  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1439 
1440  // Transfer memoperands.
1441  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1442  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1443  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1444 
1445  ReplaceNode(N, St);
1446 }
1447 
1448 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1449  unsigned Opc) {
1450  SDLoc dl(N);
1451  EVT VT = N->getOperand(2)->getValueType(0);
1452  bool Narrow = VT.getSizeInBits() == 64;
1453 
1454  // Form a REG_SEQUENCE to force register allocation.
1455  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1456 
1457  if (Narrow)
1458  transform(Regs, Regs.begin(),
1459  WidenVector(*CurDAG));
1460 
1461  SDValue RegSeq = createQTuple(Regs);
1462 
1463  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1464  MVT::Other};
1465 
1466  unsigned LaneNo =
1467  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1468 
1469  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1470  N->getOperand(NumVecs + 2), // Base Register
1471  N->getOperand(NumVecs + 3), // Incremental
1472  N->getOperand(0)};
1473  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1474 
1475  // Transfer memoperands.
1476  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1477  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1478  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1479 
1480  ReplaceNode(N, St);
1481 }
1482 
1483 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1484                                        unsigned &Opc, SDValue &Opd0,
1485  unsigned &LSB, unsigned &MSB,
1486  unsigned NumberOfIgnoredLowBits,
1487  bool BiggerPattern) {
1488  assert(N->getOpcode() == ISD::AND &&
1489  "N must be a AND operation to call this function");
1490 
1491  EVT VT = N->getValueType(0);
1492 
1493  // Here we can test the type of VT and return false when the type does not
1494  // match, but since it is done prior to that call in the current context
1495  // we turned that into an assert to avoid redundant code.
1496  assert((VT == MVT::i32 || VT == MVT::i64) &&
1497  "Type checking must have been done before calling this function");
1498 
1499  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1500  // changed the AND node to a 32-bit mask operation. We'll have to
1501  // undo that as part of the transform here if we want to catch all
1502  // the opportunities.
1503  // Currently the NumberOfIgnoredLowBits argument helps to recover
1504  // from these situations when matching the bigger pattern (bitfield insert).
1505 
1506  // For unsigned extracts, check for a shift right and mask
1507  uint64_t AndImm = 0;
1508  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1509  return false;
1510 
1511  const SDNode *Op0 = N->getOperand(0).getNode();
1512 
1513  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1514  // simplified. Try to undo that
1515  AndImm |= (1 << NumberOfIgnoredLowBits) - 1;
1516 
1517  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
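  // For example, 0xff is accepted (0xff & 0x100 == 0) while 0xf0 is not
  // (0xf0 & 0xf1 != 0).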
1518  if (AndImm & (AndImm + 1))
1519  return false;
1520 
1521  bool ClampMSB = false;
1522  uint64_t SrlImm = 0;
1523  // Handle the SRL + ANY_EXTEND case.
1524  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1525  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1526  // Extend the incoming operand of the SRL to 64-bit.
1527  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1528  // Make sure to clamp the MSB so that we preserve the semantics of the
1529  // original operations.
1530  ClampMSB = true;
1531  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1532              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1533                                    SrlImm)) {
1534  // If the shift result was truncated, we can still combine them.
1535  Opd0 = Op0->getOperand(0).getOperand(0);
1536 
1537  // Use the type of SRL node.
1538  VT = Opd0->getValueType(0);
1539  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1540  Opd0 = Op0->getOperand(0);
1541  } else if (BiggerPattern) {
1542  // Let's pretend a 0 shift right has been performed.
1543  // The resulting code will be at least as good as the original one
1544  // plus it may expose more opportunities for bitfield insert pattern.
1545  // FIXME: Currently we limit this to the bigger pattern, because
1546  // some optimizations expect AND and not UBFM.
1547  Opd0 = N->getOperand(0);
1548  } else
1549  return false;
1550 
1551  // Bail out on large immediates. This happens when no proper
1552  // combining/constant folding was performed.
1553  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1554  DEBUG((dbgs() << N
1555  << ": Found large shift immediate, this should not happen\n"));
1556  return false;
1557  }
1558 
1559  LSB = SrlImm;
1560  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1561  : countTrailingOnes<uint64_t>(AndImm)) -
1562  1;
1563  if (ClampMSB)
1564  // Since we're moving the extend before the right shift operation, we need
1565  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1566  // the zeros which would get shifted in with the original right shift
1567  // operation.
1568  MSB = MSB > 31 ? 31 : MSB;
1569 
1570  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1571  return true;
1572 }
1573 
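// Illustrative walk-through of the AND + SRL case above (values chosen for
// exposition; the register names below are arbitrary):
//
//   (i32 (and (srl %x, 3), 0xFF))
//
// takes the isOpcWithIntImmediate(Op0, ISD::SRL, ...) branch, giving
//   Opd0 = %x, LSB = SrlImm = 3,
//   MSB  = 3 + countTrailingOnes(0xFF) - 1 = 10,
// so the node is selected to UBFMWri %x, 3, 10, which disassembles as the
// alias ubfx w0, w1, #3, #8 (extract 8 bits starting at bit 3).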
1574 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1575  SDValue &Opd0, unsigned &Immr,
1576  unsigned &Imms) {
1577  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1578 
1579  EVT VT = N->getValueType(0);
1580  unsigned BitWidth = VT.getSizeInBits();
1581  assert((VT == MVT::i32 || VT == MVT::i64) &&
1582  "Type checking must have been done before calling this function");
1583 
1584  SDValue Op = N->getOperand(0);
1585  if (Op->getOpcode() == ISD::TRUNCATE) {
1586  Op = Op->getOperand(0);
1587  VT = Op->getValueType(0);
1588  BitWidth = VT.getSizeInBits();
1589  }
1590 
1591  uint64_t ShiftImm;
1592  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1593  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1594  return false;
1595 
1596  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1597  if (ShiftImm + Width > BitWidth)
1598  return false;
1599 
1600  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1601  Opd0 = Op.getOperand(0);
1602  Immr = ShiftImm;
1603  Imms = ShiftImm + Width - 1;
1604  return true;
1605 }
1606 
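// Illustrative example for the routine above (values chosen for exposition;
// register names are arbitrary):
//
//   (i32 (sign_extend_inreg (srl %x, 5), i8))
//
// matches with ShiftImm = 5 and Width = 8, so Immr = 5, Imms = 5 + 8 - 1 = 12
// and the node becomes SBFMWri %x, 5, 12, i.e. the alias
// sbfx w0, w1, #5, #8 (sign-extract 8 bits starting at bit 5).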
1607 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1608  SDValue &Opd0, unsigned &LSB,
1609  unsigned &MSB) {
1610  // We are looking for the following pattern, which basically extracts several
1611  // contiguous bits from the source value and places them at the LSB of the
1612  // destination value, with all other bits of the destination value set to zero:
1613  //
1614  // Value2 = AND Value, MaskImm
1615  // SRL Value2, ShiftImm
1616  //
1617  // where MaskImm >> ShiftImm determines the bit width to extract.
1618  //
1619  // This gets selected into a single UBFM:
1620  //
1621  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1622  //
1623 
1624  if (N->getOpcode() != ISD::SRL)
1625  return false;
1626 
1627  uint64_t AndMask = 0;
1628  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1629  return false;
1630 
1631  Opd0 = N->getOperand(0).getOperand(0);
1632 
1633  uint64_t SrlImm = 0;
1634  if (!isIntImmediate(N->getOperand(1), SrlImm))
1635  return false;
1636 
1637  // Check whether we really have several bits extract here.
1638  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1639  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1640  if (N->getValueType(0) == MVT::i32)
1641  Opc = AArch64::UBFMWri;
1642  else
1643  Opc = AArch64::UBFMXri;
1644 
1645  LSB = SrlImm;
1646  MSB = BitWide + SrlImm - 1;
1647  return true;
1648  }
1649 
1650  return false;
1651 }
1652 
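// Worked example for the pattern above (values chosen for exposition;
// register names are arbitrary):
//
//   (i32 (srl (and %x, 0xFF0), 4))
//
// gives AndMask = 0xFF0 and SrlImm = 4; AndMask >> SrlImm = 0xFF is a mask of
// BitWide = 8 bits, so LSB = 4 and MSB = 4 + 8 - 1 = 11, and the whole
// expression selects to UBFMWri %x, 4, 11, i.e. ubfx w0, w1, #4, #8.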
1653 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1654  unsigned &Immr, unsigned &Imms,
1655  bool BiggerPattern) {
1656  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1657  "N must be a SHR/SRA operation to call this function");
1658 
1659  EVT VT = N->getValueType(0);
1660 
1661  // We could test the type of VT here and return false when it does not
1662  // match, but since that check is already done prior to this call in the
1663  // current context, we turn it into an assert to avoid redundant code.
1664  assert((VT == MVT::i32 || VT == MVT::i64) &&
1665  "Type checking must have been done before calling this function");
1666 
1667  // Check for AND + SRL doing several bits extract.
1668  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1669  return true;
1670 
1671  // We're looking for a shift of a shift.
1672  uint64_t ShlImm = 0;
1673  uint64_t TruncBits = 0;
1674  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1675  Opd0 = N->getOperand(0).getOperand(0);
1676  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1677  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1678  // We are looking for a shift of truncate. Truncate from i64 to i32 could
1679  // be considered as setting the high 32 bits to zero. Our strategy here is to
1680  // always generate a 64-bit UBFM. This consistency will help the CSE pass
1681  // later find more redundancy.
1682  Opd0 = N->getOperand(0).getOperand(0);
1683  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1684  VT = Opd0.getValueType();
1685  assert(VT == MVT::i64 && "the promoted type should be i64");
1686  } else if (BiggerPattern) {
1687  // Let's pretend a 0 shift left has been performed.
1688  // FIXME: Currently we limit this to the bigger pattern case,
1689  // because some optimizations expect AND and not UBFM
1690  Opd0 = N->getOperand(0);
1691  } else
1692  return false;
1693 
1694  // Missing combines/constant folding may have left us with strange
1695  // constants.
1696  if (ShlImm >= VT.getSizeInBits()) {
1697  DEBUG((dbgs() << N
1698  << ": Found large shift immediate, this should not happen\n"));
1699  return false;
1700  }
1701 
1702  uint64_t SrlImm = 0;
1703  if (!isIntImmediate(N->getOperand(1), SrlImm))
1704  return false;
1705 
1706  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1707  "bad amount in shift node!");
1708  int immr = SrlImm - ShlImm;
1709  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1710  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1711  // SRA requires a signed extraction
1712  if (VT == MVT::i32)
1713  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1714  else
1715  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1716  return true;
1717 }
1718 
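// Worked example for the shift-of-shift case above (for exposition only;
// register names are arbitrary):
//
//   (i32 (srl (shl %x, 24), 28))
//
// gives ShlImm = 24, SrlImm = 28, TruncBits = 0, so
//   Immr = 28 - 24 = 4 and Imms = 32 - 24 - 0 - 1 = 7,
// selecting UBFMWri %x, 4, 7, i.e. ubfx w0, w1, #4, #4 (bits 7:4 of %x).
// With ISD::SRA instead of ISD::SRL the same immediates are used with
// SBFMWri, i.e. sbfx w0, w1, #4, #4.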
1719 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1720  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1721 
1722  EVT VT = N->getValueType(0);
1723  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1724  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1725  return false;
1726 
1727  uint64_t ShiftImm;
1728  SDValue Op = N->getOperand(0);
1729  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1730  return false;
1731 
1732  SDLoc dl(N);
1733  // Extend the incoming operand of the shift to 64-bits.
1734  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1735  unsigned Immr = ShiftImm;
1736  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1737  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1738  CurDAG->getTargetConstant(Imms, dl, VT)};
1739  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1740  return true;
1741 }
1742 
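// Illustrative example for the routine above (register names are arbitrary):
//
//   (i64 (sign_extend (i32 (sra %w, 12))))
//
// widens %w to 64 bits and selects SBFMXri with Immr = 12 and
// Imms = 32 - 1 = 31, i.e. the alias sbfx x0, x0, #12, #20, which
// sign-extends bits 31:12 of the original 32-bit value.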
1743 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1744  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1745  unsigned NumberOfIgnoredLowBits = 0,
1746  bool BiggerPattern = false) {
1747  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1748  return false;
1749 
1750  switch (N->getOpcode()) {
1751  default:
1752  if (!N->isMachineOpcode())
1753  return false;
1754  break;
1755  case ISD::AND:
1756  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1757  NumberOfIgnoredLowBits, BiggerPattern);
1758  case ISD::SRL:
1759  case ISD::SRA:
1760  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1761 
1762  case ISD::SIGN_EXTEND_INREG:
1763  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1764  }
1765 
1766  unsigned NOpc = N->getMachineOpcode();
1767  switch (NOpc) {
1768  default:
1769  return false;
1770  case AArch64::SBFMWri:
1771  case AArch64::UBFMWri:
1772  case AArch64::SBFMXri:
1773  case AArch64::UBFMXri:
1774  Opc = NOpc;
1775  Opd0 = N->getOperand(0);
1776  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1777  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1778  return true;
1779  }
1780  // Unreachable
1781  return false;
1782 }
1783 
1784 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1785  unsigned Opc, Immr, Imms;
1786  SDValue Opd0;
1787  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1788  return false;
1789 
1790  EVT VT = N->getValueType(0);
1791  SDLoc dl(N);
1792 
1793  // If the bit extract operation is 64-bit but the original type is 32-bit, we
1794  // need to add an EXTRACT_SUBREG.
1795  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1796  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1797  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1798 
1799  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1800  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1801  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1802  MVT::i32, SDValue(BFM, 0), SubReg));
1803  return true;
1804  }
1805 
1806  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1807  CurDAG->getTargetConstant(Imms, dl, VT)};
1808  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1809  return true;
1810 }
1811 
1812 /// Does DstMask form a complementary pair with the mask provided by
1813 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1814 /// this asks whether DstMask zeroes precisely those bits that will be set by
1815 /// the other half.
1816 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1817  unsigned NumberOfIgnoredHighBits, EVT VT) {
1818  assert((VT == MVT::i32 || VT == MVT::i64) &&
1819  "i32 or i64 mask type expected!");
1820  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1821 
1822  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1823  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1824 
1825  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1826  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1827 }
1828 
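// Quick illustration of the check above (values chosen for exposition; i32,
// no ignored high bits): with DstMask = 0xFFFF00FF and
// BitsToBeInserted = 0x0000FF00, the AND of the two masks is 0 and their OR
// is all ones, so the pair is complementary and the routine returns true;
// DstMask = 0xFFFF0FFF with the same BitsToBeInserted overlaps in bits 11:8
// and is rejected.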
1829 // Look for bits that will be useful for later uses.
1830 // A bit is considered useless as soon as it is dropped and never used
1831 // before it has been dropped.
1832 // E.g., looking for the useful bits of x:
1833 // 1. y = x & 0x7
1834 // 2. z = y >> 2
1835 // After #1, the useful bits of x are 0x7, and those useful bits of x live
1836 // through y.
1837 // After #2, the useful bits of x are 0x4.
1838 // However, if x is used by an unpredictable instruction, then all its bits
1839 // are useful.
1840 // E.g.
1841 // 1. y = x & 0x7
1842 // 2. z = y >> 2
1843 // 3. str x, [@x]
1844 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1845 
1846 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1847  unsigned Depth) {
1848  uint64_t Imm =
1849  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1850  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1851  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1852  getUsefulBits(Op, UsefulBits, Depth + 1);
1853 }
1854 
1855 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1856  uint64_t Imm, uint64_t MSB,
1857  unsigned Depth) {
1858  // inherit the bitwidth value
1859  APInt OpUsefulBits(UsefulBits);
1860  OpUsefulBits = 1;
1861 
1862  if (MSB >= Imm) {
1863  OpUsefulBits <<= MSB - Imm + 1;
1864  --OpUsefulBits;
1865  // The interesting part will be in the lower part of the result
1866  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1867  // The interesting part was starting at Imm in the argument
1868  OpUsefulBits <<= Imm;
1869  } else {
1870  OpUsefulBits <<= MSB + 1;
1871  --OpUsefulBits;
1872  // The interesting part will be shifted in the result
1873  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1874  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1875  // The interesting part was at zero in the argument
1876  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1877  }
1878 
1879  UsefulBits &= OpUsefulBits;
1880 }
1881 
1882 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1883  unsigned Depth) {
1884  uint64_t Imm =
1885  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1886  uint64_t MSB =
1887  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1888 
1889  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1890 }
1891 
1892 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1893  unsigned Depth) {
1894  uint64_t ShiftTypeAndValue =
1895  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1896  APInt Mask(UsefulBits);
1897  Mask.clearAllBits();
1898  Mask.flipAllBits();
1899 
1900  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1901  // Shift Left
1902  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1903  Mask <<= ShiftAmt;
1904  getUsefulBits(Op, Mask, Depth + 1);
1905  Mask.lshrInPlace(ShiftAmt);
1906  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1907  // Shift Right
1908  // We do not handle AArch64_AM::ASR, because the sign will change the
1909  // number of useful bits
1910  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1911  Mask.lshrInPlace(ShiftAmt);
1912  getUsefulBits(Op, Mask, Depth + 1);
1913  Mask <<= ShiftAmt;
1914  } else
1915  return;
1916 
1917  UsefulBits &= Mask;
1918 }
1919 
1920 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1921  unsigned Depth) {
1922  uint64_t Imm =
1923  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1924  uint64_t MSB =
1925  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1926 
1927  APInt OpUsefulBits(UsefulBits);
1928  OpUsefulBits = 1;
1929 
1930  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1931  ResultUsefulBits.flipAllBits();
1932  APInt Mask(UsefulBits.getBitWidth(), 0);
1933 
1934  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1935 
1936  if (MSB >= Imm) {
1937  // The instruction is a BFXIL.
1938  uint64_t Width = MSB - Imm + 1;
1939  uint64_t LSB = Imm;
1940 
1941  OpUsefulBits <<= Width;
1942  --OpUsefulBits;
1943 
1944  if (Op.getOperand(1) == Orig) {
1945  // Copy the low bits from the result to bits starting from LSB.
1946  Mask = ResultUsefulBits & OpUsefulBits;
1947  Mask <<= LSB;
1948  }
1949 
1950  if (Op.getOperand(0) == Orig)
1951  // Bits starting from LSB in the input contribute to the result.
1952  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1953  } else {
1954  // The instruction is a BFI.
1955  uint64_t Width = MSB + 1;
1956  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1957 
1958  OpUsefulBits <<= Width;
1959  --OpUsefulBits;
1960  OpUsefulBits <<= LSB;
1961 
1962  if (Op.getOperand(1) == Orig) {
1963  // Copy the bits from the result to the zero bits.
1964  Mask = ResultUsefulBits & OpUsefulBits;
1965  Mask.lshrInPlace(LSB);
1966  }
1967 
1968  if (Op.getOperand(0) == Orig)
1969  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1970  }
1971 
1972  UsefulBits &= Mask;
1973 }
1974 
1975 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1976  SDValue Orig, unsigned Depth) {
1977 
1978  // Users of this node should have already been instruction selected
1979  // FIXME: Can we turn that into an assert?
1980  if (!UserNode->isMachineOpcode())
1981  return;
1982 
1983  switch (UserNode->getMachineOpcode()) {
1984  default:
1985  return;
1986  case AArch64::ANDSWri:
1987  case AArch64::ANDSXri:
1988  case AArch64::ANDWri:
1989  case AArch64::ANDXri:
1990  // We increment Depth only when we call the getUsefulBits
1991  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1992  Depth);
1993  case AArch64::UBFMWri:
1994  case AArch64::UBFMXri:
1995  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1996 
1997  case AArch64::ORRWrs:
1998  case AArch64::ORRXrs:
1999  if (UserNode->getOperand(1) != Orig)
2000  return;
2001  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2002  Depth);
2003  case AArch64::BFMWri:
2004  case AArch64::BFMXri:
2005  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2006 
2007  case AArch64::STRBBui:
2008  case AArch64::STURBBi:
2009  if (UserNode->getOperand(0) != Orig)
2010  return;
2011  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2012  return;
2013 
2014  case AArch64::STRHHui:
2015  case AArch64::STURHHi:
2016  if (UserNode->getOperand(0) != Orig)
2017  return;
2018  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2019  return;
2020  }
2021 }
2022 
2023 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2024  if (Depth >= 6)
2025  return;
2026  // Initialize UsefulBits
2027  if (!Depth) {
2028  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2029  // At the beginning, assume every produced bit is useful
2030  UsefulBits = APInt(Bitwidth, 0);
2031  UsefulBits.flipAllBits();
2032  }
2033  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2034 
2035  for (SDNode *Node : Op.getNode()->uses()) {
2036  // A use cannot produce useful bits
2037  APInt UsefulBitsForUse = APInt(UsefulBits);
2038  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2039  UsersUsefulBits |= UsefulBitsForUse;
2040  }
2041  // UsefulBits contains the produced bits that are meaningful for the
2042  // current definition, thus a user cannot make a bit meaningful at
2043  // this point
2044  UsefulBits &= UsersUsefulBits;
2045 }
2046 
2047 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2048 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2049 /// 0, return Op unchanged.
2050 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2051  if (ShlAmount == 0)
2052  return Op;
2053 
2054  EVT VT = Op.getValueType();
2055  SDLoc dl(Op);
2056  unsigned BitWidth = VT.getSizeInBits();
2057  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2058 
2059  SDNode *ShiftNode;
2060  if (ShlAmount > 0) {
2061  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2062  ShiftNode = CurDAG->getMachineNode(
2063  UBFMOpc, dl, VT, Op,
2064  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2065  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2066  } else {
2067  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2068  assert(ShlAmount < 0 && "expected right shift");
2069  int ShrAmount = -ShlAmount;
2070  ShiftNode = CurDAG->getMachineNode(
2071  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2072  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2073  }
2074 
2075  return SDValue(ShiftNode, 0);
2076 }
2077 
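// For reference, the UBFM encodings produced above correspond to the usual
// shift aliases (64-bit example, amounts chosen for exposition):
//   LSL x0, x1, #8  ==  UBFM x0, x1, #56, #55   (ImmR = 64 - 8, ImmS = 63 - 8)
//   LSR x0, x1, #8  ==  UBFM x0, x1, #8,  #63   (ImmR = 8,      ImmS = 63)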
2078 /// Does this tree qualify as an attempt to move a bitfield into position,
2079 /// essentially "(and (shl VAL, N), Mask)".
2080 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2081  bool BiggerPattern,
2082  SDValue &Src, int &ShiftAmount,
2083  int &MaskWidth) {
2084  EVT VT = Op.getValueType();
2085  unsigned BitWidth = VT.getSizeInBits();
2086  (void)BitWidth;
2087  assert(BitWidth == 32 || BitWidth == 64);
2088 
2089  KnownBits Known;
2090  CurDAG->computeKnownBits(Op, Known);
2091 
2092  // Non-zero in the sense that they're not provably zero, which is the key
2093  // point if we want to use this value
2094  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2095 
2096  // Discard a constant AND mask if present. It's safe because the node will
2097  // already have been factored into the computeKnownBits calculation above.
2098  uint64_t AndImm;
2099  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2100  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2101  Op = Op.getOperand(0);
2102  }
2103 
2104  // Don't match if the SHL has more than one use, since then we'll end up
2105  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2106  if (!BiggerPattern && !Op.hasOneUse())
2107  return false;
2108 
2109  uint64_t ShlImm;
2110  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2111  return false;
2112  Op = Op.getOperand(0);
2113 
2114  if (!isShiftedMask_64(NonZeroBits))
2115  return false;
2116 
2117  ShiftAmount = countTrailingZeros(NonZeroBits);
2118  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2119 
2120  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2121  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2122  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2123  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2124  // which case it is not profitable to insert an extra shift.
2125  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2126  return false;
2127  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2128 
2129  return true;
2130 }
2131 
2132 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2133  assert(VT == MVT::i32 || VT == MVT::i64);
2134  if (VT == MVT::i32)
2135  return isShiftedMask_32(Mask);
2136  return isShiftedMask_64(Mask);
2137 }
2138 
2139 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2140 // inserted only sets known zero bits.
2141 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2142  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2143 
2144  EVT VT = N->getValueType(0);
2145  if (VT != MVT::i32 && VT != MVT::i64)
2146  return false;
2147 
2148  unsigned BitWidth = VT.getSizeInBits();
2149 
2150  uint64_t OrImm;
2151  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2152  return false;
2153 
2154  // Skip this transformation if the ORR immediate can be encoded directly in
2155  // an ORR instruction: in that case we would merely trade an AND+ORR for an
2156  // ORR+BFI/BFXIL, which is most likely performance neutral.
2157  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2158  return false;
2159 
2160  uint64_t MaskImm;
2161  SDValue And = N->getOperand(0);
2162  // Must be a single use AND with an immediate operand.
2163  if (!And.hasOneUse() ||
2164  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2165  return false;
2166 
2167  // Compute the Known Zero for the AND as this allows us to catch more general
2168  // cases than just looking for AND with imm.
2169  KnownBits Known;
2170  CurDAG->computeKnownBits(And, Known);
2171 
2172  // Non-zero in the sense that they're not provably zero, which is the key
2173  // point if we want to use this value.
2174  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2175 
2176  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2177  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2178  return false;
2179 
2180  // The bits being inserted must only set those bits that are known to be zero.
2181  if ((OrImm & NotKnownZero) != 0) {
2182  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2183  // currently handle this case.
2184  return false;
2185  }
2186 
2187  // BFI/BFXIL dst, src, #lsb, #width.
2188  int LSB = countTrailingOnes(NotKnownZero);
2189  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2190 
2191  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2192  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2193  unsigned ImmS = Width - 1;
2194 
2195  // If we're creating a BFI instruction, avoid cases where we need more
2196  // instructions to materialize the BFI constant as compared to the original
2197  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2198  // should be no worse in this case.
2199  bool IsBFI = LSB != 0;
2200  uint64_t BFIImm = OrImm >> LSB;
2201  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2202  // We have a BFI instruction and we know the constant can't be materialized
2203  // with a ORR-immediate with the zero register.
2204  unsigned OrChunks = 0, BFIChunks = 0;
2205  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2206  if (((OrImm >> Shift) & 0xFFFF) != 0)
2207  ++OrChunks;
2208  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2209  ++BFIChunks;
2210  }
2211  if (BFIChunks > OrChunks)
2212  return false;
2213  }
2214 
2215  // Materialize the constant to be inserted.
2216  SDLoc DL(N);
2217  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2218  SDNode *MOVI = CurDAG->getMachineNode(
2219  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2220 
2221  // Create the BFI/BFXIL instruction.
2222  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2223  CurDAG->getTargetConstant(ImmR, DL, VT),
2224  CurDAG->getTargetConstant(ImmS, DL, VT)};
2225  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2226  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2227  return true;
2228 }
2229 
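// Worked example for the routine above (assuming this path is reached and
// computeKnownBits reports exactly the AND mask; register names arbitrary):
//
//   (i32 (or (and %x, 0xFFFFFFF8), 0x5))
//
// OrImm = 0x5 is not encodable as a logical immediate, Known.Zero = 0x7 is a
// shifted mask, NotKnownZero = 0xFFFFFFF8, so LSB = 0 and Width = 3, giving
// ImmR = 0 and ImmS = 2. The constant is materialized with MOVi32imm and the
// OR becomes BFMWri, i.e. roughly:
//   mov   w8, #5
//   bfxil w0, w8, #0, #3      // w0 holds %x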
2230 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2231  SelectionDAG *CurDAG) {
2232  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2233 
2234  EVT VT = N->getValueType(0);
2235  if (VT != MVT::i32 && VT != MVT::i64)
2236  return false;
2237 
2238  unsigned BitWidth = VT.getSizeInBits();
2239 
2240  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2241  // have the expected shape. Try to undo that.
2242 
2243  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2244  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2245 
2246  // Given an OR operation, check if we have the following pattern
2247  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
2248  // isBitfieldExtractOp)
2249  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2250  // countTrailingZeros(mask2) == imm2 - imm + 1
2251  // f = d | c
2252  // if yes, replace the OR instruction with:
2253  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2254 
2255  // OR is commutative, check all combinations of operand order and values of
2256  // BiggerPattern, i.e.
2257  // Opd0, Opd1, BiggerPattern=false
2258  // Opd1, Opd0, BiggerPattern=false
2259  // Opd0, Opd1, BiggerPattern=true
2260  // Opd1, Opd0, BiggerPattern=true
2261  // Several of these combinations may match, so check with BiggerPattern=false
2262  // first since that will produce better results by matching more instructions
2263  // and/or inserting fewer extra instructions.
2264  for (int I = 0; I < 4; ++I) {
2265 
2266  SDValue Dst, Src;
2267  unsigned ImmR, ImmS;
2268  bool BiggerPattern = I / 2;
2269  SDValue OrOpd0Val = N->getOperand(I % 2);
2270  SDNode *OrOpd0 = OrOpd0Val.getNode();
2271  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2272  SDNode *OrOpd1 = OrOpd1Val.getNode();
2273 
2274  unsigned BFXOpc;
2275  int DstLSB, Width;
2276  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2277  NumberOfIgnoredLowBits, BiggerPattern)) {
2278  // Check that the returned opcode is compatible with the pattern,
2279  // i.e., same type and zero extended (U and not S)
2280  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2281  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2282  continue;
2283 
2284  // Compute the width of the bitfield insertion
2285  DstLSB = 0;
2286  Width = ImmS - ImmR + 1;
2287  // FIXME: This constraint is to catch bitfield insertion; we may
2288  // want to widen the pattern if we want to grab the general bitfield
2289  // move case.
2290  if (Width <= 0)
2291  continue;
2292 
2293  // If the mask on the insertee is correct, we have a BFXIL operation. We
2294  // can share the ImmR and ImmS values from the already-computed UBFM.
2295  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2296  BiggerPattern,
2297  Src, DstLSB, Width)) {
2298  ImmR = (BitWidth - DstLSB) % BitWidth;
2299  ImmS = Width - 1;
2300  } else
2301  continue;
2302 
2303  // Check the second part of the pattern
2304  EVT VT = OrOpd1Val.getValueType();
2305  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2306 
2307  // Compute the Known Zero for the candidate of the first operand.
2308  // This allows us to catch more general cases than just looking for an
2309  // AND with imm. Indeed, simplify-demanded-bits may have removed
2310  // the AND instruction because it proved it was useless.
2311  KnownBits Known;
2312  CurDAG->computeKnownBits(OrOpd1Val, Known);
2313 
2314  // Check if there is enough room for the second operand to appear
2315  // in the first one
2316  APInt BitsToBeInserted =
2317  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2318 
2319  if ((BitsToBeInserted & ~Known.Zero) != 0)
2320  continue;
2321 
2322  // Set the first operand
2323  uint64_t Imm;
2324  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2325  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2326  // In that case, we can eliminate the AND
2327  Dst = OrOpd1->getOperand(0);
2328  else
2329  // Maybe the AND has been removed by simplify-demanded-bits
2330  // or is useful because it discards more bits
2331  Dst = OrOpd1Val;
2332 
2333  // both parts match
2334  SDLoc DL(N);
2335  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2336  CurDAG->getTargetConstant(ImmS, DL, VT)};
2337  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2338  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2339  return true;
2340  }
2341 
2342  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2343  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2344  // mask (e.g., 0x000ffff0).
2345  uint64_t Mask0Imm, Mask1Imm;
2346  SDValue And0 = N->getOperand(0);
2347  SDValue And1 = N->getOperand(1);
2348  if (And0.hasOneUse() && And1.hasOneUse() &&
2349  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2350  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2351  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2352  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2353 
2354  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2355  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2356  // bits to be inserted.
2357  if (isShiftedMask(Mask0Imm, VT)) {
2358  std::swap(And0, And1);
2359  std::swap(Mask0Imm, Mask1Imm);
2360  }
2361 
2362  SDValue Src = And1->getOperand(0);
2363  SDValue Dst = And0->getOperand(0);
2364  unsigned LSB = countTrailingZeros(Mask1Imm);
2365  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2366 
2367  // The BFXIL inserts the low-order bits from a source register, so right
2368  // shift the needed bits into place.
2369  SDLoc DL(N);
2370  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2371  SDNode *LSR = CurDAG->getMachineNode(
2372  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2373  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2374 
2375  // BFXIL is an alias of BFM, so translate to BFM operands.
2376  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2377  unsigned ImmS = Width - 1;
2378 
2379  // Create the BFXIL instruction.
2380  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2381  CurDAG->getTargetConstant(ImmR, DL, VT),
2382  CurDAG->getTargetConstant(ImmS, DL, VT)};
2383  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2384  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2385  return true;
2386  }
2387 
2388  return false;
2389 }
2390 
2391 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2392  if (N->getOpcode() != ISD::OR)
2393  return false;
2394 
2395  APInt NUsefulBits;
2396  getUsefulBits(SDValue(N, 0), NUsefulBits);
2397 
2398  // If none of the bits are useful, just return UNDEF.
2399  if (!NUsefulBits) {
2400  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2401  return true;
2402  }
2403 
2404  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2405  return true;
2406 
2407  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2408 }
2409 
2410 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2411 /// equivalent of a left shift by a constant amount followed by an and masking
2412 /// out a contiguous set of bits.
2413 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2414  if (N->getOpcode() != ISD::AND)
2415  return false;
2416 
2417  EVT VT = N->getValueType(0);
2418  if (VT != MVT::i32 && VT != MVT::i64)
2419  return false;
2420 
2421  SDValue Op0;
2422  int DstLSB, Width;
2423  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2424  Op0, DstLSB, Width))
2425  return false;
2426 
2427  // ImmR is the rotate right amount.
2428  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2429  // ImmS is the most significant bit of the source to be moved.
2430  unsigned ImmS = Width - 1;
2431 
2432  SDLoc DL(N);
2433  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2434  CurDAG->getTargetConstant(ImmS, DL, VT)};
2435  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2436  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2437  return true;
2438 }
2439 
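// Worked example for the UBFIZ match above (assuming the shift has a single
// use and known-bits sees exactly the mask; register names arbitrary):
//
//   (i32 (and (shl %x, 3), 0x1F8))
//
// NonZeroBits = 0x1F8 is a shifted mask, so DstLSB = 3 and Width = 6, giving
// ImmR = (32 - 3) % 32 = 29 and ImmS = 5. The node is selected to
// UBFMWri %x, 29, 5, i.e. the alias ubfiz w0, w1, #3, #6.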
2440 bool
2441 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2442  unsigned RegWidth) {
2443  APFloat FVal(0.0);
2444  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2445  FVal = CN->getValueAPF();
2446  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2447  // Some otherwise illegal constants are allowed in this case.
2448  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2449  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2450  return false;
2451 
2452  ConstantPoolSDNode *CN =
2453  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2454  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2455  } else
2456  return false;
2457 
2458  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2459  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2460  // x-register.
2461  //
2462  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2463  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2464  // integers.
2465  bool IsExact;
2466 
2467  // fbits is between 1 and 64 in the worst-case, which means the fmul
2468  // could have 2^64 as an actual operand. Need 65 bits of precision.
2469  APSInt IntVal(65, true);
2470  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2471 
2472  // N.b. isPowerOf2 also checks for > 0.
2473  if (!IsExact || !IntVal.isPowerOf2()) return false;
2474  unsigned FBits = IntVal.logBase2();
2475 
2476  // Checks above should have guaranteed that we haven't lost information in
2477  // finding FBits, but it must still be in range.
2478  if (FBits == 0 || FBits > RegWidth) return false;
2479 
2480  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2481  return true;
2482 }
2483 
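// Illustrative example for the fixed-point operand match above (values chosen
// for exposition): for (fp_to_sint (fmul %val, 65536.0)) targeting a
// w-register, FVal = 65536.0 converts exactly to IntVal = 2^16, so
// FBits = 16 <= 32 and FixedPos becomes the constant 16; the patterns that
// consume this operand then emit something like
//   fcvtzs w0, s0, #16
// i.e. a conversion with 16 fractional bits.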
2484 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
2485 // of the string, obtains the integer values from them, and combines these
2486 // into a single value to be used in the MRS/MSR instruction.
2487 static int getIntOperandFromRegisterString(StringRef RegString) {
2488  SmallVector<StringRef, 5> Fields;
2489  RegString.split(Fields, ':');
2490 
2491  if (Fields.size() == 1)
2492  return -1;
2493 
2494  assert(Fields.size() == 5
2495  && "Invalid number of fields in read register string");
2496 
2497  SmallVector<int, 5> Ops;
2498  bool AllIntFields = true;
2499 
2500  for (StringRef Field : Fields) {
2501  unsigned IntField;
2502  AllIntFields &= !Field.getAsInteger(10, IntField);
2503  Ops.push_back(IntField);
2504  }
2505 
2506  assert(AllIntFields &&
2507  "Unexpected non-integer value in special register string.");
2508 
2509  // Need to combine the integer fields of the string into a single value
2510  // based on the bit encoding of MRS/MSR instruction.
2511  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2512  (Ops[3] << 3) | (Ops[4]);
2513 }
2514 
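// Worked example of the encoding above (string chosen for exposition): for
// the register string "3:3:13:0:2" the fields are o0 = 3, op1 = 3, CRn = 13,
// CRm = 0, op2 = 2, so the combined value is
//   (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xDE82,
// which corresponds to the MRS/MSR operand encoding of TPIDR_EL0.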
2515 // Lower the read_register intrinsic to an MRS instruction node if the special
2516 // register string argument is either of the form detailed in the ACLE (the
2517 // form described in getIntOperandFromRegisterString) or is a named register
2518 // known by the MRS SysReg mapper.
2519 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2520  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2521  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2522  SDLoc DL(N);
2523 
2524  int Reg = getIntOperandFromRegisterString(RegString->getString());
2525  if (Reg != -1) {
2526  ReplaceNode(N, CurDAG->getMachineNode(
2527  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2528  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2529  N->getOperand(0)));
2530  return true;
2531  }
2532 
2533  // Use the sysreg mapper to map the remaining possible strings to the
2534  // value for the register to be used for the instruction operand.
2535  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2536  if (TheReg && TheReg->Readable &&
2537  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2538  Reg = TheReg->Encoding;
2539  else
2540  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2541 
2542  if (Reg != -1) {
2543  ReplaceNode(N, CurDAG->getMachineNode(
2544  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2545  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2546  N->getOperand(0)));
2547  return true;
2548  }
2549 
2550  return false;
2551 }
2552 
2553 // Lower the write_register intrinsic to an MSR instruction node if the special
2554 // register string argument is either of the form detailed in the ACLE (the
2555 // form described in getIntOperandFromRegisterString) or is a named register
2556 // known by the MSR SysReg mapper.
2557 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2558  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2559  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2560  SDLoc DL(N);
2561 
2562  int Reg = getIntOperandFromRegisterString(RegString->getString());
2563  if (Reg != -1) {
2564  ReplaceNode(
2565  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2566  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2567  N->getOperand(2), N->getOperand(0)));
2568  return true;
2569  }
2570 
2571  // Check if the register was one of those allowed as the pstatefield value in
2572  // the MSR (immediate) instruction. To accept the values allowed in the
2573  // pstatefield for the MSR (immediate) instruction, we also require that an
2574  // immediate value has been provided as an argument; we know that this is
2575  // the case, as it has been ensured by semantic checking.
2576  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2577  if (PMapper) {
2578  assert (isa<ConstantSDNode>(N->getOperand(2))
2579  && "Expected a constant integer expression.");
2580  unsigned Reg = PMapper->Encoding;
2581  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2582  unsigned State;
2583  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
2584  assert(Immed < 2 && "Bad imm");
2585  State = AArch64::MSRpstateImm1;
2586  } else {
2587  assert(Immed < 16 && "Bad imm");
2588  State = AArch64::MSRpstateImm4;
2589  }
2590  ReplaceNode(N, CurDAG->getMachineNode(
2591  State, DL, MVT::Other,
2592  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2593  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2594  N->getOperand(0)));
2595  return true;
2596  }
2597 
2598  // Use the sysreg mapper to attempt to map the remaining possible strings
2599  // to the value for the register to be used for the MSR (register)
2600  // instruction operand.
2601  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2602  if (TheReg && TheReg->Writeable &&
2603  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2604  Reg = TheReg->Encoding;
2605  else
2606  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2607  if (Reg != -1) {
2608  ReplaceNode(N, CurDAG->getMachineNode(
2609  AArch64::MSR, DL, MVT::Other,
2610  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2611  N->getOperand(2), N->getOperand(0)));
2612  return true;
2613  }
2614 
2615  return false;
2616 }
2617 
2618 /// We've got special pseudo-instructions for these
2619 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2620  unsigned Opcode;
2621  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2622 
2623  // Leave the IR for LSE if the subtarget supports it.
2624  if (Subtarget->hasLSE()) return false;
2625 
2626  if (MemTy == MVT::i8)
2627  Opcode = AArch64::CMP_SWAP_8;
2628  else if (MemTy == MVT::i16)
2629  Opcode = AArch64::CMP_SWAP_16;
2630  else if (MemTy == MVT::i32)
2631  Opcode = AArch64::CMP_SWAP_32;
2632  else if (MemTy == MVT::i64)
2633  Opcode = AArch64::CMP_SWAP_64;
2634  else
2635  llvm_unreachable("Unknown AtomicCmpSwap type");
2636 
2637  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2638  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2639  N->getOperand(0)};
2640  SDNode *CmpSwap = CurDAG->getMachineNode(
2641  Opcode, SDLoc(N),
2642  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2643 
2644  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2645  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2646  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2647 
2648  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2649  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2650  CurDAG->RemoveDeadNode(N);
2651 
2652  return true;
2653 }
2654 
2655 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2656  // If we have a custom node, we already have selected!
2657  if (Node->isMachineOpcode()) {
2658  DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2659  Node->setNodeId(-1);
2660  return;
2661  }
2662 
2663  // A few cases need custom selection.
2664  EVT VT = Node->getValueType(0);
2665 
2666  switch (Node->getOpcode()) {
2667  default:
2668  break;
2669 
2670  case ISD::ATOMIC_CMP_SWAP:
2671  if (SelectCMP_SWAP(Node))
2672  return;
2673  break;
2674 
2675  case ISD::READ_REGISTER:
2676  if (tryReadRegister(Node))
2677  return;
2678  break;
2679 
2680  case ISD::WRITE_REGISTER:
2681  if (tryWriteRegister(Node))
2682  return;
2683  break;
2684 
2685  case ISD::ADD:
2686  if (tryMLAV64LaneV128(Node))
2687  return;
2688  break;
2689 
2690  case ISD::LOAD: {
2691  // Try to select as an indexed load. Fall through to normal processing
2692  // if we can't.
2693  if (tryIndexedLoad(Node))
2694  return;
2695  break;
2696  }
2697 
2698  case ISD::SRL:
2699  case ISD::AND:
2700  case ISD::SRA:
2701  case ISD::SIGN_EXTEND_INREG:
2702  if (tryBitfieldExtractOp(Node))
2703  return;
2704  if (tryBitfieldInsertInZeroOp(Node))
2705  return;
2706  break;
2707 
2708  case ISD::SIGN_EXTEND:
2709  if (tryBitfieldExtractOpFromSExt(Node))
2710  return;
2711  break;
2712 
2713  case ISD::OR:
2714  if (tryBitfieldInsertOp(Node))
2715  return;
2716  break;
2717 
2718  case ISD::EXTRACT_VECTOR_ELT: {
2719  // Extracting lane zero is a special case where we can just use a plain
2720  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2721  // the rest of the compiler, especially the register allocator and copy
2722  // propagation, to reason about, so is preferred when it's possible to
2723  // use it.
2724  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2725  // Bail and use the default Select() for non-zero lanes.
2726  if (LaneNode->getZExtValue() != 0)
2727  break;
2728  // If the element type is not the same as the result type, likewise
2729  // bail and use the default Select(), as there's more to do than just
2730  // a cross-class COPY. This catches extracts of i8 and i16 elements
2731  // since they will need an explicit zext.
2732  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2733  break;
2734  unsigned SubReg;
2735  switch (Node->getOperand(0)
2736  .getValueType()
2737  .getVectorElementType()
2738  .getSizeInBits()) {
2739  default:
2740  llvm_unreachable("Unexpected vector element type!");
2741  case 64:
2742  SubReg = AArch64::dsub;
2743  break;
2744  case 32:
2745  SubReg = AArch64::ssub;
2746  break;
2747  case 16:
2748  SubReg = AArch64::hsub;
2749  break;
2750  case 8:
2751  llvm_unreachable("unexpected zext-requiring extract element!");
2752  }
2753  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2754  Node->getOperand(0));
2755  DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2756  DEBUG(Extract->dumpr(CurDAG));
2757  DEBUG(dbgs() << "\n");
2758  ReplaceNode(Node, Extract.getNode());
2759  return;
2760  }
2761  case ISD::Constant: {
2762  // Materialize zero constants as copies from WZR/XZR. This allows
2763  // the coalescer to propagate these into other instructions.
2764  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2765  if (ConstNode->isNullValue()) {
2766  if (VT == MVT::i32) {
2767  SDValue New = CurDAG->getCopyFromReg(
2768  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2769  ReplaceNode(Node, New.getNode());
2770  return;
2771  } else if (VT == MVT::i64) {
2772  SDValue New = CurDAG->getCopyFromReg(
2773  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2774  ReplaceNode(Node, New.getNode());
2775  return;
2776  }
2777  }
2778  break;
2779  }
2780  case ISD::CopyToReg: {
2781  // Special case for copy of zero to avoid a double copy.
2782  SDNode *CopyVal = Node->getOperand(2).getNode();
2783  ConstantSDNode *CopyValConst = dyn_cast<ConstantSDNode>(CopyVal);
2784  if (!CopyValConst || !CopyValConst->isNullValue())
2785  break;
2786  const SDValue &Dest = Node->getOperand(1);
2787  if (!TargetRegisterInfo::isVirtualRegister(
2788  cast<RegisterSDNode>(Dest)->getReg()))
2789  break;
2790  unsigned ZeroReg;
2791  EVT ZeroVT = CopyValConst->getValueType(0);
2792  if (ZeroVT == MVT::i32)
2793  ZeroReg = AArch64::WZR;
2794  else if (ZeroVT == MVT::i64)
2795  ZeroReg = AArch64::XZR;
2796  else
2797  break;
2798  unsigned NumOperands = Node->getNumOperands();
2799  SDValue ZeroRegVal = CurDAG->getRegister(ZeroReg, ZeroVT);
2800  // Replace the source operand (#2) with ZeroRegVal.
2801  SDValue Ops[] = {Node->getOperand(0), Node->getOperand(1), ZeroRegVal,
2802  (NumOperands == 4) ? Node->getOperand(3) : SDValue()};
2803  SDValue New =
2804  CurDAG->getNode(ISD::CopyToReg, SDLoc(Node), Node->getVTList(),
2805  makeArrayRef(Ops, NumOperands));
2806  ReplaceNode(Node, New.getNode());
2807  return;
2808  }
2809  case ISD::FrameIndex: {
2810  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2811  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2812  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2813  const TargetLowering *TLI = getTargetLowering();
2814  SDValue TFI = CurDAG->getTargetFrameIndex(
2815  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2816  SDLoc DL(Node);
2817  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2818  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2819  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2820  return;
2821  }
2822  case ISD::INTRINSIC_W_CHAIN: {
2823  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2824  switch (IntNo) {
2825  default:
2826  break;
2827  case Intrinsic::aarch64_ldaxp:
2828  case Intrinsic::aarch64_ldxp: {
2829  unsigned Op =
2830  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2831  SDValue MemAddr = Node->getOperand(2);
2832  SDLoc DL(Node);
2833  SDValue Chain = Node->getOperand(0);
2834 
2835  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2836  MVT::Other, MemAddr, Chain);
2837 
2838  // Transfer memoperands.
2839  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2840  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2841  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2842  ReplaceNode(Node, Ld);
2843  return;
2844  }
2845  case Intrinsic::aarch64_stlxp:
2846  case Intrinsic::aarch64_stxp: {
2847  unsigned Op =
2848  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2849  SDLoc DL(Node);
2850  SDValue Chain = Node->getOperand(0);
2851  SDValue ValLo = Node->getOperand(2);
2852  SDValue ValHi = Node->getOperand(3);
2853  SDValue MemAddr = Node->getOperand(4);
2854 
2855  // Place arguments in the right order.
2856  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2857 
2858  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2859  // Transfer memoperands.
2860  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2861  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2862  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2863 
2864  ReplaceNode(Node, St);
2865  return;
2866  }
2867  case Intrinsic::aarch64_neon_ld1x2:
2868  if (VT == MVT::v8i8) {
2869  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2870  return;
2871  } else if (VT == MVT::v16i8) {
2872  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2873  return;
2874  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2875  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2876  return;
2877  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2878  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2879  return;
2880  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2881  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2882  return;
2883  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2884  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2885  return;
2886  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2887  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2888  return;
2889  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2890  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2891  return;
2892  }
2893  break;
2894  case Intrinsic::aarch64_neon_ld1x3:
2895  if (VT == MVT::v8i8) {
2896  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2897  return;
2898  } else if (VT == MVT::v16i8) {
2899  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2900  return;
2901  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2902  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2903  return;
2904  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2905  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2906  return;
2907  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2908  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2909  return;
2910  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2911  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2912  return;
2913  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2914  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2915  return;
2916  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2917  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2918  return;
2919  }
2920  break;
2921  case Intrinsic::aarch64_neon_ld1x4:
2922  if (VT == MVT::v8i8) {
2923  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2924  return;
2925  } else if (VT == MVT::v16i8) {
2926  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2927  return;
2928  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2929  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2930  return;
2931  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2932  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2933  return;
2934  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2935  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2936  return;
2937  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2938  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2939  return;
2940  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2941  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2942  return;
2943  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2944  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2945  return;
2946  }
2947  break;
2948  case Intrinsic::aarch64_neon_ld2:
2949  if (VT == MVT::v8i8) {
2950  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2951  return;
2952  } else if (VT == MVT::v16i8) {
2953  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2954  return;
2955  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2956  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2957  return;
2958  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2959  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2960  return;
2961  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2962  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2963  return;
2964  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2965  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2966  return;
2967  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2968  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2969  return;
2970  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2971  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2972  return;
2973  }
2974  break;
2975  case Intrinsic::aarch64_neon_ld3:
2976  if (VT == MVT::v8i8) {
2977  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2978  return;
2979  } else if (VT == MVT::v16i8) {
2980  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2981  return;
2982  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2983  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2984  return;
2985  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2986  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2987  return;
2988  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2989  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2990  return;
2991  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2992  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2993  return;
2994  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2995  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2996  return;
2997  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2998  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2999  return;
3000  }
3001  break;
3002  case Intrinsic::aarch64_neon_ld4:
3003  if (VT == MVT::v8i8) {
3004  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3005  return;
3006  } else if (VT == MVT::v16i8) {
3007  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3008  return;
3009  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3010  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3011  return;
3012  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3013  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3014  return;
3015  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3016  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3017  return;
3018  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3019  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3020  return;
3021  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3022  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3023  return;
3024  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3025  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3026  return;
3027  }
3028  break;
3029  case Intrinsic::aarch64_neon_ld2r:
3030  if (VT == MVT::v8i8) {
3031  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3032  return;
3033  } else if (VT == MVT::v16i8) {
3034  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3035  return;
3036  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3037  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3038  return;
3039  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3040  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3041  return;
3042  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3043  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3044  return;
3045  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3046  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3047  return;
3048  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3049  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3050  return;
3051  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3052  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3053  return;
3054  }
3055  break;
3056  case Intrinsic::aarch64_neon_ld3r:
3057  if (VT == MVT::v8i8) {
3058  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3059  return;
3060  } else if (VT == MVT::v16i8) {
3061  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3062  return;
3063  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3064  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3065  return;
3066  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3067  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3068  return;
3069  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3070  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3071  return;
3072  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3073  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3074  return;
3075  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3076  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3077  return;
3078  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3079  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3080  return;
3081  }
3082  break;
3083  case Intrinsic::aarch64_neon_ld4r:
3084  if (VT == MVT::v8i8) {
3085  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3086  return;
3087  } else if (VT == MVT::v16i8) {
3088  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3089  return;
3090  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3091  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3092  return;
3093  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3094  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3095  return;
3096  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3097  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3098  return;
3099  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3100  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3101  return;
3102  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3103  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3104  return;
3105  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3106  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3107  return;
3108  }
3109  break;
3110  case Intrinsic::aarch64_neon_ld2lane:
3111  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3112  SelectLoadLane(Node, 2, AArch64::LD2i8);
3113  return;
3114  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3115  VT == MVT::v8f16) {
3116  SelectLoadLane(Node, 2, AArch64::LD2i16);
3117  return;
3118  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3119  VT == MVT::v2f32) {
3120  SelectLoadLane(Node, 2, AArch64::LD2i32);
3121  return;
3122  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3123  VT == MVT::v1f64) {
3124  SelectLoadLane(Node, 2, AArch64::LD2i64);
3125  return;
3126  }
3127  break;
3128  case Intrinsic::aarch64_neon_ld3lane:
3129  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3130  SelectLoadLane(Node, 3, AArch64::LD3i8);
3131  return;
3132  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3133  VT == MVT::v8f16) {
3134  SelectLoadLane(Node, 3, AArch64::LD3i16);
3135  return;
3136  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3137  VT == MVT::v2f32) {
3138  SelectLoadLane(Node, 3, AArch64::LD3i32);
3139  return;
3140  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3141  VT == MVT::v1f64) {
3142  SelectLoadLane(Node, 3, AArch64::LD3i64);
3143  return;
3144  }
3145  break;
3146  case Intrinsic::aarch64_neon_ld4lane:
3147  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3148  SelectLoadLane(Node, 4, AArch64::LD4i8);
3149  return;
3150  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3151  VT == MVT::v8f16) {
3152  SelectLoadLane(Node, 4, AArch64::LD4i16);
3153  return;
3154  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3155  VT == MVT::v2f32) {
3156  SelectLoadLane(Node, 4, AArch64::LD4i32);
3157  return;
3158  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3159  VT == MVT::v1f64) {
3160  SelectLoadLane(Node, 4, AArch64::LD4i64);
3161  return;
3162  }
3163  break;
3164  }
3165  } break;
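  // Intrinsics without a chain: the NEON table lookups (TBL producing a fresh
  // result, TBX merging into an existing destination vector) and the
  // smull/umull patterns where one operand is a lane extracted from a
  // 128-bit vector.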
3166  case ISD::INTRINSIC_WO_CHAIN: {
3167  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3168  switch (IntNo) {
3169  default:
3170  break;
3171  case Intrinsic::aarch64_neon_tbl2:
3172  SelectTable(Node, 2,
3173  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3174  false);
3175  return;
3176  case Intrinsic::aarch64_neon_tbl3:
3177  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3178  : AArch64::TBLv16i8Three,
3179  false);
3180  return;
3181  case Intrinsic::aarch64_neon_tbl4:
3182  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3183  : AArch64::TBLv16i8Four,
3184  false);
3185  return;
3186  case Intrinsic::aarch64_neon_tbx2:
3187  SelectTable(Node, 2,
3188  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3189  true);
3190  return;
3191  case Intrinsic::aarch64_neon_tbx3:
3192  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3193  : AArch64::TBXv16i8Three,
3194  true);
3195  return;
3196  case Intrinsic::aarch64_neon_tbx4:
3197  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3198  : AArch64::TBXv16i8Four,
3199  true);
3200  return;
3201  case Intrinsic::aarch64_neon_smull:
3202  case Intrinsic::aarch64_neon_umull:
3203  if (tryMULLV64LaneV128(IntNo, Node))
3204  return;
3205  break;
3206  }
3207  break;
3208  }
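  // Intrinsics with side effects but no result: the NEON structured stores.
  // ST1 multi-register (st1x2/x3/x4), the interleaving ST2-ST4, and their
  // single-lane variants all key off the type of the first stored vector
  // (operand 2, after the chain and the intrinsic ID).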
3209  case ISD::INTRINSIC_VOID: {
3210  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3211  if (Node->getNumOperands() >= 3)
3212  VT = Node->getOperand(2)->getValueType(0);
3213  switch (IntNo) {
3214  default:
3215  break;
3216  case Intrinsic::aarch64_neon_st1x2: {
3217  if (VT == MVT::v8i8) {
3218  SelectStore(Node, 2, AArch64::ST1Twov8b);
3219  return;
3220  } else if (VT == MVT::v16i8) {
3221  SelectStore(Node, 2, AArch64::ST1Twov16b);
3222  return;
3223  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3224  SelectStore(Node, 2, AArch64::ST1Twov4h);
3225  return;
3226  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3227  SelectStore(Node, 2, AArch64::ST1Twov8h);
3228  return;
3229  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3230  SelectStore(Node, 2, AArch64::ST1Twov2s);
3231  return;
3232  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3233  SelectStore(Node, 2, AArch64::ST1Twov4s);
3234  return;
3235  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3236  SelectStore(Node, 2, AArch64::ST1Twov2d);
3237  return;
3238  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3239  SelectStore(Node, 2, AArch64::ST1Twov1d);
3240  return;
3241  }
3242  break;
3243  }
3244  case Intrinsic::aarch64_neon_st1x3: {
3245  if (VT == MVT::v8i8) {
3246  SelectStore(Node, 3, AArch64::ST1Threev8b);
3247  return;
3248  } else if (VT == MVT::v16i8) {
3249  SelectStore(Node, 3, AArch64::ST1Threev16b);
3250  return;
3251  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3252  SelectStore(Node, 3, AArch64::ST1Threev4h);
3253  return;
3254  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3255  SelectStore(Node, 3, AArch64::ST1Threev8h);
3256  return;
3257  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3258  SelectStore(Node, 3, AArch64::ST1Threev2s);
3259  return;
3260  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3261  SelectStore(Node, 3, AArch64::ST1Threev4s);
3262  return;
3263  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3264  SelectStore(Node, 3, AArch64::ST1Threev2d);
3265  return;
3266  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3267  SelectStore(Node, 3, AArch64::ST1Threev1d);
3268  return;
3269  }
3270  break;
3271  }
3272  case Intrinsic::aarch64_neon_st1x4: {
3273  if (VT == MVT::v8i8) {
3274  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3275  return;
3276  } else if (VT == MVT::v16i8) {
3277  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3278  return;
3279  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3280  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3281  return;
3282  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3283  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3284  return;
3285  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3286  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3287  return;
3288  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3289  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3290  return;
3291  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3292  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3293  return;
3294  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3295  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3296  return;
3297  }
3298  break;
3299  }
3300  case Intrinsic::aarch64_neon_st2: {
3301  if (VT == MVT::v8i8) {
3302  SelectStore(Node, 2, AArch64::ST2Twov8b);
3303  return;
3304  } else if (VT == MVT::v16i8) {
3305  SelectStore(Node, 2, AArch64::ST2Twov16b);
3306  return;
3307  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3308  SelectStore(Node, 2, AArch64::ST2Twov4h);
3309  return;
3310  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3311  SelectStore(Node, 2, AArch64::ST2Twov8h);
3312  return;
3313  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3314  SelectStore(Node, 2, AArch64::ST2Twov2s);
3315  return;
3316  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3317  SelectStore(Node, 2, AArch64::ST2Twov4s);
3318  return;
3319  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3320  SelectStore(Node, 2, AArch64::ST2Twov2d);
3321  return;
3322  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3323  SelectStore(Node, 2, AArch64::ST1Twov1d);
3324  return;
3325  }
3326  break;
3327  }
3328  case Intrinsic::aarch64_neon_st3: {
3329  if (VT == MVT::v8i8) {
3330  SelectStore(Node, 3, AArch64::ST3Threev8b);
3331  return;
3332  } else if (VT == MVT::v16i8) {
3333  SelectStore(Node, 3, AArch64::ST3Threev16b);
3334  return;
3335  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3336  SelectStore(Node, 3, AArch64::ST3Threev4h);
3337  return;
3338  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3339  SelectStore(Node, 3, AArch64::ST3Threev8h);
3340  return;
3341  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3342  SelectStore(Node, 3, AArch64::ST3Threev2s);
3343  return;
3344  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3345  SelectStore(Node, 3, AArch64::ST3Threev4s);
3346  return;
3347  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3348  SelectStore(Node, 3, AArch64::ST3Threev2d);
3349  return;
3350  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3351  SelectStore(Node, 3, AArch64::ST1Threev1d);
3352  return;
3353  }
3354  break;
3355  }
3356  case Intrinsic::aarch64_neon_st4: {
3357  if (VT == MVT::v8i8) {
3358  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3359  return;
3360  } else if (VT == MVT::v16i8) {
3361  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3362  return;
3363  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3364  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3365  return;
3366  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3367  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3368  return;
3369  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3370  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3371  return;
3372  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3373  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3374  return;
3375  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3376  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3377  return;
3378  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3379  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3380  return;
3381  }
3382  break;
3383  }
3384  case Intrinsic::aarch64_neon_st2lane: {
3385  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3386  SelectStoreLane(Node, 2, AArch64::ST2i8);
3387  return;
3388  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3389  VT == MVT::v8f16) {
3390  SelectStoreLane(Node, 2, AArch64::ST2i16);
3391  return;
3392  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3393  VT == MVT::v2f32) {
3394  SelectStoreLane(Node, 2, AArch64::ST2i32);
3395  return;
3396  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3397  VT == MVT::v1f64) {
3398  SelectStoreLane(Node, 2, AArch64::ST2i64);
3399  return;
3400  }
3401  break;
3402  }
3403  case Intrinsic::aarch64_neon_st3lane: {
3404  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3405  SelectStoreLane(Node, 3, AArch64::ST3i8);
3406  return;
3407  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3408  VT == MVT::v8f16) {
3409  SelectStoreLane(Node, 3, AArch64::ST3i16);
3410  return;
3411  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3412  VT == MVT::v2f32) {
3413  SelectStoreLane(Node, 3, AArch64::ST3i32);
3414  return;
3415  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3416  VT == MVT::v1f64) {
3417  SelectStoreLane(Node, 3, AArch64::ST3i64);
3418  return;
3419  }
3420  break;
3421  }
3422  case Intrinsic::aarch64_neon_st4lane: {
3423  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3424  SelectStoreLane(Node, 4, AArch64::ST4i8);
3425  return;
3426  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3427  VT == MVT::v8f16) {
3428  SelectStoreLane(Node, 4, AArch64::ST4i16);
3429  return;
3430  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3431  VT == MVT::v2f32) {
3432  SelectStoreLane(Node, 4, AArch64::ST4i32);
3433  return;
3434  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3435  VT == MVT::v1f64) {
3436  SelectStoreLane(Node, 4, AArch64::ST4i64);
3437  return;
3438  }
3439  break;
3440  }
3441  }
3442  break;
3443  }
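  // The remaining cases are target nodes for post-incremented structure
  // loads and stores, typically formed by the AArch64 DAG combiner when a
  // NEON load/store intrinsic is followed by an increment of its base
  // pointer. Multi-register loads select the matching *_POST instruction and
  // extract the results through dsub0/qsub0 depending on register width.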
3444  case AArch64ISD::LD2post: {
3445  if (VT == MVT::v8i8) {
3446  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3447  return;
3448  } else if (VT == MVT::v16i8) {
3449  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3450  return;
3451  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3452  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3453  return;
3454  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3455  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3456  return;
3457  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3458  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3459  return;
3460  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3461  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3462  return;
3463  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3464  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3465  return;
3466  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3467  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3468  return;
3469  }
3470  break;
3471  }
3472  case AArch64ISD::LD3post: {
3473  if (VT == MVT::v8i8) {
3474  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3475  return;
3476  } else if (VT == MVT::v16i8) {
3477  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3478  return;
3479  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3480  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3481  return;
3482  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3483  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3484  return;
3485  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3486  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3487  return;
3488  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3489  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3490  return;
3491  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3492  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3493  return;
3494  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3495  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3496  return;
3497  }
3498  break;
3499  }
3500  case AArch64ISD::LD4post: {
3501  if (VT == MVT::v8i8) {
3502  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3503  return;
3504  } else if (VT == MVT::v16i8) {
3505  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3506  return;
3507  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3508  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3509  return;
3510  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3511  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3512  return;
3513  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3514  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3515  return;
3516  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3517  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3518  return;
3519  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3520  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3521  return;
3522  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3523  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3524  return;
3525  }
3526  break;
3527  }
3528  case AArch64ISD::LD1x2post: {
3529  if (VT == MVT::v8i8) {
3530  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3531  return;
3532  } else if (VT == MVT::v16i8) {
3533  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3534  return;
3535  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3536  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3537  return;
3538  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3539  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3540  return;
3541  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3542  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3543  return;
3544  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3545  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3546  return;
3547  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3548  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3549  return;
3550  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3551  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3552  return;
3553  }
3554  break;
3555  }
3556  case AArch64ISD::LD1x3post: {
3557  if (VT == MVT::v8i8) {
3558  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3559  return;
3560  } else if (VT == MVT::v16i8) {
3561  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3562  return;
3563  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3564  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3565  return;
3566  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3567  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3568  return;
3569  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3570  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3571  return;
3572  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3573  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3574  return;
3575  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3576  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3577  return;
3578  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3579  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3580  return;
3581  }
3582  break;
3583  }
3584  case AArch64ISD::LD1x4post: {
3585  if (VT == MVT::v8i8) {
3586  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3587  return;
3588  } else if (VT == MVT::v16i8) {
3589  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3590  return;
3591  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3592  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3593  return;
3594  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3595  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3596  return;
3597  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3598  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3599  return;
3600  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3601  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3602  return;
3603  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3604  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3605  return;
3606  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3607  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3608  return;
3609  }
3610  break;
3611  }
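  // LDnDUPpost: post-incremented load-and-replicate (LD1R..LD4R) forms,
  // which broadcast each loaded element to every lane of the result vectors.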
3612  case AArch64ISD::LD1DUPpost: {
3613  if (VT == MVT::v8i8) {
3614  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3615  return;
3616  } else if (VT == MVT::v16i8) {
3617  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3618  return;
3619  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3620  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3621  return;
3622  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3623  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3624  return;
3625  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3626  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3627  return;
3628  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3629  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3630  return;
3631  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3632  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3633  return;
3634  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3635  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3636  return;
3637  }
3638  break;
3639  }
3640  case AArch64ISD::LD2DUPpost: {
3641  if (VT == MVT::v8i8) {
3642  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3643  return;
3644  } else if (VT == MVT::v16i8) {
3645  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3646  return;
3647  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3648  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3649  return;
3650  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3651  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3652  return;
3653  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3654  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3655  return;
3656  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3657  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3658  return;
3659  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3660  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3661  return;
3662  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3663  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3664  return;
3665  }
3666  break;
3667  }
3668  case AArch64ISD::LD3DUPpost: {
3669  if (VT == MVT::v8i8) {
3670  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3671  return;
3672  } else if (VT == MVT::v16i8) {
3673  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3674  return;
3675  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3676  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3677  return;
3678  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3679  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3680  return;
3681  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3682  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3683  return;
3684  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3685  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3686  return;
3687  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3688  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3689  return;
3690  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3691  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3692  return;
3693  }
3694  break;
3695  }
3696  case AArch64ISD::LD4DUPpost: {
3697  if (VT == MVT::v8i8) {
3698  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3699  return;
3700  } else if (VT == MVT::v16i8) {
3701  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3702  return;
3703  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3704  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3705  return;
3706  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3707  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3708  return;
3709  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3710  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3711  return;
3712  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3713  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3714  return;
3715  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3716  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3717  return;
3718  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3719  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3720  return;
3721  }
3722  break;
3723  }
3724  case AArch64ISD::LD1LANEpost: {
3725  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3726  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3727  return;
3728  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3729  VT == MVT::v8f16) {
3730  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3731  return;
3732  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3733  VT == MVT::v2f32) {
3734  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3735  return;
3736  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3737  VT == MVT::v1f64) {
3738  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3739  return;
3740  }
3741  break;
3742  }
3743  case AArch64ISD::LD2LANEpost: {
3744  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3745  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3746  return;
3747  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3748  VT == MVT::v8f16) {
3749  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3750  return;
3751  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3752  VT == MVT::v2f32) {
3753  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3754  return;
3755  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3756  VT == MVT::v1f64) {
3757  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3758  return;
3759  }
3760  break;
3761  }
3762  case AArch64ISD::LD3LANEpost: {
3763  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3764  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3765  return;
3766  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3767  VT == MVT::v8f16) {
3768  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3769  return;
3770  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3771  VT == MVT::v2f32) {
3772  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3773  return;
3774  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3775  VT == MVT::v1f64) {
3776  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3777  return;
3778  }
3779  break;
3780  }
3781  case AArch64ISD::LD4LANEpost: {
3782  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3783  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3784  return;
3785  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3786  VT == MVT::v8f16) {
3787  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3788  return;
3789  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3790  VT == MVT::v2f32) {
3791  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3792  return;
3793  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3794  VT == MVT::v1f64) {
3795  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3796  return;
3797  }
3798  break;
3799  }
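  // Post-incremented structure stores. Store nodes carry no vector results,
  // so the element type is taken from the first stored vector operand.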
3800  case AArch64ISD::ST2post: {
3801  VT = Node->getOperand(1).getValueType();
3802  if (VT == MVT::v8i8) {
3803  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3804  return;
3805  } else if (VT == MVT::v16i8) {
3806  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3807  return;
3808  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3809  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3810  return;
3811  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3812  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3813  return;
3814  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3815  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3816  return;
3817  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3818  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3819  return;
3820  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3821  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3822  return;
3823  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3824  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3825  return;
3826  }
3827  break;
3828  }
3829  case AArch64ISD::ST3post: {
3830  VT = Node->getOperand(1).getValueType();
3831  if (VT == MVT::v8i8) {
3832  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3833  return;
3834  } else if (VT == MVT::v16i8) {
3835  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3836  return;
3837  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3838  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3839  return;
3840  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3841  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3842  return;
3843  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3844  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3845  return;
3846  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3847  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3848  return;
3849  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3850  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3851  return;
3852  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3853  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3854  return;
3855  }
3856  break;
3857  }
3858  case AArch64ISD::ST4post: {
3859  VT = Node->getOperand(1).getValueType();
3860  if (VT == MVT::v8i8) {
3861  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3862  return;
3863  } else if (VT == MVT::v16i8) {
3864  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3865  return;
3866  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3867  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3868  return;
3869  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3870  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3871  return;
3872  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3873  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3874  return;
3875  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3876  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3877  return;
3878  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3879  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3880  return;
3881  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3882  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3883  return;
3884  }
3885  break;
3886  }
3887  case AArch64ISD::ST1x2post: {
3888  VT = Node->getOperand(1).getValueType();
3889  if (VT == MVT::v8i8) {
3890  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3891  return;
3892  } else if (VT == MVT::v16i8) {
3893  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3894  return;
3895  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3896  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3897  return;
3898  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3899  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3900  return;
3901  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3902  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3903  return;
3904  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3905  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3906  return;
3907  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3908  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3909  return;
3910  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3911  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3912  return;
3913  }
3914  break;
3915  }
3916  case AArch64ISD::ST1x3post: {
3917  VT = Node->getOperand(1).getValueType();
3918  if (VT == MVT::v8i8) {
3919  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
3920  return;
3921  } else if (VT == MVT::v16i8) {
3922  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
3923  return;
3924  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3925  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
3926  return;
3927  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3928  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
3929  return;
3930  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3931  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
3932  return;
3933  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3934  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
3935  return;
3936  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3937  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3938  return;
3939  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3940  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
3941  return;
3942  }
3943  break;
3944  }
3945  case AArch64ISD::ST1x4post: {
3946  VT = Node->getOperand(1).getValueType();
3947  if (VT == MVT::v8i8) {
3948  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
3949  return;
3950  } else if (VT == MVT::v16i8) {
3951  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
3952  return;
3953  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3954  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
3955  return;
3956  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3957  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
3958  return;
3959  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3960  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
3961  return;
3962  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3963  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
3964  return;
3965  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3966  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3967  return;
3968  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3969  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
3970  return;
3971  }
3972  break;
3973  }
3974  case AArch64ISD::ST2LANEpost: {
3975  VT = Node->getOperand(1).getValueType();
3976  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3977  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
3978  return;
3979  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3980  VT == MVT::v8f16) {
3981  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
3982  return;
3983  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3984  VT == MVT::v2f32) {
3985  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
3986  return;
3987  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3988  VT == MVT::v1f64) {
3989  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
3990  return;
3991  }
3992  break;
3993  }
3994  case AArch64ISD::ST3LANEpost: {
3995  VT = Node->getOperand(1).getValueType();
3996  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3997  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
3998  return;
3999  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4000  VT == MVT::v8f16) {
4001  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4002  return;
4003  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4004  VT == MVT::v2f32) {
4005  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4006  return;
4007  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4008  VT == MVT::v1f64) {
4009  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4010  return;
4011  }
4012  break;
4013  }
4014  case AArch64ISD::ST4LANEpost: {
4015  VT = Node->getOperand(1).getValueType();
4016  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4017  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4018  return;
4019  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4020  VT == MVT::v8f16) {
4021  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4022  return;
4023  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4024  VT == MVT::v2f32) {
4025  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4026  return;
4027  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4028  VT == MVT::v1f64) {
4029  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4030  return;
4031  }
4032  break;
4033  }
4034  }
4035 
4036  // No target-specific pattern matched; fall back to the
4037  // TableGen-generated matcher.
4037  SelectCode(Node);
4038 }
4039 
4040 /// createAArch64ISelDag - This pass converts a legalized DAG into a
4041 /// AArch64-specific DAG, ready for instruction scheduling.
4042 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4043                                          CodeGenOpt::Level OptLevel) {
4044  return new AArch64DAGToDAGISel(TM, OptLevel);
4045 }
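// Rough usage sketch (not part of this file, shown only for orientation): the
// AArch64 target's pass configuration is expected to register this pass when
// instruction selection is set up, roughly as follows. The hook name and
// accessors follow the usual TargetPassConfig conventions and are not taken
// verbatim from this file.
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }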