1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the AArch64 target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/KnownBits.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "aarch64-isel"
30 
31 //===--------------------------------------------------------------------===//
32 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
33 /// instructions for SelectionDAG operations.
34 ///
35 namespace {
36 
37 class AArch64DAGToDAGISel : public SelectionDAGISel {
38 
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42 
43  bool ForCodeSize;
44 
45 public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47  CodeGenOpt::Level OptLevel)
48  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
49  ForCodeSize(false) {}
50 
51  StringRef getPassName() const override {
52  return "AArch64 Instruction Selection";
53  }
54 
55  bool runOnMachineFunction(MachineFunction &MF) override {
56  ForCodeSize = MF.getFunction().optForSize();
57  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
58  return SelectionDAGISel::runOnMachineFunction(MF);
59  }
60 
61  void Select(SDNode *Node) override;
62 
63  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
64  /// inline asm expressions.
65  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
66  unsigned ConstraintID,
67  std::vector<SDValue> &OutOps) override;
68 
69  bool tryMLAV64LaneV128(SDNode *N);
70  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75  return SelectShiftedRegister(N, false, Reg, Shift);
76  }
77  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78  return SelectShiftedRegister(N, true, Reg, Shift);
79  }
80  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82  }
83  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85  }
86  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88  }
89  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91  }
92  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94  }
95  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
96  return SelectAddrModeIndexed(N, 1, Base, OffImm);
97  }
98  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
99  return SelectAddrModeIndexed(N, 2, Base, OffImm);
100  }
101  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
102  return SelectAddrModeIndexed(N, 4, Base, OffImm);
103  }
104  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
105  return SelectAddrModeIndexed(N, 8, Base, OffImm);
106  }
107  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
108  return SelectAddrModeIndexed(N, 16, Base, OffImm);
109  }
110  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
111  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
112  }
113  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
114  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
115  }
116  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
117  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
118  }
119  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
120  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
121  }
122  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
123  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
124  }
125 
126  template<int Width>
127  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
128  SDValue &SignExtend, SDValue &DoShift) {
129  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
130  }
131 
132  template<int Width>
133  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
134  SDValue &SignExtend, SDValue &DoShift) {
135  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
136  }
137 
138 
139  /// Form sequences of consecutive 64/128-bit registers for use in NEON
140  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
141  /// between 1 and 4 elements. If it contains a single element, that element is
142  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
143  SDValue createDTuple(ArrayRef<SDValue> Vecs);
144  SDValue createQTuple(ArrayRef<SDValue> Vecs);
145 
146  /// Generic helper for the createDTuple/createQTuple
147  /// functions. Those should almost always be called instead.
148  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
149  const unsigned SubRegs[]);
150 
151  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
152 
153  bool tryIndexedLoad(SDNode *N);
154 
155  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
156  unsigned SubRegIdx);
157  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
158  unsigned SubRegIdx);
159  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
161 
162  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
164  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
166 
167  bool tryBitfieldExtractOp(SDNode *N);
168  bool tryBitfieldExtractOpFromSExt(SDNode *N);
169  bool tryBitfieldInsertOp(SDNode *N);
170  bool tryBitfieldInsertInZeroOp(SDNode *N);
171  bool tryShiftAmountMod(SDNode *N);
172 
173  bool tryReadRegister(SDNode *N);
174  bool tryWriteRegister(SDNode *N);
175 
176 // Include the pieces autogenerated from the target description.
177 #include "AArch64GenDAGISel.inc"
178 
179 private:
180  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
181  SDValue &Shift);
182  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
183  SDValue &OffImm);
184  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
185  SDValue &OffImm);
186  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
187  SDValue &OffImm);
188  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
189  SDValue &Offset, SDValue &SignExtend,
190  SDValue &DoShift);
191  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
192  SDValue &Offset, SDValue &SignExtend,
193  SDValue &DoShift);
194  bool isWorthFolding(SDValue V) const;
195  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
196  SDValue &Offset, SDValue &SignExtend);
197 
198  template<unsigned RegWidth>
199  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
200  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
201  }
202 
203  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
204 
205  bool SelectCMP_SWAP(SDNode *N);
206 
207 };
208 } // end anonymous namespace
209 
210 /// isIntImmediate - This method tests to see if the node is a constant
211 /// operand. If so, Imm will receive the value.
212 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
213  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
214  Imm = C->getZExtValue();
215  return true;
216  }
217  return false;
218 }
219 
220 // isIntImmediate - This method tests to see if the value is a constant operand.
221 // If so, Imm will receive the value.
222 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
223  return isIntImmediate(N.getNode(), Imm);
224 }
225 
226 // isOpcWithIntImmediate - This method tests to see if the node is a specific
227 // opcode and that it has an immediate integer right operand.
228 // If so, Imm will receive the value.
229 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
230  uint64_t &Imm) {
231  return N->getOpcode() == Opc &&
232  isIntImmediate(N->getOperand(1).getNode(), Imm);
233 }
234 
235 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
236  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
237  switch(ConstraintID) {
238  default:
239  llvm_unreachable("Unexpected asm memory constraint");
240  case InlineAsm::Constraint_i:
241  case InlineAsm::Constraint_m:
242  case InlineAsm::Constraint_Q:
243  // We need to make sure that this one operand does not end up in XZR, thus
244  // require the address to be in a PointerRegClass register.
245  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
246  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
247  SDLoc dl(Op);
248  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
249  SDValue NewOp =
250  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
251  dl, Op.getValueType(),
252  Op, RC), 0);
253  OutOps.push_back(NewOp);
254  return false;
255  }
256  return true;
257 }
258 
259 /// SelectArithImmed - Select an immediate value that can be represented as
260 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
261 /// Val set to the 12-bit value and Shift set to the shifter operand.
262 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
263  SDValue &Shift) {
264  // This function is called from the addsub_shifted_imm ComplexPattern,
265  // which lists [imm] as the list of opcodes it is interested in; however,
266  // we still need to check whether the operand is actually an immediate
267  // here because the ComplexPattern opcode list is only used in
268  // root-level opcode matching.
269  if (!isa<ConstantSDNode>(N.getNode()))
270  return false;
271 
272  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
273  unsigned ShiftAmt;
274 
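  // The encodable forms are a plain 12-bit immediate (LSL #0) or a 12-bit
  // immediate shifted left by 12, e.g. 0xabc -> (0xabc, LSL #0) and
  // 0xabc000 -> (0xabc, LSL #12).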
275  if (Immed >> 12 == 0) {
276  ShiftAmt = 0;
277  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
278  ShiftAmt = 12;
279  Immed = Immed >> 12;
280  } else
281  return false;
282 
283  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
284  SDLoc dl(N);
285  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
286  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
287  return true;
288 }
289 
290 /// SelectNegArithImmed - As above, but negates the value before trying to
291 /// select it.
292 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
293  SDValue &Shift) {
294  // This function is called from the addsub_shifted_imm ComplexPattern,
295  // which lists [imm] as the list of opcodes it is interested in; however,
296  // we still need to check whether the operand is actually an immediate
297  // here because the ComplexPattern opcode list is only used in
298  // root-level opcode matching.
299  if (!isa<ConstantSDNode>(N.getNode()))
300  return false;
301 
302  // The immediate operand must be a 24-bit zero-extended immediate.
303  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
304 
305  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
306  // have the opposite effect on the C flag, so this pattern mustn't match under
307  // those circumstances.
308  if (Immed == 0)
309  return false;
310 
311  if (N.getValueType() == MVT::i32)
312  Immed = ~((uint32_t)Immed) + 1;
313  else
314  Immed = ~Immed + 1ULL;
315  if (Immed & 0xFFFFFFFFFF000000ULL)
316  return false;
317 
318  Immed &= 0xFFFFFFULL;
319  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
320  Shift);
321 }
322 
323 /// getShiftTypeForNode - Translate a shift node to the corresponding
324 /// ShiftType value.
325 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
326  switch (N.getOpcode()) {
327  default:
328  return AArch64_AM::InvalidShiftExtend;
329  case ISD::SHL:
330  return AArch64_AM::LSL;
331  case ISD::SRL:
332  return AArch64_AM::LSR;
333  case ISD::SRA:
334  return AArch64_AM::ASR;
335  case ISD::ROTR:
336  return AArch64_AM::ROR;
337  }
338 }
339 
340 /// Determine whether it is worth it to fold SHL into the addressing
341 /// mode.
342 static bool isWorthFoldingSHL(SDValue V) {
343  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
344  // It is worth folding a logical shift of up to three places.
345  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
346  if (!CSD)
347  return false;
348  unsigned ShiftVal = CSD->getZExtValue();
349  if (ShiftVal > 3)
350  return false;
351 
352  // Check if this particular node is reused in any non-memory related
353  // operation. If yes, do not try to fold this node into the address
354  // computation, since the computation will be kept.
355  const SDNode *Node = V.getNode();
356  for (SDNode *UI : Node->uses())
357  if (!isa<MemSDNode>(*UI))
358  for (SDNode *UII : UI->uses())
359  if (!isa<MemSDNode>(*UII))
360  return false;
361  return true;
362 }
363 
364 /// Determine whether it is worth folding V into an extended register.
365 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
366  // Trivial if we are optimizing for code size or if there is only
367  // one use of the value.
368  if (ForCodeSize || V.hasOneUse())
369  return true;
370  // If a subtarget has a fastpath LSL we can fold a logical shift into
371  // the addressing mode and save a cycle.
372  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
373  isWorthFoldingSHL(V))
374  return true;
375  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
376  const SDValue LHS = V.getOperand(0);
377  const SDValue RHS = V.getOperand(1);
378  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
379  return true;
380  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
381  return true;
382  }
383 
384  // It hurts otherwise, since the value will be reused.
385  return false;
386 }
387 
388 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
389 /// is not shifted, set the Shift operand to the default of "LSL #0". The logical
390 /// instructions allow the shifted register to be rotated, but the arithmetic
391 /// instructions do not. The AllowROR parameter specifies whether ROR is
392 /// supported.
393 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
394  SDValue &Reg, SDValue &Shift) {
395  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
396  if (ShType == AArch64_AM::InvalidShiftExtend)
397  return false;
398  if (!AllowROR && ShType == AArch64_AM::ROR)
399  return false;
400 
401  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
402  unsigned BitSize = N.getValueSizeInBits();
403  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
404  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
405 
406  Reg = N.getOperand(0);
407  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
408  return isWorthFolding(N);
409  }
410 
411  return false;
412 }
413 
414 /// getExtendTypeForNode - Translate an extend node to the corresponding
415 /// ExtendType value.
416 static AArch64_AM::ShiftExtendType
417 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
418  if (N.getOpcode() == ISD::SIGN_EXTEND ||
419  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
420  EVT SrcVT;
421  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
422  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
423  else
424  SrcVT = N.getOperand(0).getValueType();
425 
426  if (!IsLoadStore && SrcVT == MVT::i8)
427  return AArch64_AM::SXTB;
428  else if (!IsLoadStore && SrcVT == MVT::i16)
429  return AArch64_AM::SXTH;
430  else if (SrcVT == MVT::i32)
431  return AArch64_AM::SXTW;
432  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
433 
434  return AArch64_AM::InvalidShiftExtend;
435  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
436  N.getOpcode() == ISD::ANY_EXTEND) {
437  EVT SrcVT = N.getOperand(0).getValueType();
438  if (!IsLoadStore && SrcVT == MVT::i8)
439  return AArch64_AM::UXTB;
440  else if (!IsLoadStore && SrcVT == MVT::i16)
441  return AArch64_AM::UXTH;
442  else if (SrcVT == MVT::i32)
443  return AArch64_AM::UXTW;
444  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
445 
446  return AArch64_AM::InvalidShiftExtend;
447  } else if (N.getOpcode() == ISD::AND) {
448  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
449  if (!CSD)
450  return AArch64_AM::InvalidShiftExtend;
451  uint64_t AndMask = CSD->getZExtValue();
452 
453  switch (AndMask) {
454  default:
455  return AArch64_AM::InvalidShiftExtend;
456  case 0xFF:
457  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
458  case 0xFFFF:
459  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
460  case 0xFFFFFFFF:
461  return AArch64_AM::UXTW;
462  }
463  }
464 
465  return AArch64_AM::InvalidShiftExtend;
466 }
467 
468 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
469 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
470  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
471  DL->getOpcode() != AArch64ISD::DUPLANE32)
472  return false;
473 
474  SDValue SV = DL->getOperand(0);
475  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
476  return false;
477 
478  SDValue EV = SV.getOperand(1);
479  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
480  return false;
481 
482  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
483  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
484  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
485  LaneOp = EV.getOperand(0);
486 
487  return true;
488 }
489 
490 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
491 // high lane extract.
492 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
493  SDValue &LaneOp, int &LaneIdx) {
494 
495  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
496  std::swap(Op0, Op1);
497  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
498  return false;
499  }
500  StdOp = Op1;
501  return true;
502 }
503 
504 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
505 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
506 /// so that we don't emit unnecessary lane extracts.
507 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
508  SDLoc dl(N);
509  SDValue Op0 = N->getOperand(0);
510  SDValue Op1 = N->getOperand(1);
511  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
512  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
513  int LaneIdx = -1; // Will hold the lane index.
514 
515  if (Op1.getOpcode() != ISD::MUL ||
516  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
517  LaneIdx)) {
518  std::swap(Op0, Op1);
519  if (Op1.getOpcode() != ISD::MUL ||
520  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
521  LaneIdx))
522  return false;
523  }
524 
525  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
526 
527  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
528 
529  unsigned MLAOpc = ~0U;
530 
531  switch (N->getSimpleValueType(0).SimpleTy) {
532  default:
533  llvm_unreachable("Unrecognized MLA.");
534  case MVT::v4i16:
535  MLAOpc = AArch64::MLAv4i16_indexed;
536  break;
537  case MVT::v8i16:
538  MLAOpc = AArch64::MLAv8i16_indexed;
539  break;
540  case MVT::v2i32:
541  MLAOpc = AArch64::MLAv2i32_indexed;
542  break;
543  case MVT::v4i32:
544  MLAOpc = AArch64::MLAv4i32_indexed;
545  break;
546  }
547 
548  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
549  return true;
550 }
551 
552 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
553  SDLoc dl(N);
554  SDValue SMULLOp0;
555  SDValue SMULLOp1;
556  int LaneIdx;
557 
558  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
559  LaneIdx))
560  return false;
561 
562  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
563 
564  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
565 
566  unsigned SMULLOpc = ~0U;
567 
568  if (IntNo == Intrinsic::aarch64_neon_smull) {
569  switch (N->getSimpleValueType(0).SimpleTy) {
570  default:
571  llvm_unreachable("Unrecognized SMULL.");
572  case MVT::v4i32:
573  SMULLOpc = AArch64::SMULLv4i16_indexed;
574  break;
575  case MVT::v2i64:
576  SMULLOpc = AArch64::SMULLv2i32_indexed;
577  break;
578  }
579  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
580  switch (N->getSimpleValueType(0).SimpleTy) {
581  default:
582  llvm_unreachable("Unrecognized SMULL.");
583  case MVT::v4i32:
584  SMULLOpc = AArch64::UMULLv4i16_indexed;
585  break;
586  case MVT::v2i64:
587  SMULLOpc = AArch64::UMULLv2i32_indexed;
588  break;
589  }
590  } else
591  llvm_unreachable("Unrecognized intrinsic.");
592 
593  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
594  return true;
595 }
596 
597 /// Instructions that accept extend modifiers like UXTW expect the register
598 /// being extended to be a GPR32, but the incoming DAG might be acting on a
599 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
600 /// this is the case.
601 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
602  if (N.getValueType() == MVT::i32)
603  return N;
604 
605  SDLoc dl(N);
606  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
607  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
608  dl, MVT::i32, N, SubReg);
609  return SDValue(Node, 0);
610 }
611 
612 
613 /// SelectArithExtendedRegister - Select an "extended register" operand. This
614 /// operand folds in an extend followed by an optional left shift.
615 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
616  SDValue &Shift) {
617  unsigned ShiftVal = 0;
618  AArch64_AM::ShiftExtendType Ext;
619 
620  if (N.getOpcode() == ISD::SHL) {
621  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
622  if (!CSD)
623  return false;
624  ShiftVal = CSD->getZExtValue();
625  if (ShiftVal > 4)
626  return false;
627 
628  Ext = getExtendTypeForNode(N.getOperand(0));
629  if (Ext == AArch64_AM::InvalidShiftExtend)
630  return false;
631 
632  Reg = N.getOperand(0).getOperand(0);
633  } else {
634  Ext = getExtendTypeForNode(N);
635  if (Ext == AArch64_AM::InvalidShiftExtend)
636  return false;
637 
638  Reg = N.getOperand(0);
639 
640  // Don't match if free 32-bit -> 64-bit zext can be used instead.
641  if (Ext == AArch64_AM::UXTW &&
642  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
643  return false;
644  }
645 
646  // AArch64 mandates that the RHS of the operation must use the smallest
647  // register class that could contain the size being extended from. Thus,
648  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
649  // there might not be an actual 32-bit value in the program. We can
650  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
651  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
652  Reg = narrowIfNeeded(CurDAG, Reg);
653  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
654  MVT::i32);
655  return isWorthFolding(N);
656 }
657 
658 /// If there's a use of this ADDlow that's not itself a load/store then we'll
659 /// need to create a real ADD instruction from it anyway and there's no point in
660 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
661 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
662 /// leads to duplicated ADRP instructions.
663 static bool isWorthFoldingADDlow(SDValue N) {
664  for (auto Use : N->uses()) {
665  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
666  Use->getOpcode() != ISD::ATOMIC_LOAD &&
667  Use->getOpcode() != ISD::ATOMIC_STORE)
668  return false;
669 
670  // ldar and stlr have much more restrictive addressing modes (just a
671  // register).
672  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
673  return false;
674  }
675 
676  return true;
677 }
678 
679 /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
680 /// immediate" address. The "Size" argument is the size in bytes of the memory
681 /// reference, which determines the scale.
682 bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
683  SDValue &Base,
684  SDValue &OffImm) {
685  SDLoc dl(N);
686  const DataLayout &DL = CurDAG->getDataLayout();
687  const TargetLowering *TLI = getTargetLowering();
688  if (N.getOpcode() == ISD::FrameIndex) {
689  int FI = cast<FrameIndexSDNode>(N)->getIndex();
690  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
691  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
692  return true;
693  }
694 
695  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
696  // mode selected here doesn't support labels/immediates, only base+offset.
697 
698  if (CurDAG->isBaseWithConstantOffset(N)) {
699  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
700  int64_t RHSC = RHS->getSExtValue();
701  unsigned Scale = Log2_32(Size);
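  // The offset must be a multiple of Size and, once scaled, fit in a signed
  // 7-bit field, i.e. lie in [-64 * Size, 63 * Size].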
702  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
703  RHSC < (0x40 << Scale)) {
704  Base = N.getOperand(0);
705  if (Base.getOpcode() == ISD::FrameIndex) {
706  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
707  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
708  }
709  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
710  return true;
711  }
712  }
713  }
714 
715  // Base only. The address will be materialized into a register before
716  // the memory is accessed.
717  // add x0, Xbase, #offset
718  // stp x1, x2, [x0]
719  Base = N;
720  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
721  return true;
722 }
723 
724 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
725 /// immediate" address. The "Size" argument is the size in bytes of the memory
726 /// reference, which determines the scale.
727 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
728  SDValue &Base, SDValue &OffImm) {
729  SDLoc dl(N);
730  const DataLayout &DL = CurDAG->getDataLayout();
731  const TargetLowering *TLI = getTargetLowering();
732  if (N.getOpcode() == ISD::FrameIndex) {
733  int FI = cast<FrameIndexSDNode>(N)->getIndex();
734  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
735  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
736  return true;
737  }
738 
739  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
740  GlobalAddressSDNode *GAN =
741  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
742  Base = N.getOperand(0);
743  OffImm = N.getOperand(1);
744  if (!GAN)
745  return true;
746 
747  if (GAN->getOffset() % Size == 0) {
748  const GlobalValue *GV = GAN->getGlobal();
749  unsigned Alignment = GV->getAlignment();
750  Type *Ty = GV->getValueType();
751  if (Alignment == 0 && Ty->isSized())
752  Alignment = DL.getABITypeAlignment(Ty);
753 
754  if (Alignment >= Size)
755  return true;
756  }
757  }
758 
759  if (CurDAG->isBaseWithConstantOffset(N)) {
760  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
761  int64_t RHSC = (int64_t)RHS->getZExtValue();
762  unsigned Scale = Log2_32(Size);
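  // The offset must be a non-negative multiple of Size and, once scaled, fit
  // in an unsigned 12-bit field, i.e. lie in [0, 4095 * Size].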
763  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
764  Base = N.getOperand(0);
765  if (Base.getOpcode() == ISD::FrameIndex) {
766  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
767  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
768  }
769  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
770  return true;
771  }
772  }
773  }
774 
775  // Before falling back to our general case, check if the unscaled
776  // instructions can handle this. If so, that's preferable.
777  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
778  return false;
779 
780  // Base only. The address will be materialized into a register before
781  // the memory is accessed.
782  // add x0, Xbase, #offset
783  // ldr x0, [x0]
784  Base = N;
785  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
786  return true;
787 }
788 
789 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
790 /// immediate" address. This should only match when there is an offset that
791 /// is not valid for a scaled immediate addressing mode. The "Size" argument
792 /// is the size in bytes of the memory reference, which is needed here to know
793 /// what is valid for a scaled immediate.
794 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
795  SDValue &Base,
796  SDValue &OffImm) {
797  if (!CurDAG->isBaseWithConstantOffset(N))
798  return false;
799  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
800  int64_t RHSC = RHS->getSExtValue();
801  // If the offset is valid as a scaled immediate, don't match here.
802  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
803  RHSC < (0x1000 << Log2_32(Size)))
804  return false;
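  // The unscaled forms (LDUR/STUR) take a signed 9-bit byte offset, i.e. [-256, 255].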
805  if (RHSC >= -256 && RHSC < 256) {
806  Base = N.getOperand(0);
807  if (Base.getOpcode() == ISD::FrameIndex) {
808  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
809  const TargetLowering *TLI = getTargetLowering();
810  Base = CurDAG->getTargetFrameIndex(
811  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
812  }
813  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
814  return true;
815  }
816  }
817  return false;
818 }
819 
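/// Widen - Given a 32-bit value, place it in the low half of a fresh 64-bit
/// register via INSERT_SUBREG; the upper 32 bits are left undefined.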
820 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
821  SDLoc dl(N);
822  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
823  SDValue ImpDef = SDValue(
824  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
825  MachineSDNode *Node = CurDAG->getMachineNode(
826  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
827  return SDValue(Node, 0);
828 }
829 
830 /// Check if the given SHL node (\p N) can be used to form an
831 /// extended register for an addressing mode.
832 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
833  bool WantExtend, SDValue &Offset,
834  SDValue &SignExtend) {
835  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
836  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
837  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
838  return false;
839 
840  SDLoc dl(N);
841  if (WantExtend) {
842  AArch64_AM::ShiftExtendType Ext =
843  getExtendTypeForNode(N.getOperand(0), true);
844  if (Ext == AArch64_AM::InvalidShiftExtend)
845  return false;
846 
847  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
848  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
849  MVT::i32);
850  } else {
851  Offset = N.getOperand(0);
852  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
853  }
854 
855  unsigned LegalShiftVal = Log2_32(Size);
856  unsigned ShiftVal = CSD->getZExtValue();
857 
858  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
859  return false;
860 
861  return isWorthFolding(N);
862 }
863 
864 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
865  SDValue &Base, SDValue &Offset,
866  SDValue &SignExtend,
867  SDValue &DoShift) {
868  if (N.getOpcode() != ISD::ADD)
869  return false;
870  SDValue LHS = N.getOperand(0);
871  SDValue RHS = N.getOperand(1);
872  SDLoc dl(N);
873 
874  // We don't want to match immediate adds here, because they are better lowered
875  // to the register-immediate addressing modes.
876  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
877  return false;
878 
879  // Check if this particular node is reused in any non-memory related
880  // operation. If yes, do not try to fold this node into the address
881  // computation, since the computation will be kept.
882  const SDNode *Node = N.getNode();
883  for (SDNode *UI : Node->uses()) {
884  if (!isa<MemSDNode>(*UI))
885  return false;
886  }
887 
888  // Remember if it is worth folding N when it produces extended register.
889  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
890 
891  // Try to match a shifted extend on the RHS.
892  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
893  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
894  Base = LHS;
895  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
896  return true;
897  }
898 
899  // Try to match a shifted extend on the LHS.
900  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
901  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
902  Base = RHS;
903  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
904  return true;
905  }
906 
907  // There was no shift, whatever else we find.
908  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
909 
910  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
911  // Try to match an unshifted extend on the LHS.
912  if (IsExtendedRegisterWorthFolding &&
913  (Ext = getExtendTypeForNode(LHS, true)) !=
914  AArch64_AM::InvalidShiftExtend) {
915  Base = RHS;
916  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
917  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
918  MVT::i32);
919  if (isWorthFolding(LHS))
920  return true;
921  }
922 
923  // Try to match an unshifted extend on the RHS.
924  if (IsExtendedRegisterWorthFolding &&
925  (Ext = getExtendTypeForNode(RHS, true)) !=
926  AArch64_AM::InvalidShiftExtend) {
927  Base = LHS;
928  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
929  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
930  MVT::i32);
931  if (isWorthFolding(RHS))
932  return true;
933  }
934 
935  return false;
936 }
937 
938 // Check if the given immediate is preferred by ADD. If an immediate can be
939 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
940 // encoded by one MOVZ, return true.
941 static bool isPreferredADD(int64_t ImmOff) {
942  // Constant in [0x0, 0xfff] can be encoded in ADD.
943  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
944  return true;
945  // Check if it can be encoded in an "ADD LSL #12".
946  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
947  // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
948  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
949  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
950  return false;
951 }
952 
953 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
954  SDValue &Base, SDValue &Offset,
955  SDValue &SignExtend,
956  SDValue &DoShift) {
957  if (N.getOpcode() != ISD::ADD)
958  return false;
959  SDValue LHS = N.getOperand(0);
960  SDValue RHS = N.getOperand(1);
961  SDLoc DL(N);
962 
963  // Check if this particular node is reused in any non-memory related
964  // operation. If yes, do not try to fold this node into the address
965  // computation, since the computation will be kept.
966  const SDNode *Node = N.getNode();
967  for (SDNode *UI : Node->uses()) {
968  if (!isa<MemSDNode>(*UI))
969  return false;
970  }
971 
972  // Watch out if RHS is a wide immediate: it cannot be selected into the
973  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
974  // either. In that case the [BaseReg + 0] address mode is used and
975  // instructions like these are generated:
976  // MOV X0, WideImmediate
977  // ADD X1, BaseReg, X0
978  // LDR X2, [X1, 0]
979  // In such situations, using the [BaseReg, XReg] addressing mode can save one
980  // ADD/SUB:
981  // MOV X0, WideImmediate
982  // LDR X2, [BaseReg, X0]
983  if (isa<ConstantSDNode>(RHS)) {
984  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
985  unsigned Scale = Log2_32(Size);
986  // Skip if the immediate can be selected by the load/store addressing mode,
987  // or if it can be encoded by a single ADD (SUB is also checked by using
988  // -ImmOff).
989  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
990  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
991  return false;
992 
993  SDValue Ops[] = { RHS };
994  SDNode *MOVI =
995  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
996  SDValue MOVIV = SDValue(MOVI, 0);
997  // This ADD of two X register will be selected into [Reg+Reg] mode.
998  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
999  }
1000 
1001  // Remember if it is worth folding N when it produces extended register.
1002  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1003 
1004  // Try to match a shifted extend on the RHS.
1005  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1006  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1007  Base = LHS;
1008  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1009  return true;
1010  }
1011 
1012  // Try to match a shifted extend on the LHS.
1013  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1014  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1015  Base = RHS;
1016  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1017  return true;
1018  }
1019 
1020  // Match any non-shifted, non-extend, non-immediate add expression.
1021  Base = LHS;
1022  Offset = RHS;
1023  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1024  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1025  // Reg1 + Reg2 is free: no check needed.
1026  return true;
1027 }
1028 
1029 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1030  static const unsigned RegClassIDs[] = {
1031  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1032  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1033  AArch64::dsub2, AArch64::dsub3};
1034 
1035  return createTuple(Regs, RegClassIDs, SubRegs);
1036 }
1037 
1038 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1039  static const unsigned RegClassIDs[] = {
1040  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1041  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1042  AArch64::qsub2, AArch64::qsub3};
1043 
1044  return createTuple(Regs, RegClassIDs, SubRegs);
1045 }
1046 
1047 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1048  const unsigned RegClassIDs[],
1049  const unsigned SubRegs[]) {
1050  // There's no special register-class for a vector-list of 1 element: it's just
1051  // a vector.
1052  if (Regs.size() == 1)
1053  return Regs[0];
1054 
1055  assert(Regs.size() >= 2 && Regs.size() <= 4);
1056 
1057  SDLoc DL(Regs[0]);
1058 
1059  SmallVector<SDValue, 4> Ops;
1060 
1061  // First operand of REG_SEQUENCE is the desired RegClass.
1062  Ops.push_back(
1063  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1064 
1065  // Then we get pairs of source & subregister-position for the components.
1066  for (unsigned i = 0; i < Regs.size(); ++i) {
1067  Ops.push_back(Regs[i]);
1068  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1069  }
1070 
1071  SDNode *N =
1072  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1073  return SDValue(N, 0);
1074 }
1075 
1076 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1077  bool isExt) {
1078  SDLoc dl(N);
1079  EVT VT = N->getValueType(0);
1080 
1081  unsigned ExtOff = isExt;
1082 
1083  // Form a REG_SEQUENCE to force register allocation.
1084  unsigned Vec0Off = ExtOff + 1;
1085  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1086  N->op_begin() + Vec0Off + NumVecs);
1087  SDValue RegSeq = createQTuple(Regs);
1088 
1089  SmallVector<SDValue, 6> Ops;
1090  if (isExt)
1091  Ops.push_back(N->getOperand(1));
1092  Ops.push_back(RegSeq);
1093  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1094  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1095 }
1096 
1097 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1098  LoadSDNode *LD = cast<LoadSDNode>(N);
1099  if (LD->isUnindexed())
1100  return false;
1101  EVT VT = LD->getMemoryVT();
1102  EVT DstVT = N->getValueType(0);
1103  ISD::MemIndexedMode AM = LD->getAddressingMode();
1104  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1105 
1106  // We're not doing validity checking here. That was done when checking
1107  // if we should mark the load as indexed or not. We're just selecting
1108  // the right instruction.
1109  unsigned Opcode = 0;
1110 
1111  ISD::LoadExtType ExtType = LD->getExtensionType();
1112  bool InsertTo64 = false;
1113  if (VT == MVT::i64)
1114  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1115  else if (VT == MVT::i32) {
1116  if (ExtType == ISD::NON_EXTLOAD)
1117  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1118  else if (ExtType == ISD::SEXTLOAD)
1119  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1120  else {
1121  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1122  InsertTo64 = true;
1123  // The result of the load is only i32. It's the subreg_to_reg that makes
1124  // it into an i64.
1125  DstVT = MVT::i32;
1126  }
1127  } else if (VT == MVT::i16) {
1128  if (ExtType == ISD::SEXTLOAD) {
1129  if (DstVT == MVT::i64)
1130  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1131  else
1132  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1133  } else {
1134  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1135  InsertTo64 = DstVT == MVT::i64;
1136  // The result of the load is only i32. It's the subreg_to_reg that makes
1137  // it into an i64.
1138  DstVT = MVT::i32;
1139  }
1140  } else if (VT == MVT::i8) {
1141  if (ExtType == ISD::SEXTLOAD) {
1142  if (DstVT == MVT::i64)
1143  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1144  else
1145  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1146  } else {
1147  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1148  InsertTo64 = DstVT == MVT::i64;
1149  // The result of the load is only i32. It's the subreg_to_reg that makes
1150  // it into an i64.
1151  DstVT = MVT::i32;
1152  }
1153  } else if (VT == MVT::f16) {
1154  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1155  } else if (VT == MVT::f32) {
1156  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1157  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1158  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1159  } else if (VT.is128BitVector()) {
1160  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1161  } else
1162  return false;
1163  SDValue Chain = LD->getChain();
1164  SDValue Base = LD->getBasePtr();
1165  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1166  int OffsetVal = (int)OffsetOp->getZExtValue();
1167  SDLoc dl(N);
1168  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1169  SDValue Ops[] = { Base, Offset, Chain };
1170  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1171  MVT::Other, Ops);
1172  // Either way, we're replacing the node, so tell the caller that.
1173  SDValue LoadedVal = SDValue(Res, 1);
1174  if (InsertTo64) {
1175  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1176  LoadedVal =
1177  SDValue(CurDAG->getMachineNode(
1178  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1179  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1180  SubReg),
1181  0);
1182  }
1183 
1184  ReplaceUses(SDValue(N, 0), LoadedVal);
1185  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1186  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1187  CurDAG->RemoveDeadNode(N);
1188  return true;
1189 }
1190 
1191 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1192  unsigned SubRegIdx) {
1193  SDLoc dl(N);
1194  EVT VT = N->getValueType(0);
1195  SDValue Chain = N->getOperand(0);
1196 
1197  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1198  Chain};
1199 
1200  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1201 
1202  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1203  SDValue SuperReg = SDValue(Ld, 0);
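  // The ldN instruction yields a single Untyped tuple register; extract each
  // vector result with EXTRACT_SUBREG at SubRegIdx + i.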
1204  for (unsigned i = 0; i < NumVecs; ++i)
1205  ReplaceUses(SDValue(N, i),
1206  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1207 
1208  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1209 
1210  // Transfer memoperands.
1211  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1212  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1213 
1214  CurDAG->RemoveDeadNode(N);
1215 }
1216 
1217 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1218  unsigned Opc, unsigned SubRegIdx) {
1219  SDLoc dl(N);
1220  EVT VT = N->getValueType(0);
1221  SDValue Chain = N->getOperand(0);
1222 
1223  SDValue Ops[] = {N->getOperand(1), // Mem operand
1224  N->getOperand(2), // Incremental
1225  Chain};
1226 
1227  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1228  MVT::Untyped, MVT::Other};
1229 
1230  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1231 
1232  // Update uses of write back register
1233  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1234 
1235  // Update uses of vector list
1236  SDValue SuperReg = SDValue(Ld, 1);
1237  if (NumVecs == 1)
1238  ReplaceUses(SDValue(N, 0), SuperReg);
1239  else
1240  for (unsigned i = 0; i < NumVecs; ++i)
1241  ReplaceUses(SDValue(N, i),
1242  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1243 
1244  // Update the chain
1245  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1246  CurDAG->RemoveDeadNode(N);
1247 }
1248 
1249 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1250  unsigned Opc) {
1251  SDLoc dl(N);
1252  EVT VT = N->getOperand(2)->getValueType(0);
1253 
1254  // Form a REG_SEQUENCE to force register allocation.
1255  bool Is128Bit = VT.getSizeInBits() == 128;
1256  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1257  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1258 
1259  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1260  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1261 
1262  // Transfer memoperands.
1263  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1264  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1265 
1266  ReplaceNode(N, St);
1267 }
1268 
1269 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1270  unsigned Opc) {
1271  SDLoc dl(N);
1272  EVT VT = N->getOperand(2)->getValueType(0);
1273  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1274  MVT::Other}; // Type for the Chain
1275 
1276  // Form a REG_SEQUENCE to force register allocation.
1277  bool Is128Bit = VT.getSizeInBits() == 128;
1278  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1279  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1280 
1281  SDValue Ops[] = {RegSeq,
1282  N->getOperand(NumVecs + 1), // base register
1283  N->getOperand(NumVecs + 2), // Incremental
1284  N->getOperand(0)}; // Chain
1285  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1286 
1287  ReplaceNode(N, St);
1288 }
1289 
1290 namespace {
1291 /// WidenVector - Given a value in the V64 register class, produce the
1292 /// equivalent value in the V128 register class.
1293 class WidenVector {
1294  SelectionDAG &DAG;
1295 
1296 public:
1297  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1298 
1299  SDValue operator()(SDValue V64Reg) {
1300  EVT VT = V64Reg.getValueType();
1301  unsigned NarrowSize = VT.getVectorNumElements();
1302  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1303  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1304  SDLoc DL(V64Reg);
1305 
1306  SDValue Undef =
1307  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1308  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1309  }
1310 };
1311 } // namespace
1312 
1313 /// NarrowVector - Given a value in the V128 register class, produce the
1314 /// equivalent value in the V64 register class.
1315 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1316  EVT VT = V128Reg.getValueType();
1317  unsigned WideSize = VT.getVectorNumElements();
1318  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1319  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1320 
1321  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1322  V128Reg);
1323 }
1324 
1325 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1326  unsigned Opc) {
1327  SDLoc dl(N);
1328  EVT VT = N->getValueType(0);
1329  bool Narrow = VT.getSizeInBits() == 64;
1330 
1331  // Form a REG_SEQUENCE to force register allocation.
1332  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1333 
1334  if (Narrow)
1335  transform(Regs, Regs.begin(),
1336  WidenVector(*CurDAG));
1337 
1338  SDValue RegSeq = createQTuple(Regs);
1339 
1340  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1341 
1342  unsigned LaneNo =
1343  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1344 
1345  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1346  N->getOperand(NumVecs + 3), N->getOperand(0)};
1347  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1348  SDValue SuperReg = SDValue(Ld, 0);
1349 
1350  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1351  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1352  AArch64::qsub2, AArch64::qsub3 };
1353  for (unsigned i = 0; i < NumVecs; ++i) {
1354  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1355  if (Narrow)
1356  NV = NarrowVector(NV, *CurDAG);
1357  ReplaceUses(SDValue(N, i), NV);
1358  }
1359 
1360  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1361  CurDAG->RemoveDeadNode(N);
1362 }
1363 
1364 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1365  unsigned Opc) {
1366  SDLoc dl(N);
1367  EVT VT = N->getValueType(0);
1368  bool Narrow = VT.getSizeInBits() == 64;
1369 
1370  // Form a REG_SEQUENCE to force register allocation.
1371  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1372 
1373  if (Narrow)
1374  transform(Regs, Regs.begin(),
1375  WidenVector(*CurDAG));
1376 
1377  SDValue RegSeq = createQTuple(Regs);
1378 
1379  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1380  RegSeq->getValueType(0), MVT::Other};
1381 
1382  unsigned LaneNo =
1383  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1384 
1385  SDValue Ops[] = {RegSeq,
1386  CurDAG->getTargetConstant(LaneNo, dl,
1387  MVT::i64), // Lane Number
1388  N->getOperand(NumVecs + 2), // Base register
1389  N->getOperand(NumVecs + 3), // Incremental
1390  N->getOperand(0)};
1391  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1392 
1393  // Update uses of the write back register
1394  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1395 
1396  // Update uses of the vector list
1397  SDValue SuperReg = SDValue(Ld, 1);
1398  if (NumVecs == 1) {
1399  ReplaceUses(SDValue(N, 0),
1400  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1401  } else {
1402  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1403  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1404  AArch64::qsub2, AArch64::qsub3 };
1405  for (unsigned i = 0; i < NumVecs; ++i) {
1406  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1407  SuperReg);
1408  if (Narrow)
1409  NV = NarrowVector(NV, *CurDAG);
1410  ReplaceUses(SDValue(N, i), NV);
1411  }
1412  }
1413 
1414  // Update the Chain
1415  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1416  CurDAG->RemoveDeadNode(N);
1417 }
1418 
1419 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1420  unsigned Opc) {
1421  SDLoc dl(N);
1422  EVT VT = N->getOperand(2)->getValueType(0);
1423  bool Narrow = VT.getSizeInBits() == 64;
1424 
1425  // Form a REG_SEQUENCE to force register allocation.
1426  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1427 
1428  if (Narrow)
1429  transform(Regs, Regs.begin(),
1430  WidenVector(*CurDAG));
1431 
1432  SDValue RegSeq = createQTuple(Regs);
1433 
1434  unsigned LaneNo =
1435  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1436 
1437  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1438  N->getOperand(NumVecs + 3), N->getOperand(0)};
1439  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1440 
1441  // Transfer memoperands.
1442  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1443  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1444 
1445  ReplaceNode(N, St);
1446 }
1447 
1448 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1449  unsigned Opc) {
1450  SDLoc dl(N);
1451  EVT VT = N->getOperand(2)->getValueType(0);
1452  bool Narrow = VT.getSizeInBits() == 64;
1453 
1454  // Form a REG_SEQUENCE to force register allocation.
1455  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1456 
1457  if (Narrow)
1458  transform(Regs, Regs.begin(),
1459  WidenVector(*CurDAG));
1460 
1461  SDValue RegSeq = createQTuple(Regs);
1462 
1463  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1464  MVT::Other};
1465 
1466  unsigned LaneNo =
1467  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1468 
1469  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1470  N->getOperand(NumVecs + 2), // Base Register
1471  N->getOperand(NumVecs + 3), // Incremental
1472  N->getOperand(0)};
1473  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1474 
1475  // Transfer memoperands.
1476  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1477  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1478 
1479  ReplaceNode(N, St);
1480 }
1481 
1482 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1483  unsigned &Opc, SDValue &Opd0,
1484  unsigned &LSB, unsigned &MSB,
1485  unsigned NumberOfIgnoredLowBits,
1486  bool BiggerPattern) {
1487  assert(N->getOpcode() == ISD::AND &&
1488  "N must be an AND operation to call this function");
1489 
1490  EVT VT = N->getValueType(0);
1491 
1492  // Here we can test the type of VT and return false when the type does not
1493  // match, but since it is done prior to that call in the current context
1494  // we turned that into an assert to avoid redundant code.
1495  assert((VT == MVT::i32 || VT == MVT::i64) &&
1496  "Type checking must have been done before calling this function");
1497 
1498  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1499  // changed the AND node to a 32-bit mask operation. We'll have to
1500  // undo that as part of the transform here if we want to catch all
1501  // the opportunities.
1502  // Currently the NumberOfIgnoredLowBits argument helps to recover
1503  // from these situations when matching a bigger pattern (bitfield insert).
1504 
1505  // For unsigned extracts, check for a shift right and mask
1506  uint64_t AndImm = 0;
1507  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1508  return false;
1509 
1510  const SDNode *Op0 = N->getOperand(0).getNode();
1511 
1512  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1513  // simplified. Try to undo that
1514  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1515 
1516  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1517  if (AndImm & (AndImm + 1))
1518  return false;
1519 
1520  bool ClampMSB = false;
1521  uint64_t SrlImm = 0;
1522  // Handle the SRL + ANY_EXTEND case.
1523  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1524  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1525  // Extend the incoming operand of the SRL to 64-bit.
1526  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1527  // Make sure to clamp the MSB so that we preserve the semantics of the
1528  // original operations.
1529  ClampMSB = true;
1530  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1531  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1532  SrlImm)) {
1533  // If the shift result was truncated, we can still combine them.
1534  Opd0 = Op0->getOperand(0).getOperand(0);
1535 
1536  // Use the type of SRL node.
1537  VT = Opd0->getValueType(0);
1538  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1539  Opd0 = Op0->getOperand(0);
1540  } else if (BiggerPattern) {
1541  // Let's pretend a 0 shift right has been performed.
1542  // The resulting code will be at least as good as the original one
1543  // plus it may expose more opportunities for bitfield insert pattern.
1544  // FIXME: Currently we limit this to the bigger pattern, because
1545  // some optimizations expect AND and not UBFM.
1546  Opd0 = N->getOperand(0);
1547  } else
1548  return false;
1549 
1550  // Bail out on large immediates. This happens when no proper
1551  // combining/constant folding was performed.
1552  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1553  LLVM_DEBUG(
1554  (dbgs() << N
1555  << ": Found large shift immediate, this should not happen\n"));
1556  return false;
1557  }
1558 
1559  LSB = SrlImm;
1560  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1561  : countTrailingOnes<uint64_t>(AndImm)) -
1562  1;
1563  if (ClampMSB)
1564  // Since we're moving the extend before the right shift operation, we need
1565  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1566  // the zeros which would get shifted in with the original right shift
1567  // operation.
1568  MSB = MSB > 31 ? 31 : MSB;
1569 
1570  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1571  return true;
1572 }
1573 
1574 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1575  SDValue &Opd0, unsigned &Immr,
1576  unsigned &Imms) {
1577  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1578 
1579  EVT VT = N->getValueType(0);
1580  unsigned BitWidth = VT.getSizeInBits();
1581  assert((VT == MVT::i32 || VT == MVT::i64) &&
1582  "Type checking must have been done before calling this function");
1583 
1584  SDValue Op = N->getOperand(0);
1585  if (Op->getOpcode() == ISD::TRUNCATE) {
1586  Op = Op->getOperand(0);
1587  VT = Op->getValueType(0);
1588  BitWidth = VT.getSizeInBits();
1589  }
1590 
1591  uint64_t ShiftImm;
1592  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1593  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1594  return false;
1595 
1596  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1597  if (ShiftImm + Width > BitWidth)
1598  return false;
1599 
1600  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1601  Opd0 = Op.getOperand(0);
1602  Immr = ShiftImm;
1603  Imms = ShiftImm + Width - 1;
1604  return true;
1605 }
1606 
1607 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1608  SDValue &Opd0, unsigned &LSB,
1609  unsigned &MSB) {
1610  // We are looking for the following pattern which basically extracts several
1611  // contiguous bits from the source value and places them at the LSB of the
1612  // destination value; all other bits of the destination value are set to zero:
1613  //
1614  // Value2 = AND Value, MaskImm
1615  // SRL Value2, ShiftImm
1616  //
1617  // where MaskImm >> ShiftImm determines the bit width to extract.
1618  //
1619  // This gets selected into a single UBFM:
1620  //
1621  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1622  //
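 // For example (illustrative values): "srl (and X, 0xff0), 4" gives
 // AndMask = 0xff0 and SrlImm = 4, so BitWide = 8 and we select
 // UBFM X, 4, 11, an 8-bit unsigned extract starting at bit 4.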
1623 
1624  if (N->getOpcode() != ISD::SRL)
1625  return false;
1626 
1627  uint64_t AndMask = 0;
1628  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1629  return false;
1630 
1631  Opd0 = N->getOperand(0).getOperand(0);
1632 
1633  uint64_t SrlImm = 0;
1634  if (!isIntImmediate(N->getOperand(1), SrlImm))
1635  return false;
1636 
1637  // Check whether we really have several bits extract here.
1638  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1639  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1640  if (N->getValueType(0) == MVT::i32)
1641  Opc = AArch64::UBFMWri;
1642  else
1643  Opc = AArch64::UBFMXri;
1644 
1645  LSB = SrlImm;
1646  MSB = BitWide + SrlImm - 1;
1647  return true;
1648  }
1649 
1650  return false;
1651 }
1652 
1653 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1654  unsigned &Immr, unsigned &Imms,
1655  bool BiggerPattern) {
1656  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1657  "N must be a SHR/SRA operation to call this function");
1658 
1659  EVT VT = N->getValueType(0);
1660 
1661  // Here we can test the type of VT and return false when the type does not
1662  // match, but since it is done prior to that call in the current context
1663  // we turned that into an assert to avoid redundant code.
1664  assert((VT == MVT::i32 || VT == MVT::i64) &&
1665  "Type checking must have been done before calling this function");
1666 
1667  // Check for AND + SRL doing several bits extract.
1668  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1669  return true;
1670 
1671  // We're looking for a shift of a shift.
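 // For example (illustrative values): "sra (shl X, 24), 24" on i32 gives
 // ShlImm = 24 and SrlImm = 24, so Immr = 0 and Imms = 32 - 24 - 1 = 7, and
 // we select SBFMWri X, 0, 7, i.e. a sign extension of the low byte (sxtb).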
1672  uint64_t ShlImm = 0;
1673  uint64_t TruncBits = 0;
1674  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1675  Opd0 = N->getOperand(0).getOperand(0);
1676  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1677  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1678  // We are looking for a shift of a truncate. Truncating from i64 to i32
1679  // can be considered as setting the high 32 bits to zero. Our strategy here
1680  // is to always generate a 64-bit UBFM. This consistency will help the CSE
1681  // pass later find more redundancy.
1682  Opd0 = N->getOperand(0).getOperand(0);
1683  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1684  VT = Opd0.getValueType();
1685  assert(VT == MVT::i64 && "the promoted type should be i64");
1686  } else if (BiggerPattern) {
1687  // Let's pretend a 0 shift left has been performed.
1688  // FIXME: Currently we limit this to the bigger pattern case,
1689  // because some optimizations expect AND and not UBFM
1690  Opd0 = N->getOperand(0);
1691  } else
1692  return false;
1693 
1694  // Missing combines/constant folding may have left us with strange
1695  // constants.
1696  if (ShlImm >= VT.getSizeInBits()) {
1697  LLVM_DEBUG(
1698  (dbgs() << N
1699  << ": Found large shift immediate, this should not happen\n"));
1700  return false;
1701  }
1702 
1703  uint64_t SrlImm = 0;
1704  if (!isIntImmediate(N->getOperand(1), SrlImm))
1705  return false;
1706 
1707  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1708  "bad amount in shift node!");
1709  int immr = SrlImm - ShlImm;
1710  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1711  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1712  // SRA requires a signed extraction
1713  if (VT == MVT::i32)
1714  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1715  else
1716  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1717  return true;
1718 }
1719 
1720 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1721  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1722 
1723  EVT VT = N->getValueType(0);
1724  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1725  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1726  return false;
1727 
1728  uint64_t ShiftImm;
1729  SDValue Op = N->getOperand(0);
1730  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1731  return false;
1732 
1733  SDLoc dl(N);
1734  // Extend the incoming operand of the shift to 64-bits.
1735  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1736  unsigned Immr = ShiftImm;
1737  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1738  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1739  CurDAG->getTargetConstant(Imms, dl, VT)};
1740  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1741  return true;
1742 }
1743 
1744 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1745  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1746  unsigned NumberOfIgnoredLowBits = 0,
1747  bool BiggerPattern = false) {
1748  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1749  return false;
1750 
1751  switch (N->getOpcode()) {
1752  default:
1753  if (!N->isMachineOpcode())
1754  return false;
1755  break;
1756  case ISD::AND:
1757  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1758  NumberOfIgnoredLowBits, BiggerPattern);
1759  case ISD::SRL:
1760  case ISD::SRA:
1761  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1762 
1763  case ISD::SIGN_EXTEND_INREG:
1764  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1765  }
1766 
1767  unsigned NOpc = N->getMachineOpcode();
1768  switch (NOpc) {
1769  default:
1770  return false;
1771  case AArch64::SBFMWri:
1772  case AArch64::UBFMWri:
1773  case AArch64::SBFMXri:
1774  case AArch64::UBFMXri:
1775  Opc = NOpc;
1776  Opd0 = N->getOperand(0);
1777  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1778  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1779  return true;
1780  }
1781  // Unreachable
1782  return false;
1783 }
1784 
1785 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1786  unsigned Opc, Immr, Imms;
1787  SDValue Opd0;
1788  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1789  return false;
1790 
1791  EVT VT = N->getValueType(0);
1792  SDLoc dl(N);
1793 
1794  // If the bit extract operation is 64bit but the original type is 32bit, we
1795  // need to add one EXTRACT_SUBREG.
1796  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1797  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1798  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1799 
1800  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1801  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1802  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1803  MVT::i32, SDValue(BFM, 0), SubReg));
1804  return true;
1805  }
1806 
1807  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1808  CurDAG->getTargetConstant(Imms, dl, VT)};
1809  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1810  return true;
1811 }
1812 
1813 /// Does DstMask form a complementary pair with the mask provided by
1814 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1815 /// this asks whether DstMask zeroes precisely those bits that will be set by
1816 /// the other half.
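/// For example (i32, illustrative values): DstMask = 0xffff00ff and inserted
/// bits 0x0000ff00 form such a pair; the two sets do not overlap and together
/// cover all 32 bits.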
1817 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1818  unsigned NumberOfIgnoredHighBits, EVT VT) {
1819  assert((VT == MVT::i32 || VT == MVT::i64) &&
1820  "i32 or i64 mask type expected!");
1821  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1822 
1823  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1824  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1825 
1826  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1827  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1828 }
1829 
1830 // Look for bits that will be useful for later uses.
1831 // A bit is considered useless as soon as it is dropped and never used
1832 // before it has been dropped.
1833 // E.g., looking for useful bit of x
1834 // 1. y = x & 0x7
1835 // 2. z = y >> 2
1836 // After #1, the useful bits of x are 0x7; from then on, they live through
1837 // y.
1838 // After #2, the useful bits of x are 0x4.
1839 // However, if x is used by an unpredictable instruction, then all its bits
1840 // are useful.
1841 // E.g.
1842 // 1. y = x & 0x7
1843 // 2. z = y >> 2
1844 // 3. str x, [@x]
1845 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1846 
1847 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1848  unsigned Depth) {
1849  uint64_t Imm =
1850  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1851  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1852  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1853  getUsefulBits(Op, UsefulBits, Depth + 1);
1854 }
1855 
1856 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1857  uint64_t Imm, uint64_t MSB,
1858  unsigned Depth) {
1859  // inherit the bitwidth value
1860  APInt OpUsefulBits(UsefulBits);
1861  OpUsefulBits = 1;
1862 
1863  if (MSB >= Imm) {
1864  OpUsefulBits <<= MSB - Imm + 1;
1865  --OpUsefulBits;
1866  // The interesting part will be in the lower part of the result
1867  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1868  // The interesting part was starting at Imm in the argument
1869  OpUsefulBits <<= Imm;
1870  } else {
1871  OpUsefulBits <<= MSB + 1;
1872  --OpUsefulBits;
1873  // The interesting part will be shifted in the result
1874  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1875  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1876  // The interesting part was at zero in the argument
1877  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1878  }
1879 
1880  UsefulBits &= OpUsefulBits;
1881 }
1882 
1883 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1884  unsigned Depth) {
1885  uint64_t Imm =
1886  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1887  uint64_t MSB =
1888  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1889 
1890  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1891 }
1892 
1893 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1894  unsigned Depth) {
1895  uint64_t ShiftTypeAndValue =
1896  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1897  APInt Mask(UsefulBits);
1898  Mask.clearAllBits();
1899  Mask.flipAllBits();
1900 
1901  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1902  // Shift Left
1903  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1904  Mask <<= ShiftAmt;
1905  getUsefulBits(Op, Mask, Depth + 1);
1906  Mask.lshrInPlace(ShiftAmt);
1907  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1908  // Shift Right
1909  // We do not handle AArch64_AM::ASR, because the sign will change the
1910  // number of useful bits
1911  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1912  Mask.lshrInPlace(ShiftAmt);
1913  getUsefulBits(Op, Mask, Depth + 1);
1914  Mask <<= ShiftAmt;
1915  } else
1916  return;
1917 
1918  UsefulBits &= Mask;
1919 }
1920 
1921 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1922  unsigned Depth) {
1923  uint64_t Imm =
1924  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1925  uint64_t MSB =
1926  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1927 
1928  APInt OpUsefulBits(UsefulBits);
1929  OpUsefulBits = 1;
1930 
1931  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1932  ResultUsefulBits.flipAllBits();
1933  APInt Mask(UsefulBits.getBitWidth(), 0);
1934 
1935  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1936 
1937  if (MSB >= Imm) {
1938  // The instruction is a BFXIL.
1939  uint64_t Width = MSB - Imm + 1;
1940  uint64_t LSB = Imm;
1941 
1942  OpUsefulBits <<= Width;
1943  --OpUsefulBits;
1944 
1945  if (Op.getOperand(1) == Orig) {
1946  // Copy the low bits from the result to bits starting from LSB.
1947  Mask = ResultUsefulBits & OpUsefulBits;
1948  Mask <<= LSB;
1949  }
1950 
1951  if (Op.getOperand(0) == Orig)
1952  // Bits starting from LSB in the input contribute to the result.
1953  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1954  } else {
1955  // The instruction is a BFI.
1956  uint64_t Width = MSB + 1;
1957  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1958 
1959  OpUsefulBits <<= Width;
1960  --OpUsefulBits;
1961  OpUsefulBits <<= LSB;
1962 
1963  if (Op.getOperand(1) == Orig) {
1964  // Copy the bits from the result to the zero bits.
1965  Mask = ResultUsefulBits & OpUsefulBits;
1966  Mask.lshrInPlace(LSB);
1967  }
1968 
1969  if (Op.getOperand(0) == Orig)
1970  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1971  }
1972 
1973  UsefulBits &= Mask;
1974 }
1975 
1976 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1977  SDValue Orig, unsigned Depth) {
1978 
1979  // Users of this node should have already been instruction selected
1980  // FIXME: Can we turn that into an assert?
1981  if (!UserNode->isMachineOpcode())
1982  return;
1983 
1984  switch (UserNode->getMachineOpcode()) {
1985  default:
1986  return;
1987  case AArch64::ANDSWri:
1988  case AArch64::ANDSXri:
1989  case AArch64::ANDWri:
1990  case AArch64::ANDXri:
1991  // We increment Depth only when we call the getUsefulBits
1992  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1993  Depth);
1994  case AArch64::UBFMWri:
1995  case AArch64::UBFMXri:
1996  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1997 
1998  case AArch64::ORRWrs:
1999  case AArch64::ORRXrs:
2000  if (UserNode->getOperand(1) != Orig)
2001  return;
2002  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2003  Depth);
2004  case AArch64::BFMWri:
2005  case AArch64::BFMXri:
2006  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2007 
2008  case AArch64::STRBBui:
2009  case AArch64::STURBBi:
2010  if (UserNode->getOperand(0) != Orig)
2011  return;
2012  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2013  return;
2014 
2015  case AArch64::STRHHui:
2016  case AArch64::STURHHi:
2017  if (UserNode->getOperand(0) != Orig)
2018  return;
2019  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2020  return;
2021  }
2022 }
2023 
2024 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2025  if (Depth >= 6)
2026  return;
2027  // Initialize UsefulBits
2028  if (!Depth) {
2029  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2030  // At the beginning, assume every produced bit is useful
2031  UsefulBits = APInt(Bitwidth, 0);
2032  UsefulBits.flipAllBits();
2033  }
2034  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2035 
2036  for (SDNode *Node : Op.getNode()->uses()) {
2037  // A use cannot produce useful bits
2038  APInt UsefulBitsForUse = APInt(UsefulBits);
2039  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2040  UsersUsefulBits |= UsefulBitsForUse;
2041  }
2042  // UsefulBits contains the produced bits that are meaningful for the
2043  // current definition, thus a user cannot make a bit meaningful at
2044  // this point
2045  UsefulBits &= UsersUsefulBits;
2046 }
2047 
2048 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2049 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2050 /// 0, return Op unchanged.
2051 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2052  if (ShlAmount == 0)
2053  return Op;
2054 
2055  EVT VT = Op.getValueType();
2056  SDLoc dl(Op);
2057  unsigned BitWidth = VT.getSizeInBits();
2058  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2059 
2060  SDNode *ShiftNode;
2061  if (ShlAmount > 0) {
2062  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2063  ShiftNode = CurDAG->getMachineNode(
2064  UBFMOpc, dl, VT, Op,
2065  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2066  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2067  } else {
2068  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2069  assert(ShlAmount < 0 && "expected right shift");
2070  int ShrAmount = -ShlAmount;
2071  ShiftNode = CurDAG->getMachineNode(
2072  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2073  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2074  }
2075 
2076  return SDValue(ShiftNode, 0);
2077 }
2078 
2079 /// Does this tree qualify as an attempt to move a bitfield into position,
2080 /// essentially "(and (shl VAL, N), Mask)".
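/// For example (illustrative values): "(and (shl X, 3), 0x1f8)" positions a
/// 6-bit field of X at bit 3, giving Src = X, ShiftAmount = 3, MaskWidth = 6.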
2081 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2082  bool BiggerPattern,
2083  SDValue &Src, int &ShiftAmount,
2084  int &MaskWidth) {
2085  EVT VT = Op.getValueType();
2086  unsigned BitWidth = VT.getSizeInBits();
2087  (void)BitWidth;
2088  assert(BitWidth == 32 || BitWidth == 64);
2089 
2090  KnownBits Known;
2091  CurDAG->computeKnownBits(Op, Known);
2092 
2093  // Non-zero in the sense that they're not provably zero, which is the key
2094  // point if we want to use this value
2095  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2096 
2097  // Discard a constant AND mask if present. It's safe because the node will
2098  // already have been factored into the computeKnownBits calculation above.
2099  uint64_t AndImm;
2100  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2101  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2102  Op = Op.getOperand(0);
2103  }
2104 
2105  // Don't match if the SHL has more than one use, since then we'll end up
2106  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2107  if (!BiggerPattern && !Op.hasOneUse())
2108  return false;
2109 
2110  uint64_t ShlImm;
2111  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2112  return false;
2113  Op = Op.getOperand(0);
2114 
2115  if (!isShiftedMask_64(NonZeroBits))
2116  return false;
2117 
2118  ShiftAmount = countTrailingZeros(NonZeroBits);
2119  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2120 
2121  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2122  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2123  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2124  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2125  // which case it is not profitable to insert an extra shift.
2126  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2127  return false;
2128  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2129 
2130  return true;
2131 }
2132 
2133 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2134  assert(VT == MVT::i32 || VT == MVT::i64);
2135  if (VT == MVT::i32)
2136  return isShiftedMask_32(Mask);
2137  return isShiftedMask_64(Mask);
2138 }
2139 
2140 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2141 // inserted only sets known zero bits.
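 // For example (i32, illustrative values): "or (and X, 0xffff00ff), 0x2a00".
 // 0x2a00 is not a valid logical immediate, the AND's known-zero bits
 // 0x0000ff00 form a shifted mask, and the OR only sets known-zero bits, so
 // this can be selected as "mov w8, #0x2a; bfi wX, w8, #8, #8"
 // (BFMWri with ImmR = 24, ImmS = 7).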
2142 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2143  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2144 
2145  EVT VT = N->getValueType(0);
2146  if (VT != MVT::i32 && VT != MVT::i64)
2147  return false;
2148 
2149  unsigned BitWidth = VT.getSizeInBits();
2150 
2151  uint64_t OrImm;
2152  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2153  return false;
2154 
2155  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2156  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2157  // performance neutral.
2158  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2159  return false;
2160 
2161  uint64_t MaskImm;
2162  SDValue And = N->getOperand(0);
2163  // Must be a single use AND with an immediate operand.
2164  if (!And.hasOneUse() ||
2165  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2166  return false;
2167 
2168  // Compute the Known Zero for the AND as this allows us to catch more general
2169  // cases than just looking for AND with imm.
2170  KnownBits Known;
2171  CurDAG->computeKnownBits(And, Known);
2172 
2173  // Non-zero in the sense that they're not provably zero, which is the key
2174  // point if we want to use this value.
2175  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2176 
2177  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2178  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2179  return false;
2180 
2181  // The bits being inserted must only set those bits that are known to be zero.
2182  if ((OrImm & NotKnownZero) != 0) {
2183  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2184  // currently handle this case.
2185  return false;
2186  }
2187 
2188  // BFI/BFXIL dst, src, #lsb, #width.
2189  int LSB = countTrailingOnes(NotKnownZero);
2190  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2191 
2192  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2193  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2194  unsigned ImmS = Width - 1;
2195 
2196  // If we're creating a BFI instruction, avoid cases where we need more
2197  // instructions to materialize the BFI constant as compared to the original
2198  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2199  // should be no worse in this case.
2200  bool IsBFI = LSB != 0;
2201  uint64_t BFIImm = OrImm >> LSB;
2202  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2203  // We have a BFI instruction and we know the constant can't be materialized
2204  // with a ORR-immediate with the zero register.
2205  unsigned OrChunks = 0, BFIChunks = 0;
2206  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2207  if (((OrImm >> Shift) & 0xFFFF) != 0)
2208  ++OrChunks;
2209  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2210  ++BFIChunks;
2211  }
2212  if (BFIChunks > OrChunks)
2213  return false;
2214  }
2215 
2216  // Materialize the constant to be inserted.
2217  SDLoc DL(N);
2218  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2219  SDNode *MOVI = CurDAG->getMachineNode(
2220  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2221 
2222  // Create the BFI/BFXIL instruction.
2223  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2224  CurDAG->getTargetConstant(ImmR, DL, VT),
2225  CurDAG->getTargetConstant(ImmS, DL, VT)};
2226  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2227  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2228  return true;
2229 }
2230 
2231 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2232  SelectionDAG *CurDAG) {
2233  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2234 
2235  EVT VT = N->getValueType(0);
2236  if (VT != MVT::i32 && VT != MVT::i64)
2237  return false;
2238 
2239  unsigned BitWidth = VT.getSizeInBits();
2240 
2241  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2242  // have the expected shape. Try to undo that.
2243 
2244  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2245  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2246 
2247  // Given an OR operation, check if we have the following pattern
2248  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
2249  // isBitfieldExtractOp)
2250  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2251  // countTrailingZeros(mask2) == imm2 - imm + 1
2252  // f = d | c
2253  // if yes, replace the OR instruction with:
2254  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2255 
2256  // OR is commutative, check all combinations of operand order and values of
2257  // BiggerPattern, i.e.
2258  // Opd0, Opd1, BiggerPattern=false
2259  // Opd1, Opd0, BiggerPattern=false
2260  // Opd0, Opd1, BiggerPattern=true
2261  // Opd1, Opd0, BiggerPattern=true
2262  // Several of these combinations may match, so check with BiggerPattern=false
2263  // first since that will produce better results by matching more instructions
2264  // and/or inserting fewer extra instructions.
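 // For example (i32, illustrative values): with c = "and (srl X, 2), 0xf"
 // (which matches UBFM X, 2, 5) and d = "and Y, 0xfffffff0" (which zeroes
 // exactly the bits being inserted), "or d, c" is selected as
 // BFMWri Y, X, 2, 5, i.e. bfxil wY, wX, #2, #4.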
2265  for (int I = 0; I < 4; ++I) {
2266 
2267  SDValue Dst, Src;
2268  unsigned ImmR, ImmS;
2269  bool BiggerPattern = I / 2;
2270  SDValue OrOpd0Val = N->getOperand(I % 2);
2271  SDNode *OrOpd0 = OrOpd0Val.getNode();
2272  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2273  SDNode *OrOpd1 = OrOpd1Val.getNode();
2274 
2275  unsigned BFXOpc;
2276  int DstLSB, Width;
2277  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2278  NumberOfIgnoredLowBits, BiggerPattern)) {
2279  // Check that the returned opcode is compatible with the pattern,
2280  // i.e., same type and zero extended (U and not S)
2281  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2282  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2283  continue;
2284 
2285  // Compute the width of the bitfield insertion
2286  DstLSB = 0;
2287  Width = ImmS - ImmR + 1;
2288  // FIXME: This constraint is here to catch bitfield insertion; we may
2289  // want to widen the pattern if we want to grab the general bitfield
2290  // move case.
2291  if (Width <= 0)
2292  continue;
2293 
2294  // If the mask on the insertee is correct, we have a BFXIL operation. We
2295  // can share the ImmR and ImmS values from the already-computed UBFM.
2296  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2297  BiggerPattern,
2298  Src, DstLSB, Width)) {
2299  ImmR = (BitWidth - DstLSB) % BitWidth;
2300  ImmS = Width - 1;
2301  } else
2302  continue;
2303 
2304  // Check the second part of the pattern
2305  EVT VT = OrOpd1Val.getValueType();
2306  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2307 
2308  // Compute the Known Zero for the candidate of the first operand.
2309  // This allows us to catch more general cases than just looking for
2310  // an AND with imm. Indeed, simplify-demanded-bits may have removed
2311  // the AND instruction because it proves it was useless.
2312  KnownBits Known;
2313  CurDAG->computeKnownBits(OrOpd1Val, Known);
2314 
2315  // Check if there is enough room for the second operand to appear
2316  // in the first one
2317  APInt BitsToBeInserted =
2318  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2319 
2320  if ((BitsToBeInserted & ~Known.Zero) != 0)
2321  continue;
2322 
2323  // Set the first operand
2324  uint64_t Imm;
2325  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2326  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2327  // In that case, we can eliminate the AND
2328  Dst = OrOpd1->getOperand(0);
2329  else
2330  // Maybe the AND has been removed by simplify-demanded-bits
2331  // or is useful because it discards more bits
2332  Dst = OrOpd1Val;
2333 
2334  // both parts match
2335  SDLoc DL(N);
2336  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2337  CurDAG->getTargetConstant(ImmS, DL, VT)};
2338  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2339  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2340  return true;
2341  }
2342 
2343  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2344  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2345  // mask (e.g., 0x000ffff0).
2346  uint64_t Mask0Imm, Mask1Imm;
2347  SDValue And0 = N->getOperand(0);
2348  SDValue And1 = N->getOperand(1);
2349  if (And0.hasOneUse() && And1.hasOneUse() &&
2350  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2351  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2352  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2353  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2354 
2355  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2356  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2357  // bits to be inserted.
2358  if (isShiftedMask(Mask0Imm, VT)) {
2359  std::swap(And0, And1);
2360  std::swap(Mask0Imm, Mask1Imm);
2361  }
2362 
2363  SDValue Src = And1->getOperand(0);
2364  SDValue Dst = And0->getOperand(0);
2365  unsigned LSB = countTrailingZeros(Mask1Imm);
2366  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2367 
2368  // The BFXIL inserts the low-order bits from a source register, so right
2369  // shift the needed bits into place.
2370  SDLoc DL(N);
2371  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2372  SDNode *LSR = CurDAG->getMachineNode(
2373  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2374  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2375 
2376  // BFXIL is an alias of BFM, so translate to BFM operands.
2377  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2378  unsigned ImmS = Width - 1;
2379 
2380  // Create the BFXIL instruction.
2381  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2382  CurDAG->getTargetConstant(ImmR, DL, VT),
2383  CurDAG->getTargetConstant(ImmS, DL, VT)};
2384  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2385  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2386  return true;
2387  }
2388 
2389  return false;
2390 }
2391 
2392 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2393  if (N->getOpcode() != ISD::OR)
2394  return false;
2395 
2396  APInt NUsefulBits;
2397  getUsefulBits(SDValue(N, 0), NUsefulBits);
2398 
2399  // If no bits are useful, just return UNDEF.
2400  if (!NUsefulBits) {
2401  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2402  return true;
2403  }
2404 
2405  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2406  return true;
2407 
2408  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2409 }
2410 
2411 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2412 /// equivalent of a left shift by a constant amount followed by an and masking
2413 /// out a contiguous set of bits.
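/// For example (i32, illustrative values): "and (shl X, 4), 0xff0" is
/// selected as UBFMWri X, 28, 7, i.e. ubfiz wD, wX, #4, #8.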
2414 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2415  if (N->getOpcode() != ISD::AND)
2416  return false;
2417 
2418  EVT VT = N->getValueType(0);
2419  if (VT != MVT::i32 && VT != MVT::i64)
2420  return false;
2421 
2422  SDValue Op0;
2423  int DstLSB, Width;
2424  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2425  Op0, DstLSB, Width))
2426  return false;
2427 
2428  // ImmR is the rotate right amount.
2429  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2430  // ImmS is the most significant bit of the source to be moved.
2431  unsigned ImmS = Width - 1;
2432 
2433  SDLoc DL(N);
2434  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2435  CurDAG->getTargetConstant(ImmS, DL, VT)};
2436  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2437  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2438  return true;
2439 }
2440 
2441 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2442 /// variable shift/rotate instructions.
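/// For example (illustrative values): "srl X:i64, (add Y, 64)" can be
/// selected as a plain "lsrv xD, xX, xY" because LSRV only reads the low six
/// bits of the shift amount, and "shl X, (sub 64, Y)" becomes a NEG of Y
/// followed by LSLV.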
2443 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2444  EVT VT = N->getValueType(0);
2445 
2446  unsigned Opc;
2447  switch (N->getOpcode()) {
2448  case ISD::ROTR:
2449  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2450  break;
2451  case ISD::SHL:
2452  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2453  break;
2454  case ISD::SRL:
2455  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2456  break;
2457  case ISD::SRA:
2458  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2459  break;
2460  default:
2461  return false;
2462  }
2463 
2464  uint64_t Size;
2465  uint64_t Bits;
2466  if (VT == MVT::i32) {
2467  Bits = 5;
2468  Size = 32;
2469  } else if (VT == MVT::i64) {
2470  Bits = 6;
2471  Size = 64;
2472  } else
2473  return false;
2474 
2475  SDValue ShiftAmt = N->getOperand(1);
2476  SDLoc DL(N);
2477  SDValue NewShiftAmt;
2478 
2479  // Skip over an extend of the shift amount.
2480  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2481  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2482  ShiftAmt = ShiftAmt->getOperand(0);
2483 
2484  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2485  SDValue Add0 = ShiftAmt->getOperand(0);
2486  SDValue Add1 = ShiftAmt->getOperand(1);
2487  uint64_t Add0Imm;
2488  uint64_t Add1Imm;
2489  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2490  // to avoid the ADD/SUB.
2491  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2492  NewShiftAmt = Add0;
2493  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2494  // generate a NEG instead of a SUB of a constant.
2495  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2496  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2497  (Add0Imm % Size == 0)) {
2498  unsigned NegOpc;
2499  unsigned ZeroReg;
2500  EVT SubVT = ShiftAmt->getValueType(0);
2501  if (SubVT == MVT::i32) {
2502  NegOpc = AArch64::SUBWrr;
2503  ZeroReg = AArch64::WZR;
2504  } else {
2505  assert(SubVT == MVT::i64);
2506  NegOpc = AArch64::SUBXrr;
2507  ZeroReg = AArch64::XZR;
2508  }
2509  SDValue Zero =
2510  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2511  MachineSDNode *Neg =
2512  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2513  NewShiftAmt = SDValue(Neg, 0);
2514  } else
2515  return false;
2516  } else {
2517  // If the shift amount is masked with an AND, check that the mask covers the
2518  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2519  // the AND.
2520  uint64_t MaskImm;
2521  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2522  return false;
2523 
2524  if (countTrailingOnes(MaskImm) < Bits)
2525  return false;
2526 
2527  NewShiftAmt = ShiftAmt->getOperand(0);
2528  }
2529 
2530  // Narrow/widen the shift amount to match the size of the shift operation.
2531  if (VT == MVT::i32)
2532  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2533  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2534  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2535  MachineSDNode *Ext = CurDAG->getMachineNode(
2536  AArch64::SUBREG_TO_REG, DL, VT,
2537  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2538  NewShiftAmt = SDValue(Ext, 0);
2539  }
2540 
2541  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2542  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2543  return true;
2544 }
2545 
2546 bool
2547 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2548  unsigned RegWidth) {
2549  APFloat FVal(0.0);
2550  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2551  FVal = CN->getValueAPF();
2552  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2553  // Some otherwise illegal constants are allowed in this case.
2554  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2555  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2556  return false;
2557 
2558  ConstantPoolSDNode *CN =
2559  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2560  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2561  } else
2562  return false;
2563 
2564  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2565  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2566  // x-register.
2567  //
2568  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2569  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2570  // integers.
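 // For example (illustrative values): for (fp_to_sint (fmul x, 65536.0))
 // with a 32-bit result, THIS_NODE is 2^16, so FBits below becomes 16 and
 // the fixed-point FCVTZS patterns can use it as their #fbits operand.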
2571  bool IsExact;
2572 
2573  // fbits is between 1 and 64 in the worst-case, which means the fmul
2574  // could have 2^64 as an actual operand. Need 65 bits of precision.
2575  APSInt IntVal(65, true);
2576  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2577 
2578  // N.b. isPowerOf2 also checks for > 0.
2579  if (!IsExact || !IntVal.isPowerOf2()) return false;
2580  unsigned FBits = IntVal.logBase2();
2581 
2582  // Checks above should have guaranteed that we haven't lost information in
2583  // finding FBits, but it must still be in range.
2584  if (FBits == 0 || FBits > RegWidth) return false;
2585 
2586  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2587  return true;
2588 }
2589 
2590 // Inspects a register string of the form op0:op1:CRn:CRm:op2, gets the
2591 // fields of the string, obtains the integer values from them and combines
2592 // these into a single value to be used in the MRS/MSR instruction.
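 // For example (illustrative values): "3:3:13:0:2" (tpidr_el0) encodes to
 // (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xde82.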
2593 static int getIntOperandFromRegisterString(StringRef RegString) {
2594  SmallVector<StringRef, 5> Fields;
2595  RegString.split(Fields, ':');
2596 
2597  if (Fields.size() == 1)
2598  return -1;
2599 
2600  assert(Fields.size() == 5
2601  && "Invalid number of fields in read register string");
2602 
2603  SmallVector<int, 5> Ops;
2604  bool AllIntFields = true;
2605 
2606  for (StringRef Field : Fields) {
2607  unsigned IntField;
2608  AllIntFields &= !Field.getAsInteger(10, IntField);
2609  Ops.push_back(IntField);
2610  }
2611 
2612  assert(AllIntFields &&
2613  "Unexpected non-integer value in special register string.");
2614 
2615  // Need to combine the integer fields of the string into a single value
2616 // based on the bit encoding of the MRS/MSR instruction.
2617  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2618  (Ops[3] << 3) | (Ops[4]);
2619 }
2620 
2621 // Lower the read_register intrinsic to an MRS instruction node if the special
2622 // register string argument is either of the form detailed in the ACLE (the
2623 // form described in getIntOperandFromRegisterString) or is a named register
2624 // known by the MRS SysReg mapper.
2625 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2626  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2627  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2628  SDLoc DL(N);
2629 
2630  int Reg = getIntOperandFromRegisterString(RegString->getString());
2631  if (Reg != -1) {
2632  ReplaceNode(N, CurDAG->getMachineNode(
2633  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2634  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2635  N->getOperand(0)));
2636  return true;
2637  }
2638 
2639  // Use the sysreg mapper to map the remaining possible strings to the
2640  // value for the register to be used for the instruction operand.
2641  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2642  if (TheReg && TheReg->Readable &&
2643  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2644  Reg = TheReg->Encoding;
2645  else
2646  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2647 
2648  if (Reg != -1) {
2649  ReplaceNode(N, CurDAG->getMachineNode(
2650  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2651  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2652  N->getOperand(0)));
2653  return true;
2654  }
2655 
2656  return false;
2657 }
2658 
2659 // Lower the write_register intrinsic to an MSR instruction node if the special
2660 // register string argument is either of the form detailed in the ACLE (the
2661 // form described in getIntOperandFromRegisterString) or is a named register
2662 // known by the MSR SysReg mapper.
2663 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2664  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2665  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2666  SDLoc DL(N);
2667 
2668  int Reg = getIntOperandFromRegisterString(RegString->getString());
2669  if (Reg != -1) {
2670  ReplaceNode(
2671  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2672  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2673  N->getOperand(2), N->getOperand(0)));
2674  return true;
2675  }
2676 
2677  // Check if the register was one of those allowed as the pstatefield value
2678  // in the MSR (immediate) instruction. To accept the values allowed in the
2679  // pstatefield for the MSR (immediate) instruction, we also require that an
2680  // immediate value has been provided as an argument; we know that this is
2681  // the case as it has been ensured by semantic checking.
2682  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2683  if (PMapper) {
2684  assert (isa<ConstantSDNode>(N->getOperand(2))
2685  && "Expected a constant integer expression.");
2686  unsigned Reg = PMapper->Encoding;
2687  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2688  unsigned State;
2689  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2690  assert(Immed < 2 && "Bad imm");
2691  State = AArch64::MSRpstateImm1;
2692  } else {
2693  assert(Immed < 16 && "Bad imm");
2694  State = AArch64::MSRpstateImm4;
2695  }
2696  ReplaceNode(N, CurDAG->getMachineNode(
2697  State, DL, MVT::Other,
2698  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2699  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2700  N->getOperand(0)));
2701  return true;
2702  }
2703 
2704  // Use the sysreg mapper to attempt to map the remaining possible strings
2705  // to the value for the register to be used for the MSR (register)
2706  // instruction operand.
2707  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2708  if (TheReg && TheReg->Writeable &&
2709  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2710  Reg = TheReg->Encoding;
2711  else
2712  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2713  if (Reg != -1) {
2714  ReplaceNode(N, CurDAG->getMachineNode(
2715  AArch64::MSR, DL, MVT::Other,
2716  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2717  N->getOperand(2), N->getOperand(0)));
2718  return true;
2719  }
2720 
2721  return false;
2722 }
2723 
2724 /// We've got special pseudo-instructions for these
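/// (For example, an i32 cmpxchg without LSE is selected to CMP_SWAP_32 here
///  and is later expanded, roughly, into an LDAXR/STLXR retry loop.)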
2725 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2726  unsigned Opcode;
2727  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2728 
2729  // Leave IR for LSE if subtarget supports it.
2730  if (Subtarget->hasLSE()) return false;
2731 
2732  if (MemTy == MVT::i8)
2733  Opcode = AArch64::CMP_SWAP_8;
2734  else if (MemTy == MVT::i16)
2735  Opcode = AArch64::CMP_SWAP_16;
2736  else if (MemTy == MVT::i32)
2737  Opcode = AArch64::CMP_SWAP_32;
2738  else if (MemTy == MVT::i64)
2739  Opcode = AArch64::CMP_SWAP_64;
2740  else
2741  llvm_unreachable("Unknown AtomicCmpSwap type");
2742 
2743  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2744  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2745  N->getOperand(0)};
2746  SDNode *CmpSwap = CurDAG->getMachineNode(
2747  Opcode, SDLoc(N),
2748  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2749 
2750  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2751  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2752 
2753  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2754  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2755  CurDAG->RemoveDeadNode(N);
2756 
2757  return true;
2758 }
2759 
2760 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2761  // If we have a custom node, we already have selected!
2762  if (Node->isMachineOpcode()) {
2763  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2764  Node->setNodeId(-1);
2765  return;
2766  }
2767 
2768  // A few custom selection cases.
2769  EVT VT = Node->getValueType(0);
2770 
2771  switch (Node->getOpcode()) {
2772  default:
2773  break;
2774 
2775  case ISD::ATOMIC_CMP_SWAP:
2776  if (SelectCMP_SWAP(Node))
2777  return;
2778  break;
2779 
2780  case ISD::READ_REGISTER:
2781  if (tryReadRegister(Node))
2782  return;
2783  break;
2784 
2785  case ISD::WRITE_REGISTER:
2786  if (tryWriteRegister(Node))
2787  return;
2788  break;
2789 
2790  case ISD::ADD:
2791  if (tryMLAV64LaneV128(Node))
2792  return;
2793  break;
2794 
2795  case ISD::LOAD: {
2796  // Try to select as an indexed load. Fall through to normal processing
2797  // if we can't.
2798  if (tryIndexedLoad(Node))
2799  return;
2800  break;
2801  }
2802 
2803  case ISD::SRL:
2804  case ISD::AND:
2805  case ISD::SRA:
2806  case ISD::SIGN_EXTEND_INREG:
2807  if (tryBitfieldExtractOp(Node))
2808  return;
2809  if (tryBitfieldInsertInZeroOp(Node))
2810  return;
2811  LLVM_FALLTHROUGH;
2812  case ISD::ROTR:
2813  case ISD::SHL:
2814  if (tryShiftAmountMod(Node))
2815  return;
2816  break;
2817 
2818  case ISD::SIGN_EXTEND:
2819  if (tryBitfieldExtractOpFromSExt(Node))
2820  return;
2821  break;
2822 
2823  case ISD::OR:
2824  if (tryBitfieldInsertOp(Node))
2825  return;
2826  break;
2827 
2828  case ISD::EXTRACT_VECTOR_ELT: {
2829  // Extracting lane zero is a special case where we can just use a plain
2830  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2831  // the rest of the compiler, especially the register allocator and copy
2832  // propagation, to reason about, so is preferred when it's possible to
2833  // use it.
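 // For example, extracting lane 0 of a v2f64 value becomes an
 // EXTRACT_SUBREG of dsub, i.e. a plain copy of the low 64 bits.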
2834  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2835  // Bail and use the default Select() for non-zero lanes.
2836  if (LaneNode->getZExtValue() != 0)
2837  break;
2838  // If the element type is not the same as the result type, likewise
2839  // bail and use the default Select(), as there's more to do than just
2840  // a cross-class COPY. This catches extracts of i8 and i16 elements
2841  // since they will need an explicit zext.
2842  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2843  break;
2844  unsigned SubReg;
2845  switch (Node->getOperand(0)
2846  .getValueType()
2847  .getVectorElementType()
2848  .getSizeInBits()) {
2849  default:
2850  llvm_unreachable("Unexpected vector element type!");
2851  case 64:
2852  SubReg = AArch64::dsub;
2853  break;
2854  case 32:
2855  SubReg = AArch64::ssub;
2856  break;
2857  case 16:
2858  SubReg = AArch64::hsub;
2859  break;
2860  case 8:
2861  llvm_unreachable("unexpected zext-requiring extract element!");
2862  }
2863  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2864  Node->getOperand(0));
2865  LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2866  LLVM_DEBUG(Extract->dumpr(CurDAG));
2867  LLVM_DEBUG(dbgs() << "\n");
2868  ReplaceNode(Node, Extract.getNode());
2869  return;
2870  }
2871  case ISD::Constant: {
2872  // Materialize zero constants as copies from WZR/XZR. This allows
2873  // the coalescer to propagate these into other instructions.
2874  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2875  if (ConstNode->isNullValue()) {
2876  if (VT == MVT::i32) {
2877  SDValue New = CurDAG->getCopyFromReg(
2878  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2879  ReplaceNode(Node, New.getNode());
2880  return;
2881  } else if (VT == MVT::i64) {
2882  SDValue New = CurDAG->getCopyFromReg(
2883  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2884  ReplaceNode(Node, New.getNode());
2885  return;
2886  }
2887  }
2888  break;
2889  }
2890 
2891  case ISD::FrameIndex: {
2892  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2893  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2894  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2895  const TargetLowering *TLI = getTargetLowering();
2896  SDValue TFI = CurDAG->getTargetFrameIndex(
2897  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2898  SDLoc DL(Node);
2899  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2900  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2901  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2902  return;
2903  }
2904  case ISD::INTRINSIC_W_CHAIN: {
2905  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2906  switch (IntNo) {
2907  default:
2908  break;
2909  case Intrinsic::aarch64_ldaxp:
2910  case Intrinsic::aarch64_ldxp: {
2911  unsigned Op =
2912  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2913  SDValue MemAddr = Node->getOperand(2);
2914  SDLoc DL(Node);
2915  SDValue Chain = Node->getOperand(0);
2916 
2917  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2918  MVT::Other, MemAddr, Chain);
2919 
2920  // Transfer memoperands.
2921  MachineMemOperand *MemOp =
2922  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2923  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
2924  ReplaceNode(Node, Ld);
2925  return;
2926  }
2927  case Intrinsic::aarch64_stlxp:
2928  case Intrinsic::aarch64_stxp: {
2929  unsigned Op =
2930  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2931  SDLoc DL(Node);
2932  SDValue Chain = Node->getOperand(0);
2933  SDValue ValLo = Node->getOperand(2);
2934  SDValue ValHi = Node->getOperand(3);
2935  SDValue MemAddr = Node->getOperand(4);
2936 
2937  // Place arguments in the right order.
2938  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2939 
2940  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2941  // Transfer memoperands.
2942  MachineMemOperand *MemOp =
2943  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2944  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2945 
2946  ReplaceNode(Node, St);
2947  return;
2948  }
2949  case Intrinsic::aarch64_neon_ld1x2:
2950  if (VT == MVT::v8i8) {
2951  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2952  return;
2953  } else if (VT == MVT::v16i8) {
2954  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2955  return;
2956  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2957  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2958  return;
2959  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2960  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2961  return;
2962  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2963  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2964  return;
2965  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2966  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2967  return;
2968  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2969  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2970  return;
2971  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2972  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2973  return;
2974  }
2975  break;
2976  case Intrinsic::aarch64_neon_ld1x3:
2977  if (VT == MVT::v8i8) {
2978  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2979  return;
2980  } else if (VT == MVT::v16i8) {
2981  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2982  return;
2983  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2984  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2985  return;
2986  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2987  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2988  return;
2989  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2990  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2991  return;
2992  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2993  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2994  return;
2995  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2996  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2997  return;
2998  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2999  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3000  return;
3001  }
3002  break;
3003  case Intrinsic::aarch64_neon_ld1x4:
3004  if (VT == MVT::v8i8) {
3005  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3006  return;
3007  } else if (VT == MVT::v16i8) {
3008  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3009  return;
3010  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3011  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3012  return;
3013  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3014  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3015  return;
3016  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3017  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3018  return;
3019  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3020  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3021  return;
3022  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3023  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3024  return;
3025  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3026  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3027  return;
3028  }
3029  break;
3030  case Intrinsic::aarch64_neon_ld2:
3031  if (VT == MVT::v8i8) {
3032  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3033  return;
3034  } else if (VT == MVT::v16i8) {
3035  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3036  return;
3037  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3038  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3039  return;
3040  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3041  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3042  return;
3043  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3044  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3045  return;
3046  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3047  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3048  return;
3049  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3050  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3051  return;
3052  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3053  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3054  return;
3055  }
3056  break;
3057  case Intrinsic::aarch64_neon_ld3:
3058  if (VT == MVT::v8i8) {
3059  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3060  return;
3061  } else if (VT == MVT::v16i8) {
3062  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3063  return;
3064  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3065  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3066  return;
3067  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3068  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3069  return;
3070  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3071  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3072  return;
3073  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3074  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3075  return;
3076  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3077  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3078  return;
3079  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3080  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3081  return;
3082  }
3083  break;
3084  case Intrinsic::aarch64_neon_ld4:
3085  if (VT == MVT::v8i8) {
3086  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3087  return;
3088  } else if (VT == MVT::v16i8) {
3089  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3090  return;
3091  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3092  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3093  return;
3094  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3095  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3096  return;
3097  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3098  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3099  return;
3100  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3101  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3102  return;
3103  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3104  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3105  return;
3106  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3107  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3108  return;
3109  }
3110  break;
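  // The aarch64_neon_ldNr intrinsics load one structure and replicate it to
  // every lane of the N result vectors, mapping onto the LDNR instructions.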
3111  case Intrinsic::aarch64_neon_ld2r:
3112  if (VT == MVT::v8i8) {
3113  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3114  return;
3115  } else if (VT == MVT::v16i8) {
3116  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3117  return;
3118  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3119  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3120  return;
3121  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3122  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3123  return;
3124  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3125  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3126  return;
3127  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3128  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3129  return;
3130  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3131  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3132  return;
3133  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3134  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3135  return;
3136  }
3137  break;
3138  case Intrinsic::aarch64_neon_ld3r:
3139  if (VT == MVT::v8i8) {
3140  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3141  return;
3142  } else if (VT == MVT::v16i8) {
3143  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3144  return;
3145  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3146  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3147  return;
3148  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3149  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3150  return;
3151  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3152  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3153  return;
3154  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3155  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3156  return;
3157  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3158  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3159  return;
3160  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3161  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3162  return;
3163  }
3164  break;
3165  case Intrinsic::aarch64_neon_ld4r:
3166  if (VT == MVT::v8i8) {
3167  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3168  return;
3169  } else if (VT == MVT::v16i8) {
3170  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3171  return;
3172  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3173  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3174  return;
3175  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3176  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3177  return;
3178  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3179  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3180  return;
3181  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3182  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3183  return;
3184  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3185  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3186  return;
3187  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3188  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3189  return;
3190  }
3191  break;
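  // The aarch64_neon_ldNlane intrinsics load into a single lane of N vectors;
  // only the element size matters here, hence the grouped value-type checks.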
3192  case Intrinsic::aarch64_neon_ld2lane:
3193  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3194  SelectLoadLane(Node, 2, AArch64::LD2i8);
3195  return;
3196  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3197  VT == MVT::v8f16) {
3198  SelectLoadLane(Node, 2, AArch64::LD2i16);
3199  return;
3200  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3201  VT == MVT::v2f32) {
3202  SelectLoadLane(Node, 2, AArch64::LD2i32);
3203  return;
3204  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3205  VT == MVT::v1f64) {
3206  SelectLoadLane(Node, 2, AArch64::LD2i64);
3207  return;
3208  }
3209  break;
3210  case Intrinsic::aarch64_neon_ld3lane:
3211  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3212  SelectLoadLane(Node, 3, AArch64::LD3i8);
3213  return;
3214  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3215  VT == MVT::v8f16) {
3216  SelectLoadLane(Node, 3, AArch64::LD3i16);
3217  return;
3218  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3219  VT == MVT::v2f32) {
3220  SelectLoadLane(Node, 3, AArch64::LD3i32);
3221  return;
3222  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3223  VT == MVT::v1f64) {
3224  SelectLoadLane(Node, 3, AArch64::LD3i64);
3225  return;
3226  }
3227  break;
3228  case Intrinsic::aarch64_neon_ld4lane:
3229  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3230  SelectLoadLane(Node, 4, AArch64::LD4i8);
3231  return;
3232  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3233  VT == MVT::v8f16) {
3234  SelectLoadLane(Node, 4, AArch64::LD4i16);
3235  return;
3236  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3237  VT == MVT::v2f32) {
3238  SelectLoadLane(Node, 4, AArch64::LD4i32);
3239  return;
3240  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3241  VT == MVT::v1f64) {
3242  SelectLoadLane(Node, 4, AArch64::LD4i64);
3243  return;
3244  }
3245  break;
3246  }
3247  } break;
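  // Chainless intrinsics: TBL/TBX table lookups with two-, three- or
  // four-register tables (8- or 16-byte forms), plus an attempt to use the
  // by-lane forms of the widening multiplies smull/umull.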
3248  case ISD::INTRINSIC_WO_CHAIN: {
3249  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3250  switch (IntNo) {
3251  default:
3252  break;
3253  case Intrinsic::aarch64_neon_tbl2:
3254  SelectTable(Node, 2,
3255  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3256  false);
3257  return;
3258  case Intrinsic::aarch64_neon_tbl3:
3259  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3260  : AArch64::TBLv16i8Three,
3261  false);
3262  return;
3263  case Intrinsic::aarch64_neon_tbl4:
3264  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3265  : AArch64::TBLv16i8Four,
3266  false);
3267  return;
3268  case Intrinsic::aarch64_neon_tbx2:
3269  SelectTable(Node, 2,
3270  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3271  true);
3272  return;
3273  case Intrinsic::aarch64_neon_tbx3:
3274  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3275  : AArch64::TBXv16i8Three,
3276  true);
3277  return;
3278  case Intrinsic::aarch64_neon_tbx4:
3279  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3280  : AArch64::TBXv16i8Four,
3281  true);
3282  return;
3283  case Intrinsic::aarch64_neon_smull:
3284  case Intrinsic::aarch64_neon_umull:
3285  if (tryMULLV64LaneV128(IntNo, Node))
3286  return;
3287  break;
3288  }
3289  break;
3290  }
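  // Void intrinsics are the NEON stores. They produce no result value, so the
  // vector type is taken from operand 2, the first stored vector operand.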
3291  case ISD::INTRINSIC_VOID: {
3292  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3293  if (Node->getNumOperands() >= 3)
3294  VT = Node->getOperand(2)->getValueType(0);
3295  switch (IntNo) {
3296  default:
3297  break;
3298  case Intrinsic::aarch64_neon_st1x2: {
3299  if (VT == MVT::v8i8) {
3300  SelectStore(Node, 2, AArch64::ST1Twov8b);
3301  return;
3302  } else if (VT == MVT::v16i8) {
3303  SelectStore(Node, 2, AArch64::ST1Twov16b);
3304  return;
3305  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3306  SelectStore(Node, 2, AArch64::ST1Twov4h);
3307  return;
3308  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3309  SelectStore(Node, 2, AArch64::ST1Twov8h);
3310  return;
3311  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3312  SelectStore(Node, 2, AArch64::ST1Twov2s);
3313  return;
3314  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3315  SelectStore(Node, 2, AArch64::ST1Twov4s);
3316  return;
3317  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3318  SelectStore(Node, 2, AArch64::ST1Twov2d);
3319  return;
3320  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3321  SelectStore(Node, 2, AArch64::ST1Twov1d);
3322  return;
3323  }
3324  break;
3325  }
3326  case Intrinsic::aarch64_neon_st1x3: {
3327  if (VT == MVT::v8i8) {
3328  SelectStore(Node, 3, AArch64::ST1Threev8b);
3329  return;
3330  } else if (VT == MVT::v16i8) {
3331  SelectStore(Node, 3, AArch64::ST1Threev16b);
3332  return;
3333  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3334  SelectStore(Node, 3, AArch64::ST1Threev4h);
3335  return;
3336  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3337  SelectStore(Node, 3, AArch64::ST1Threev8h);
3338  return;
3339  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3340  SelectStore(Node, 3, AArch64::ST1Threev2s);
3341  return;
3342  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3343  SelectStore(Node, 3, AArch64::ST1Threev4s);
3344  return;
3345  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3346  SelectStore(Node, 3, AArch64::ST1Threev2d);
3347  return;
3348  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3349  SelectStore(Node, 3, AArch64::ST1Threev1d);
3350  return;
3351  }
3352  break;
3353  }
3354  case Intrinsic::aarch64_neon_st1x4: {
3355  if (VT == MVT::v8i8) {
3356  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3357  return;
3358  } else if (VT == MVT::v16i8) {
3359  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3360  return;
3361  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3362  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3363  return;
3364  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3365  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3366  return;
3367  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3368  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3369  return;
3370  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3371  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3372  return;
3373  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3374  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3375  return;
3376  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3377  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3378  return;
3379  }
3380  break;
3381  }
3382  case Intrinsic::aarch64_neon_st2: {
3383  if (VT == MVT::v8i8) {
3384  SelectStore(Node, 2, AArch64::ST2Twov8b);
3385  return;
3386  } else if (VT == MVT::v16i8) {
3387  SelectStore(Node, 2, AArch64::ST2Twov16b);
3388  return;
3389  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3390  SelectStore(Node, 2, AArch64::ST2Twov4h);
3391  return;
3392  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3393  SelectStore(Node, 2, AArch64::ST2Twov8h);
3394  return;
3395  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3396  SelectStore(Node, 2, AArch64::ST2Twov2s);
3397  return;
3398  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3399  SelectStore(Node, 2, AArch64::ST2Twov4s);
3400  return;
3401  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3402  SelectStore(Node, 2, AArch64::ST2Twov2d);
3403  return;
3404  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3405  SelectStore(Node, 2, AArch64::ST1Twov1d);
3406  return;
3407  }
3408  break;
3409  }
3410  case Intrinsic::aarch64_neon_st3: {
3411  if (VT == MVT::v8i8) {
3412  SelectStore(Node, 3, AArch64::ST3Threev8b);
3413  return;
3414  } else if (VT == MVT::v16i8) {
3415  SelectStore(Node, 3, AArch64::ST3Threev16b);
3416  return;
3417  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3418  SelectStore(Node, 3, AArch64::ST3Threev4h);
3419  return;
3420  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3421  SelectStore(Node, 3, AArch64::ST3Threev8h);
3422  return;
3423  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3424  SelectStore(Node, 3, AArch64::ST3Threev2s);
3425  return;
3426  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3427  SelectStore(Node, 3, AArch64::ST3Threev4s);
3428  return;
3429  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3430  SelectStore(Node, 3, AArch64::ST3Threev2d);
3431  return;
3432  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3433  SelectStore(Node, 3, AArch64::ST1Threev1d);
3434  return;
3435  }
3436  break;
3437  }
3438  case Intrinsic::aarch64_neon_st4: {
3439  if (VT == MVT::v8i8) {
3440  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3441  return;
3442  } else if (VT == MVT::v16i8) {
3443  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3444  return;
3445  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3446  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3447  return;
3448  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3449  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3450  return;
3451  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3452  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3453  return;
3454  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3455  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3456  return;
3457  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3458  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3459  return;
3460  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3461  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3462  return;
3463  }
3464  break;
3465  }
3466  case Intrinsic::aarch64_neon_st2lane: {
3467  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3468  SelectStoreLane(Node, 2, AArch64::ST2i8);
3469  return;
3470  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3471  VT == MVT::v8f16) {
3472  SelectStoreLane(Node, 2, AArch64::ST2i16);
3473  return;
3474  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3475  VT == MVT::v2f32) {
3476  SelectStoreLane(Node, 2, AArch64::ST2i32);
3477  return;
3478  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3479  VT == MVT::v1f64) {
3480  SelectStoreLane(Node, 2, AArch64::ST2i64);
3481  return;
3482  }
3483  break;
3484  }
3485  case Intrinsic::aarch64_neon_st3lane: {
3486  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3487  SelectStoreLane(Node, 3, AArch64::ST3i8);
3488  return;
3489  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3490  VT == MVT::v8f16) {
3491  SelectStoreLane(Node, 3, AArch64::ST3i16);
3492  return;
3493  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3494  VT == MVT::v2f32) {
3495  SelectStoreLane(Node, 3, AArch64::ST3i32);
3496  return;
3497  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3498  VT == MVT::v1f64) {
3499  SelectStoreLane(Node, 3, AArch64::ST3i64);
3500  return;
3501  }
3502  break;
3503  }
3504  case Intrinsic::aarch64_neon_st4lane: {
3505  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3506  SelectStoreLane(Node, 4, AArch64::ST4i8);
3507  return;
3508  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3509  VT == MVT::v8f16) {
3510  SelectStoreLane(Node, 4, AArch64::ST4i16);
3511  return;
3512  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3513  VT == MVT::v2f32) {
3514  SelectStoreLane(Node, 4, AArch64::ST4i32);
3515  return;
3516  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3517  VT == MVT::v1f64) {
3518  SelectStoreLane(Node, 4, AArch64::ST4i64);
3519  return;
3520  }
3521  break;
3522  }
3523  }
3524  break;
3525  }
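  // The remaining cases select the target-specific post-increment nodes that
  // DAG combining forms when a load/store is followed by a matching address
  // update; the _POST instruction variants write the new base register back.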
3526  case AArch64ISD::LD2post: {
3527  if (VT == MVT::v8i8) {
3528  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3529  return;
3530  } else if (VT == MVT::v16i8) {
3531  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3532  return;
3533  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3534  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3535  return;
3536  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3537  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3538  return;
3539  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3540  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3541  return;
3542  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3543  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3544  return;
3545  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3546  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3547  return;
3548  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3549  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3550  return;
3551  }
3552  break;
3553  }
3554  case AArch64ISD::LD3post: {
3555  if (VT == MVT::v8i8) {
3556  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3557  return;
3558  } else if (VT == MVT::v16i8) {
3559  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3560  return;
3561  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3562  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3563  return;
3564  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3565  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3566  return;
3567  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3568  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3569  return;
3570  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3571  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3572  return;
3573  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3574  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3575  return;
3576  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3577  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3578  return;
3579  }
3580  break;
3581  }
3582  case AArch64ISD::LD4post: {
3583  if (VT == MVT::v8i8) {
3584  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3585  return;
3586  } else if (VT == MVT::v16i8) {
3587  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3588  return;
3589  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3590  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3591  return;
3592  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3593  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3594  return;
3595  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3596  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3597  return;
3598  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3599  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3600  return;
3601  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3602  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3603  return;
3604  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3605  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3606  return;
3607  }
3608  break;
3609  }
3610  case AArch64ISD::LD1x2post: {
3611  if (VT == MVT::v8i8) {
3612  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3613  return;
3614  } else if (VT == MVT::v16i8) {
3615  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3616  return;
3617  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3618  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3619  return;
3620  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3621  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3622  return;
3623  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3624  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3625  return;
3626  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3627  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3628  return;
3629  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3630  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3631  return;
3632  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3633  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3634  return;
3635  }
3636  break;
3637  }
3638  case AArch64ISD::LD1x3post: {
3639  if (VT == MVT::v8i8) {
3640  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3641  return;
3642  } else if (VT == MVT::v16i8) {
3643  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3644  return;
3645  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3646  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3647  return;
3648  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3649  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3650  return;
3651  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3652  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3653  return;
3654  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3655  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3656  return;
3657  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3658  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3659  return;
3660  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3661  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3662  return;
3663  }
3664  break;
3665  }
3666  case AArch64ISD::LD1x4post: {
3667  if (VT == MVT::v8i8) {
3668  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3669  return;
3670  } else if (VT == MVT::v16i8) {
3671  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3672  return;
3673  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3674  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3675  return;
3676  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3677  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3678  return;
3679  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3680  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3681  return;
3682  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3683  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3684  return;
3685  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3686  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3687  return;
3688  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3689  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3690  return;
3691  }
3692  break;
3693  }
3694  case AArch64ISD::LD1DUPpost: {
3695  if (VT == MVT::v8i8) {
3696  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3697  return;
3698  } else if (VT == MVT::v16i8) {
3699  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3700  return;
3701  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3702  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3703  return;
3704  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3705  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3706  return;
3707  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3708  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3709  return;
3710  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3711  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3712  return;
3713  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3714  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3715  return;
3716  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3717  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3718  return;
3719  }
3720  break;
3721  }
3722  case AArch64ISD::LD2DUPpost: {
3723  if (VT == MVT::v8i8) {
3724  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3725  return;
3726  } else if (VT == MVT::v16i8) {
3727  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3728  return;
3729  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3730  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3731  return;
3732  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3733  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3734  return;
3735  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3736  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3737  return;
3738  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3739  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3740  return;
3741  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3742  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3743  return;
3744  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3745  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3746  return;
3747  }
3748  break;
3749  }
3750  case AArch64ISD::LD3DUPpost: {
3751  if (VT == MVT::v8i8) {
3752  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3753  return;
3754  } else if (VT == MVT::v16i8) {
3755  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3756  return;
3757  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3758  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3759  return;
3760  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3761  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3762  return;
3763  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3764  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3765  return;
3766  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3767  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3768  return;
3769  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3770  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3771  return;
3772  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3773  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3774  return;
3775  }
3776  break;
3777  }
3778  case AArch64ISD::LD4DUPpost: {
3779  if (VT == MVT::v8i8) {
3780  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3781  return;
3782  } else if (VT == MVT::v16i8) {
3783  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3784  return;
3785  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3786  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3787  return;
3788  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3789  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3790  return;
3791  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3792  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3793  return;
3794  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3795  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3796  return;
3797  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3798  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3799  return;
3800  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3801  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3802  return;
3803  }
3804  break;
3805  }
3806  case AArch64ISD::LD1LANEpost: {
3807  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3808  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3809  return;
3810  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3811  VT == MVT::v8f16) {
3812  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3813  return;
3814  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3815  VT == MVT::v2f32) {
3816  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3817  return;
3818  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3819  VT == MVT::v1f64) {
3820  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3821  return;
3822  }
3823  break;
3824  }
3825  case AArch64ISD::LD2LANEpost: {
3826  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3827  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3828  return;
3829  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3830  VT == MVT::v8f16) {
3831  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3832  return;
3833  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3834  VT == MVT::v2f32) {
3835  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3836  return;
3837  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3838  VT == MVT::v1f64) {
3839  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3840  return;
3841  }
3842  break;
3843  }
3844  case AArch64ISD::LD3LANEpost: {
3845  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3846  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3847  return;
3848  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3849  VT == MVT::v8f16) {
3850  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3851  return;
3852  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3853  VT == MVT::v2f32) {
3854  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3855  return;
3856  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3857  VT == MVT::v1f64) {
3858  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3859  return;
3860  }
3861  break;
3862  }
3863  case AArch64ISD::LD4LANEpost: {
3864  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3865  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3866  return;
3867  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3868  VT == MVT::v8f16) {
3869  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3870  return;
3871  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3872  VT == MVT::v2f32) {
3873  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3874  return;
3875  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3876  VT == MVT::v1f64) {
3877  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3878  return;
3879  }
3880  break;
3881  }
3882  case AArch64ISD::ST2post: {
3883  VT = Node->getOperand(1).getValueType();
3884  if (VT == MVT::v8i8) {
3885  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3886  return;
3887  } else if (VT == MVT::v16i8) {
3888  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3889  return;
3890  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3891  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3892  return;
3893  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3894  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3895  return;
3896  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3897  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3898  return;
3899  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3900  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3901  return;
3902  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3903  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3904  return;
3905  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3906  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3907  return;
3908  }
3909  break;
3910  }
3911  case AArch64ISD::ST3post: {
3912  VT = Node->getOperand(1).getValueType();
3913  if (VT == MVT::v8i8) {
3914  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3915  return;
3916  } else if (VT == MVT::v16i8) {
3917  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3918  return;
3919  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3920  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3921  return;
3922  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3923  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3924  return;
3925  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3926  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3927  return;
3928  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3929  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3930  return;
3931  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3932  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3933  return;
3934  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3935  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3936  return;
3937  }
3938  break;
3939  }
3940  case AArch64ISD::ST4post: {
3941  VT = Node->getOperand(1).getValueType();
3942  if (VT == MVT::v8i8) {
3943  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3944  return;
3945  } else if (VT == MVT::v16i8) {
3946  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3947  return;
3948  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3949  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3950  return;
3951  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3952  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3953  return;
3954  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3955  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3956  return;
3957  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3958  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3959  return;
3960  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3961  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3962  return;
3963  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3964  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3965  return;
3966  }
3967  break;
3968  }
3969  case AArch64ISD::ST1x2post: {
3970  VT = Node->getOperand(1).getValueType();
3971  if (VT == MVT::v8i8) {
3972  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3973  return;
3974  } else if (VT == MVT::v16i8) {
3975  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3976  return;
3977  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3978  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3979  return;
3980  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3981  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3982  return;
3983  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3984  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3985  return;
3986  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3987  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3988  return;
3989  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3990  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3991  return;
3992  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3993  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3994  return;
3995  }
3996  break;
3997  }
3998  case AArch64ISD::ST1x3post: {
3999  VT = Node->getOperand(1).getValueType();
4000  if (VT == MVT::v8i8) {
4001  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4002  return;
4003  } else if (VT == MVT::v16i8) {
4004  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4005  return;
4006  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4007  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4008  return;
4009  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4010  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4011  return;
4012  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4013  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4014  return;
4015  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4016  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4017  return;
4018  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4019  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4020  return;
4021  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4022  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4023  return;
4024  }
4025  break;
4026  }
4027  case AArch64ISD::ST1x4post: {
4028  VT = Node->getOperand(1).getValueType();
4029  if (VT == MVT::v8i8) {
4030  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4031  return;
4032  } else if (VT == MVT::v16i8) {
4033  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4034  return;
4035  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4036  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4037  return;
4038  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4039  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4040  return;
4041  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4042  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4043  return;
4044  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4045  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4046  return;
4047  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4048  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4049  return;
4050  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4051  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4052  return;
4053  }
4054  break;
4055  }
4056  case AArch64ISD::ST2LANEpost: {
4057  VT = Node->getOperand(1).getValueType();
4058  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4059  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4060  return;
4061  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4062  VT == MVT::v8f16) {
4063  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4064  return;
4065  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4066  VT == MVT::v2f32) {
4067  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4068  return;
4069  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4070  VT == MVT::v1f64) {
4071  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4072  return;
4073  }
4074  break;
4075  }
4076  case AArch64ISD::ST3LANEpost: {
4077  VT = Node->getOperand(1).getValueType();
4078  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4079  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4080  return;
4081  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4082  VT == MVT::v8f16) {
4083  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4084  return;
4085  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4086  VT == MVT::v2f32) {
4087  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4088  return;
4089  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4090  VT == MVT::v1f64) {
4091  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4092  return;
4093  }
4094  break;
4095  }
4096  case AArch64ISD::ST4LANEpost: {
4097  VT = Node->getOperand(1).getValueType();
4098  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4099  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4100  return;
4101  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4102  VT == MVT::v8f16) {
4103  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4104  return;
4105  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4106  VT == MVT::v2f32) {
4107  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4108  return;
4109  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4110  VT == MVT::v1f64) {
4111  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4112  return;
4113  }
4114  break;
4115  }
4116  }
4117 
4118  // Select the default instruction
4119  SelectCode(Node);
4120 }
4121 
4122 /// createAArch64ISelDag - This pass converts a legalized DAG into a
4123 /// AArch64-specific DAG, ready for instruction scheduling.
4124 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4125                                          CodeGenOpt::Level OptLevel) {
4126  return new AArch64DAGToDAGISel(TM, OptLevel);
4127 }
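As a usage note, the factory above is not invoked directly; the AArch64 target registers the pass from its TargetPassConfig subclass. The following is a minimal sketch modeled on AArch64PassConfig::addInstSelector in AArch64TargetMachine.cpp; the surrounding class definition and any additional passes added at that point are elided, so treat the exact body as illustrative rather than authoritative.

 #include "AArch64TargetMachine.h"
 #include "llvm/CodeGen/TargetPassConfig.h"

 using namespace llvm;

 // Install the SelectionDAG-based instruction selector defined in this file.
 // getAArch64TargetMachine() is the usual in-tree helper wrapping
 // TargetPassConfig::getTM<AArch64TargetMachine>(); a false return signals
 // success to the generic pass-configuration driver.
 bool AArch64PassConfig::addInstSelector() {
   addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
   return false;
 }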