AArch64ISelDAGToDAG.cpp (LLVM 10.0.0svn)
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64TargetMachine.h"
14 #include "MCTargetDesc/AArch64AddressingModes.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/CodeGen/SelectionDAGISel.h"
17 #include "llvm/IR/Function.h" // To access function attributes.
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/KnownBits.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "aarch64-isel"
29 
30 //===--------------------------------------------------------------------===//
31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32 /// instructions for SelectionDAG operations.
33 ///
34 namespace {
35 
36 class AArch64DAGToDAGISel : public SelectionDAGISel {
37 
38  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
39  /// make the right decision when generating code for different targets.
40  const AArch64Subtarget *Subtarget;
41 
42  bool ForCodeSize;
43 
44 public:
45  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
46  CodeGenOpt::Level OptLevel)
47  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
48  ForCodeSize(false) {}
49 
50  StringRef getPassName() const override {
51  return "AArch64 Instruction Selection";
52  }
53 
54  bool runOnMachineFunction(MachineFunction &MF) override {
55  ForCodeSize = MF.getFunction().hasOptSize();
56  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57  return SelectionDAGISel::runOnMachineFunction(MF);
58  }
59 
60  void Select(SDNode *Node) override;
61 
62  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63  /// inline asm expressions.
64  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65  unsigned ConstraintID,
66  std::vector<SDValue> &OutOps) override;
67 
68  bool tryMLAV64LaneV128(SDNode *N);
69  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
70  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74  return SelectShiftedRegister(N, false, Reg, Shift);
75  }
76  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77  return SelectShiftedRegister(N, true, Reg, Shift);
78  }
79  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81  }
82  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84  }
85  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87  }
88  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90  }
91  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93  }
94  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95  return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96  }
97  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98  return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99  }
100  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101  return SelectAddrModeIndexed(N, 1, Base, OffImm);
102  }
103  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104  return SelectAddrModeIndexed(N, 2, Base, OffImm);
105  }
106  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107  return SelectAddrModeIndexed(N, 4, Base, OffImm);
108  }
109  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110  return SelectAddrModeIndexed(N, 8, Base, OffImm);
111  }
112  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113  return SelectAddrModeIndexed(N, 16, Base, OffImm);
114  }
115  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117  }
118  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120  }
121  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123  }
124  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126  }
127  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129  }
130 
131  template<int Width>
132  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
133  SDValue &SignExtend, SDValue &DoShift) {
134  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
135  }
136 
137  template<int Width>
138  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
139  SDValue &SignExtend, SDValue &DoShift) {
140  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
141  }
142 
143 
144  /// Form sequences of consecutive 64/128-bit registers for use in NEON
145  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
146  /// between 1 and 4 elements. If it contains a single element, that element
147  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
148  SDValue createDTuple(ArrayRef<SDValue> Vecs);
149  SDValue createQTuple(ArrayRef<SDValue> Vecs);
150 
151  /// Generic helper for the createDTuple/createQTuple
152  /// functions. Those should almost always be called instead.
153  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
154  const unsigned SubRegs[]);
155 
156  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
157 
158  bool tryIndexedLoad(SDNode *N);
159 
160  bool trySelectStackSlotTagP(SDNode *N);
161  void SelectTagP(SDNode *N);
162 
163  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
164  unsigned SubRegIdx);
165  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
166  unsigned SubRegIdx);
167  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
168  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
169 
170  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
171  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
172  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
173  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
174 
175  bool tryBitfieldExtractOp(SDNode *N);
176  bool tryBitfieldExtractOpFromSExt(SDNode *N);
177  bool tryBitfieldInsertOp(SDNode *N);
178  bool tryBitfieldInsertInZeroOp(SDNode *N);
179  bool tryShiftAmountMod(SDNode *N);
180 
181  bool tryReadRegister(SDNode *N);
182  bool tryWriteRegister(SDNode *N);
183 
184 // Include the pieces autogenerated from the target description.
185 #include "AArch64GenDAGISel.inc"
186 
187 private:
188  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
189  SDValue &Shift);
190  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
191  SDValue &OffImm) {
192  return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
193  }
194  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
195  unsigned Size, SDValue &Base,
196  SDValue &OffImm);
197  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
198  SDValue &OffImm);
199  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
200  SDValue &OffImm);
201  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
202  SDValue &Offset, SDValue &SignExtend,
203  SDValue &DoShift);
204  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
205  SDValue &Offset, SDValue &SignExtend,
206  SDValue &DoShift);
207  bool isWorthFolding(SDValue V) const;
208  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
209  SDValue &Offset, SDValue &SignExtend);
210 
211  template<unsigned RegWidth>
212  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
213  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
214  }
215 
216  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
217 
218  bool SelectCMP_SWAP(SDNode *N);
219 
220 };
221 } // end anonymous namespace
222 
223 /// isIntImmediate - This method tests to see if the node is a constant
224  /// operand. If so, Imm will receive the (zero-extended) value.
225 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
226  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
227  Imm = C->getZExtValue();
228  return true;
229  }
230  return false;
231 }
232 
233 // isIntImmediate - This method tests to see if the operand is a constant.
234 // If so, Imm will receive the value.
235 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
236  return isIntImmediate(N.getNode(), Imm);
237 }
238 
239 // isOpcWithIntImmediate - This method tests to see if the node is a specific
240 // opcode and that it has an immediate integer right operand.
241 // If so, Imm will receive the value.
242 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
243  uint64_t &Imm) {
244  return N->getOpcode() == Opc &&
245  isIntImmediate(N->getOperand(1).getNode(), Imm);
246 }
247 
248 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
249  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
250  switch(ConstraintID) {
251  default:
252  llvm_unreachable("Unexpected asm memory constraint");
253  case InlineAsm::Constraint_i:
254  case InlineAsm::Constraint_m:
255  case InlineAsm::Constraint_Q:
256  // We need to make sure that this one operand does not end up in XZR, thus
257  // require the address to be in a PointerRegClass register.
258  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
259  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
260  SDLoc dl(Op);
261  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
262  SDValue NewOp =
263  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
264  dl, Op.getValueType(),
265  Op, RC), 0);
266  OutOps.push_back(NewOp);
267  return false;
268  }
269  return true;
270 }
271 
272 /// SelectArithImmed - Select an immediate value that can be represented as
273 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
274 /// Val set to the 12-bit value and Shift set to the shifter operand.
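// Editor's note (illustrative examples, not part of the original source):
//   N = 0x123    -> Val = 0x123, Shift = LSL #0   ("add Xd, Xn, #0x123")
//   N = 0x123000 -> Val = 0x123, Shift = LSL #12  ("add Xd, Xn, #0x123, lsl #12")
//   N = 0x123456 -> no match; such a constant must be materialized separately.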
275 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
276  SDValue &Shift) {
277  // This function is called from the addsub_shifted_imm ComplexPattern,
278  // which lists [imm] as the list of opcode it's interested in, however
279  // we still need to check whether the operand is actually an immediate
280  // here because the ComplexPattern opcode list is only used in
281  // root-level opcode matching.
282  if (!isa<ConstantSDNode>(N.getNode()))
283  return false;
284 
285  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
286  unsigned ShiftAmt;
287 
288  if (Immed >> 12 == 0) {
289  ShiftAmt = 0;
290  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
291  ShiftAmt = 12;
292  Immed = Immed >> 12;
293  } else
294  return false;
295 
296  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
297  SDLoc dl(N);
298  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
299  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
300  return true;
301 }
302 
303 /// SelectNegArithImmed - As above, but negates the value before trying to
304 /// select it.
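// Editor's note (illustrative, not from the original source): for a 32-bit
// operand, an immediate of -16 is negated to 16, which SelectArithImmed then
// accepts, so an (add x, #-16) can be selected as a subtract of #16. The
// Immed == 0 case is rejected because "cmp wN, #0" and "cmn wN, #0" set the
// C flag differently.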
305 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
306  SDValue &Shift) {
307  // This function is called from the addsub_shifted_imm ComplexPattern,
308  // which lists [imm] as the list of opcode it's interested in, however
309  // we still need to check whether the operand is actually an immediate
310  // here because the ComplexPattern opcode list is only used in
311  // root-level opcode matching.
312  if (!isa<ConstantSDNode>(N.getNode()))
313  return false;
314 
315  // The immediate operand must be a 24-bit zero-extended immediate.
316  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
317 
318  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
319  // have the opposite effect on the C flag, so this pattern mustn't match under
320  // those circumstances.
321  if (Immed == 0)
322  return false;
323 
324  if (N.getValueType() == MVT::i32)
325  Immed = ~((uint32_t)Immed) + 1;
326  else
327  Immed = ~Immed + 1ULL;
328  if (Immed & 0xFFFFFFFFFF000000ULL)
329  return false;
330 
331  Immed &= 0xFFFFFFULL;
332  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
333  Shift);
334 }
335 
336 /// getShiftTypeForNode - Translate a shift node to the corresponding
337 /// ShiftType value.
338 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
339  switch (N.getOpcode()) {
340  default:
341  return AArch64_AM::InvalidShiftExtend;
342  case ISD::SHL:
343  return AArch64_AM::LSL;
344  case ISD::SRL:
345  return AArch64_AM::LSR;
346  case ISD::SRA:
347  return AArch64_AM::ASR;
348  case ISD::ROTR:
349  return AArch64_AM::ROR;
350  }
351 }
352 
353 /// Determine whether it is worth it to fold SHL into the addressing
354 /// mode.
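// Editor's note (illustrative, not from the original source): in a DAG like
//   (load (add x, (shl y, #3)))
// the shift can be folded into "ldr Xd, [Xbase, Xindex, lsl #3]" provided the
// shifted value only feeds memory operations; if it has other users, the
// ADD/SHL must be materialized anyway and folding it again gains nothing.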
355 static bool isWorthFoldingSHL(SDValue V) {
356  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
357  // It is worth folding logical shift of up to three places.
358  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
359  if (!CSD)
360  return false;
361  unsigned ShiftVal = CSD->getZExtValue();
362  if (ShiftVal > 3)
363  return false;
364 
365  // Check if this particular node is reused in any non-memory related
366  // operation. If yes, do not try to fold this node into the address
367  // computation, since the computation will be kept.
368  const SDNode *Node = V.getNode();
369  for (SDNode *UI : Node->uses())
370  if (!isa<MemSDNode>(*UI))
371  for (SDNode *UII : UI->uses())
372  if (!isa<MemSDNode>(*UII))
373  return false;
374  return true;
375 }
376 
377 /// Determine whether it is worth folding V into an extended register.
378 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
379  // Trivial if we are optimizing for code size or if there is only
380  // one use of the value.
381  if (ForCodeSize || V.hasOneUse())
382  return true;
383  // If a subtarget has a fastpath LSL we can fold a logical shift into
384  // the addressing mode and save a cycle.
385  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
386  isWorthFoldingSHL(V))
387  return true;
388  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
389  const SDValue LHS = V.getOperand(0);
390  const SDValue RHS = V.getOperand(1);
391  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
392  return true;
393  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
394  return true;
395  }
396 
397  // It hurts otherwise, since the value will be reused.
398  return false;
399 }
400 
401 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
402 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
403 /// instructions allow the shifted register to be rotated, but the arithmetic
404 /// instructions do not. The AllowROR parameter specifies whether ROR is
405 /// supported.
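// Editor's note (illustrative, not from the original source):
//   (or  a, (shl b, #8)) -> "orr Xd, Xa, Xb, lsl #8"   (AllowROR == true)
//   (add a, (srl b, #2)) -> "add Xd, Xa, Xb, lsr #2"   (AllowROR == false)
// A rotate (ROR) is only folded for the logical instruction forms.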
406 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
407  SDValue &Reg, SDValue &Shift) {
408  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
409  if (ShType == AArch64_AM::InvalidShiftExtend)
410  return false;
411  if (!AllowROR && ShType == AArch64_AM::ROR)
412  return false;
413 
414  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
415  unsigned BitSize = N.getValueSizeInBits();
416  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
417  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
418 
419  Reg = N.getOperand(0);
420  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
421  return isWorthFolding(N);
422  }
423 
424  return false;
425 }
426 
427 /// getExtendTypeForNode - Translate an extend node to the corresponding
428 /// ExtendType value.
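// Editor's note (illustrative, not from the original source): a
// (sext_inreg x, i8) maps to SXTB, a zero-extend from i16 to UXTH, and an
// (and x, 0xFFFFFFFF) to UXTW. With IsLoadStore == true only the 32-bit
// forms are usable, so the 8/16-bit cases report InvalidShiftExtend.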
429 static AArch64_AM::ShiftExtendType
430 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
431  if (N.getOpcode() == ISD::SIGN_EXTEND ||
432  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
433  EVT SrcVT;
434  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
435  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
436  else
437  SrcVT = N.getOperand(0).getValueType();
438 
439  if (!IsLoadStore && SrcVT == MVT::i8)
440  return AArch64_AM::SXTB;
441  else if (!IsLoadStore && SrcVT == MVT::i16)
442  return AArch64_AM::SXTH;
443  else if (SrcVT == MVT::i32)
444  return AArch64_AM::SXTW;
445  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
446 
447  return AArch64_AM::InvalidShiftExtend;
448  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
449  N.getOpcode() == ISD::ANY_EXTEND) {
450  EVT SrcVT = N.getOperand(0).getValueType();
451  if (!IsLoadStore && SrcVT == MVT::i8)
452  return AArch64_AM::UXTB;
453  else if (!IsLoadStore && SrcVT == MVT::i16)
454  return AArch64_AM::UXTH;
455  else if (SrcVT == MVT::i32)
456  return AArch64_AM::UXTW;
457  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
458 
459  return AArch64_AM::InvalidShiftExtend;
460  } else if (N.getOpcode() == ISD::AND) {
461  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
462  if (!CSD)
463  return AArch64_AM::InvalidShiftExtend;
464  uint64_t AndMask = CSD->getZExtValue();
465 
466  switch (AndMask) {
467  default:
468  return AArch64_AM::InvalidShiftExtend;
469  case 0xFF:
470  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
471  case 0xFFFF:
472  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
473  case 0xFFFFFFFF:
474  return AArch64_AM::UXTW;
475  }
476  }
477 
478  return AArch64_AM::InvalidShiftExtend;
479 }
480 
481 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
482 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
483  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
484  DL->getOpcode() != AArch64ISD::DUPLANE32)
485  return false;
486 
487  SDValue SV = DL->getOperand(0);
488  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
489  return false;
490 
491  SDValue EV = SV.getOperand(1);
492  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
493  return false;
494 
495  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
496  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
497  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
498  LaneOp = EV.getOperand(0);
499 
500  return true;
501 }
502 
503 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
504 // high lane extract.
505 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
506  SDValue &LaneOp, int &LaneIdx) {
507 
508  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
509  std::swap(Op0, Op1);
510  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
511  return false;
512  }
513  StdOp = Op1;
514  return true;
515 }
516 
517 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
518 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
519 /// so that we don't emit unnecessary lane extracts.
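// Editor's note (illustrative, not from the original source): when the
// multiplicand is a lane taken from the high half of a 128-bit vector, this
// selects an indexed MLA such as "mla v0.4h, v1.4h, v2.h[5]" directly, with
// the lane index rebased into the full Q register, instead of first
// extracting the upper 64 bits into a separate register.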
520 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
521  SDLoc dl(N);
522  SDValue Op0 = N->getOperand(0);
523  SDValue Op1 = N->getOperand(1);
524  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
525  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
526  int LaneIdx = -1; // Will hold the lane index.
527 
528  if (Op1.getOpcode() != ISD::MUL ||
529  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
530  LaneIdx)) {
531  std::swap(Op0, Op1);
532  if (Op1.getOpcode() != ISD::MUL ||
533  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
534  LaneIdx))
535  return false;
536  }
537 
538  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
539 
540  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
541 
542  unsigned MLAOpc = ~0U;
543 
544  switch (N->getSimpleValueType(0).SimpleTy) {
545  default:
546  llvm_unreachable("Unrecognized MLA.");
547  case MVT::v4i16:
548  MLAOpc = AArch64::MLAv4i16_indexed;
549  break;
550  case MVT::v8i16:
551  MLAOpc = AArch64::MLAv8i16_indexed;
552  break;
553  case MVT::v2i32:
554  MLAOpc = AArch64::MLAv2i32_indexed;
555  break;
556  case MVT::v4i32:
557  MLAOpc = AArch64::MLAv4i32_indexed;
558  break;
559  }
560 
561  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
562  return true;
563 }
564 
565 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
566  SDLoc dl(N);
567  SDValue SMULLOp0;
568  SDValue SMULLOp1;
569  int LaneIdx;
570 
571  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
572  LaneIdx))
573  return false;
574 
575  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
576 
577  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
578 
579  unsigned SMULLOpc = ~0U;
580 
581  if (IntNo == Intrinsic::aarch64_neon_smull) {
582  switch (N->getSimpleValueType(0).SimpleTy) {
583  default:
584  llvm_unreachable("Unrecognized SMULL.");
585  case MVT::v4i32:
586  SMULLOpc = AArch64::SMULLv4i16_indexed;
587  break;
588  case MVT::v2i64:
589  SMULLOpc = AArch64::SMULLv2i32_indexed;
590  break;
591  }
592  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
593  switch (N->getSimpleValueType(0).SimpleTy) {
594  default:
595  llvm_unreachable("Unrecognized SMULL.");
596  case MVT::v4i32:
597  SMULLOpc = AArch64::UMULLv4i16_indexed;
598  break;
599  case MVT::v2i64:
600  SMULLOpc = AArch64::UMULLv2i32_indexed;
601  break;
602  }
603  } else
604  llvm_unreachable("Unrecognized intrinsic.");
605 
606  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
607  return true;
608 }
609 
610 /// Instructions that accept extend modifiers like UXTW expect the register
611 /// being extended to be a GPR32, but the incoming DAG might be acting on a
612 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
613 /// this is the case.
614 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
615  if (N.getValueType() == MVT::i32)
616  return N;
617 
618  SDLoc dl(N);
619  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
620  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
621  dl, MVT::i32, N, SubReg);
622  return SDValue(Node, 0);
623 }
624 
625 
626 /// SelectArithExtendedRegister - Select an "extended register" operand. This
627 /// operand folds in an extend followed by an optional left shift.
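// Editor's note (illustrative, not from the original source): a DAG such as
//   (add x0, (shl (sext_inreg x1, i16), #2))
// can be selected as "add x0, x0, w1, sxth #2"; the extend and a left shift
// of 0-4 are both folded into the extended-register operand.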
628 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
629  SDValue &Shift) {
630  unsigned ShiftVal = 0;
631  AArch64_AM::ShiftExtendType Ext;
632 
633  if (N.getOpcode() == ISD::SHL) {
634  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
635  if (!CSD)
636  return false;
637  ShiftVal = CSD->getZExtValue();
638  if (ShiftVal > 4)
639  return false;
640 
641  Ext = getExtendTypeForNode(N.getOperand(0));
642  if (Ext == AArch64_AM::InvalidShiftExtend)
643  return false;
644 
645  Reg = N.getOperand(0).getOperand(0);
646  } else {
647  Ext = getExtendTypeForNode(N);
648  if (Ext == AArch64_AM::InvalidShiftExtend)
649  return false;
650 
651  Reg = N.getOperand(0);
652 
653  // Don't match if free 32-bit -> 64-bit zext can be used instead.
654  if (Ext == AArch64_AM::UXTW &&
655  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
656  return false;
657  }
658 
659  // AArch64 mandates that the RHS of the operation must use the smallest
660  // register class that could contain the size being extended from. Thus,
661  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
662  // there might not be an actual 32-bit value in the program. We can
663  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
664  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
665  Reg = narrowIfNeeded(CurDAG, Reg);
666  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
667  MVT::i32);
668  return isWorthFolding(N);
669 }
670 
671 /// If there's a use of this ADDlow that's not itself a load/store then we'll
672 /// need to create a real ADD instruction from it anyway and there's no point in
673 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
674 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
675 /// leads to duplicated ADRP instructions.
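// Editor's note (illustrative, not from the original source): if the
// (ADRP + ADDlow) address of a global feeds both a load and, say, a call
// argument, the non-memory use forces a real "add xN, xN, :lo12:sym" to be
// emitted anyway, so the :lo12: part is not also folded into the load.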
676 static bool isWorthFoldingADDlow(SDValue N) {
677  for (auto Use : N->uses()) {
678  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
679  Use->getOpcode() != ISD::ATOMIC_LOAD &&
680  Use->getOpcode() != ISD::ATOMIC_STORE)
681  return false;
682 
683  // ldar and stlr have much more restrictive addressing modes (just a
684  // register).
685  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
686  return false;
687  }
688 
689  return true;
690 }
691 
692 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
693 /// immediate" address. The "Size" argument is the size in bytes of the memory
694 /// reference, which determines the scale.
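// Editor's worked example (not part of the original source): with BW = 7 and
// Size = 8 (a 64-bit LDP/STP-style offset), the accepted offsets are the
// multiples of 8 in [-512, 504]; a byte offset of 264 is encoded as
// OffImm = 264 >> 3 = 33.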
695 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
696  unsigned BW, unsigned Size,
697  SDValue &Base,
698  SDValue &OffImm) {
699  SDLoc dl(N);
700  const DataLayout &DL = CurDAG->getDataLayout();
701  const TargetLowering *TLI = getTargetLowering();
702  if (N.getOpcode() == ISD::FrameIndex) {
703  int FI = cast<FrameIndexSDNode>(N)->getIndex();
704  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
705  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
706  return true;
707  }
708 
709  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
710  // signed addressing mode selected here doesn't support labels/immediates, only base+offset.
711  if (CurDAG->isBaseWithConstantOffset(N)) {
712  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
713  if (IsSignedImm) {
714  int64_t RHSC = RHS->getSExtValue();
715  unsigned Scale = Log2_32(Size);
716  int64_t Range = 0x1LL << (BW - 1);
717 
718  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
719  RHSC < (Range << Scale)) {
720  Base = N.getOperand(0);
721  if (Base.getOpcode() == ISD::FrameIndex) {
722  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
723  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
724  }
725  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
726  return true;
727  }
728  } else {
729  // unsigned Immediate
730  uint64_t RHSC = RHS->getZExtValue();
731  unsigned Scale = Log2_32(Size);
732  uint64_t Range = 0x1ULL << BW;
733 
734  if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
735  Base = N.getOperand(0);
736  if (Base.getOpcode() == ISD::FrameIndex) {
737  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
738  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
739  }
740  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
741  return true;
742  }
743  }
744  }
745  }
746  // Base only. The address will be materialized into a register before
747  // the memory is accessed.
748  // add x0, Xbase, #offset
749  // stp x1, x2, [x0]
750  Base = N;
751  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
752  return true;
753 }
754 
755 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
756 /// immediate" address. The "Size" argument is the size in bytes of the memory
757 /// reference, which determines the scale.
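// Editor's worked example (not part of the original source): for a 64-bit
// access (Size = 8), "ldr x0, [x1, #32]" is selected with OffImm = 32 >> 3 = 4;
// the valid byte offsets are the multiples of 8 in [0, 32760].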
758 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
759  SDValue &Base, SDValue &OffImm) {
760  SDLoc dl(N);
761  const DataLayout &DL = CurDAG->getDataLayout();
762  const TargetLowering *TLI = getTargetLowering();
763  if (N.getOpcode() == ISD::FrameIndex) {
764  int FI = cast<FrameIndexSDNode>(N)->getIndex();
765  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
766  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
767  return true;
768  }
769 
770  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
771  GlobalAddressSDNode *GAN =
772  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
773  Base = N.getOperand(0);
774  OffImm = N.getOperand(1);
775  if (!GAN)
776  return true;
777 
778  if (GAN->getOffset() % Size == 0) {
779  const GlobalValue *GV = GAN->getGlobal();
780  unsigned Alignment = GV->getAlignment();
781  Type *Ty = GV->getValueType();
782  if (Alignment == 0 && Ty->isSized())
783  Alignment = DL.getABITypeAlignment(Ty);
784 
785  if (Alignment >= Size)
786  return true;
787  }
788  }
789 
790  if (CurDAG->isBaseWithConstantOffset(N)) {
791  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
792  int64_t RHSC = (int64_t)RHS->getZExtValue();
793  unsigned Scale = Log2_32(Size);
794  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
795  Base = N.getOperand(0);
796  if (Base.getOpcode() == ISD::FrameIndex) {
797  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
798  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
799  }
800  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
801  return true;
802  }
803  }
804  }
805 
806  // Before falling back to our general case, check if the unscaled
807  // instructions can handle this. If so, that's preferable.
808  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
809  return false;
810 
811  // Base only. The address will be materialized into a register before
812  // the memory is accessed.
813  // add x0, Xbase, #offset
814  // ldr x0, [x0]
815  Base = N;
816  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
817  return true;
818 }
819 
820 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
821 /// immediate" address. This should only match when there is an offset that
822 /// is not valid for a scaled immediate addressing mode. The "Size" argument
823 /// is the size in bytes of the memory reference, which is needed here to know
824 /// what is valid for a scaled immediate.
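// Editor's worked example (not part of the original source): for an 8-byte
// access, a byte offset of 12 is not a multiple of 8, so the scaled form
// cannot encode it and this matches instead, giving "ldur x0, [x1, #12]";
// offsets outside [-256, 255] are rejected here as well.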
825 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
826  SDValue &Base,
827  SDValue &OffImm) {
828  if (!CurDAG->isBaseWithConstantOffset(N))
829  return false;
830  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
831  int64_t RHSC = RHS->getSExtValue();
832  // If the offset is valid as a scaled immediate, don't match here.
833  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
834  RHSC < (0x1000 << Log2_32(Size)))
835  return false;
836  if (RHSC >= -256 && RHSC < 256) {
837  Base = N.getOperand(0);
838  if (Base.getOpcode() == ISD::FrameIndex) {
839  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
840  const TargetLowering *TLI = getTargetLowering();
841  Base = CurDAG->getTargetFrameIndex(
842  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
843  }
844  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
845  return true;
846  }
847  }
848  return false;
849 }
850 
851 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
852  SDLoc dl(N);
853  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
854  SDValue ImpDef = SDValue(
855  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
856  MachineSDNode *Node = CurDAG->getMachineNode(
857  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
858  return SDValue(Node, 0);
859 }
860 
861 /// Check if the given SHL node (\p N), can be used to form an
862 /// extended register for an addressing mode.
863 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
864  bool WantExtend, SDValue &Offset,
865  SDValue &SignExtend) {
866  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
867  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
868  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
869  return false;
870 
871  SDLoc dl(N);
872  if (WantExtend) {
873  AArch64_AM::ShiftExtendType Ext =
874  getExtendTypeForNode(N.getOperand(0), true);
875  if (Ext == AArch64_AM::InvalidShiftExtend)
876  return false;
877 
878  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
879  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
880  MVT::i32);
881  } else {
882  Offset = N.getOperand(0);
883  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
884  }
885 
886  unsigned LegalShiftVal = Log2_32(Size);
887  unsigned ShiftVal = CSD->getZExtValue();
888 
889  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
890  return false;
891 
892  return isWorthFolding(N);
893 }
894 
895 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
896  SDValue &Base, SDValue &Offset,
897  SDValue &SignExtend,
898  SDValue &DoShift) {
899  if (N.getOpcode() != ISD::ADD)
900  return false;
901  SDValue LHS = N.getOperand(0);
902  SDValue RHS = N.getOperand(1);
903  SDLoc dl(N);
904 
905  // We don't want to match immediate adds here, because they are better lowered
906  // to the register-immediate addressing modes.
907  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
908  return false;
909 
910  // Check if this particular node is reused in any non-memory related
911  // operation. If yes, do not try to fold this node into the address
912  // computation, since the computation will be kept.
913  const SDNode *Node = N.getNode();
914  for (SDNode *UI : Node->uses()) {
915  if (!isa<MemSDNode>(*UI))
916  return false;
917  }
918 
919  // Remember if it is worth folding N when it produces extended register.
920  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
921 
922  // Try to match a shifted extend on the RHS.
923  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
924  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
925  Base = LHS;
926  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
927  return true;
928  }
929 
930  // Try to match a shifted extend on the LHS.
931  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
932  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
933  Base = RHS;
934  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
935  return true;
936  }
937 
938  // There was no shift, whatever else we find.
939  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
940 
941  AArch64_AM::ShiftExtendType Ext;
942  // Try to match an unshifted extend on the LHS.
943  if (IsExtendedRegisterWorthFolding &&
944  (Ext = getExtendTypeForNode(LHS, true)) !=
945  AArch64_AM::InvalidShiftExtend) {
946  Base = RHS;
947  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
948  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
949  MVT::i32);
950  if (isWorthFolding(LHS))
951  return true;
952  }
953 
954  // Try to match an unshifted extend on the RHS.
955  if (IsExtendedRegisterWorthFolding &&
956  (Ext = getExtendTypeForNode(RHS, true)) !=
957  AArch64_AM::InvalidShiftExtend) {
958  Base = LHS;
959  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
960  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
961  MVT::i32);
962  if (isWorthFolding(RHS))
963  return true;
964  }
965 
966  return false;
967 }
968 
969 // Check if the given immediate is preferred by ADD. If an immediate can be
970 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
971 // encoded by one MOVZ, return true.
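// Editor's worked examples (not part of the original source):
//   isPreferredADD(0xfff)    -> true   (plain ADD immediate)
//   isPreferredADD(0x123000) -> true   ("ADD ..., lsl #12", not a single MOVZ)
//   isPreferredADD(0x30000)  -> false  (one "movz xN, #0x3, lsl #16" is enough)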
972 static bool isPreferredADD(int64_t ImmOff) {
973  // Constant in [0x0, 0xfff] can be encoded in ADD.
974  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
975  return true;
976  // Check if it can be encoded in an "ADD LSL #12".
977  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
978  // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
979  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
980  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
981  return false;
982 }
983 
984 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
985  SDValue &Base, SDValue &Offset,
986  SDValue &SignExtend,
987  SDValue &DoShift) {
988  if (N.getOpcode() != ISD::ADD)
989  return false;
990  SDValue LHS = N.getOperand(0);
991  SDValue RHS = N.getOperand(1);
992  SDLoc DL(N);
993 
994  // Check if this particular node is reused in any non-memory related
995  // operation. If yes, do not try to fold this node into the address
996  // computation, since the computation will be kept.
997  const SDNode *Node = N.getNode();
998  for (SDNode *UI : Node->uses()) {
999  if (!isa<MemSDNode>(*UI))
1000  return false;
1001  }
1002 
1003  // Watch out if RHS is a wide immediate: it cannot be selected into the
1004  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1005  // ADD/SUB. Instead it would use the [BaseReg + 0] address mode and generate
1006  // instructions like:
1007  // MOV X0, WideImmediate
1008  // ADD X1, BaseReg, X0
1009  // LDR X2, [X1, 0]
1010  // For such situation, using [BaseReg, XReg] addressing mode can save one
1011  // ADD/SUB:
1012  // MOV X0, WideImmediate
1013  // LDR X2, [BaseReg, X0]
1014  if (isa<ConstantSDNode>(RHS)) {
1015  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1016  unsigned Scale = Log2_32(Size);
1017  // Skip immediates that can be selected in the load/store addressing mode.
1018  // Also skip immediates that can be encoded by a single ADD (SUB is also
1019  // checked by using -ImmOff).
1020  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1021  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1022  return false;
1023 
1024  SDValue Ops[] = { RHS };
1025  SDNode *MOVI =
1026  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1027  SDValue MOVIV = SDValue(MOVI, 0);
1028  // This ADD of two X register will be selected into [Reg+Reg] mode.
1029  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1030  }
1031 
1032  // Remember if it is worth folding N when it produces extended register.
1033  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1034 
1035  // Try to match a shifted extend on the RHS.
1036  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1037  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1038  Base = LHS;
1039  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1040  return true;
1041  }
1042 
1043  // Try to match a shifted extend on the LHS.
1044  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1045  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1046  Base = RHS;
1047  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1048  return true;
1049  }
1050 
1051  // Match any non-shifted, non-extend, non-immediate add expression.
1052  Base = LHS;
1053  Offset = RHS;
1054  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1055  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1056  // Reg1 + Reg2 is free: no check needed.
1057  return true;
1058 }
1059 
1060 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1061  static const unsigned RegClassIDs[] = {
1062  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1063  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1064  AArch64::dsub2, AArch64::dsub3};
1065 
1066  return createTuple(Regs, RegClassIDs, SubRegs);
1067 }
1068 
1069 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1070  static const unsigned RegClassIDs[] = {
1071  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1072  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1073  AArch64::qsub2, AArch64::qsub3};
1074 
1075  return createTuple(Regs, RegClassIDs, SubRegs);
1076 }
1077 
1078 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1079  const unsigned RegClassIDs[],
1080  const unsigned SubRegs[]) {
1081  // There's no special register-class for a vector-list of 1 element: it's just
1082  // a vector.
1083  if (Regs.size() == 1)
1084  return Regs[0];
1085 
1086  assert(Regs.size() >= 2 && Regs.size() <= 4);
1087 
1088  SDLoc DL(Regs[0]);
1089 
1090  SmallVector<SDValue, 4> Ops;
1091 
1092  // First operand of REG_SEQUENCE is the desired RegClass.
1093  Ops.push_back(
1094  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1095 
1096  // Then we get pairs of source & subregister-position for the components.
1097  for (unsigned i = 0; i < Regs.size(); ++i) {
1098  Ops.push_back(Regs[i]);
1099  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1100  }
1101 
1102  SDNode *N =
1103  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1104  return SDValue(N, 0);
1105 }
1106 
1107 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1108  bool isExt) {
1109  SDLoc dl(N);
1110  EVT VT = N->getValueType(0);
1111 
1112  unsigned ExtOff = isExt;
1113 
1114  // Form a REG_SEQUENCE to force register allocation.
1115  unsigned Vec0Off = ExtOff + 1;
1116  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1117  N->op_begin() + Vec0Off + NumVecs);
1118  SDValue RegSeq = createQTuple(Regs);
1119 
1120  SmallVector<SDValue, 6> Ops;
1121  if (isExt)
1122  Ops.push_back(N->getOperand(1));
1123  Ops.push_back(RegSeq);
1124  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1125  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1126 }
1127 
1128 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1129  LoadSDNode *LD = cast<LoadSDNode>(N);
1130  if (LD->isUnindexed())
1131  return false;
1132  EVT VT = LD->getMemoryVT();
1133  EVT DstVT = N->getValueType(0);
1134  ISD::MemIndexedMode AM = LD->getAddressingMode();
1135  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1136 
1137  // We're not doing validity checking here. That was done when checking
1138  // if we should mark the load as indexed or not. We're just selecting
1139  // the right instruction.
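// Editor's note (illustrative, not from the original source): a pre-indexed
// i64 load such as "ldr x0, [x1, #16]!" maps to AArch64::LDRXpre, while a
// post-indexed zero-/any-extending i8 load maps to AArch64::LDRBBpost plus,
// when the user wants an i64, the SUBREG_TO_REG emitted below (InsertTo64).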
1140  unsigned Opcode = 0;
1141 
1142  ISD::LoadExtType ExtType = LD->getExtensionType();
1143  bool InsertTo64 = false;
1144  if (VT == MVT::i64)
1145  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1146  else if (VT == MVT::i32) {
1147  if (ExtType == ISD::NON_EXTLOAD)
1148  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1149  else if (ExtType == ISD::SEXTLOAD)
1150  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1151  else {
1152  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1153  InsertTo64 = true;
1154  // The result of the load is only i32. It's the subreg_to_reg that makes
1155  // it into an i64.
1156  DstVT = MVT::i32;
1157  }
1158  } else if (VT == MVT::i16) {
1159  if (ExtType == ISD::SEXTLOAD) {
1160  if (DstVT == MVT::i64)
1161  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1162  else
1163  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1164  } else {
1165  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1166  InsertTo64 = DstVT == MVT::i64;
1167  // The result of the load is only i32. It's the subreg_to_reg that makes
1168  // it into an i64.
1169  DstVT = MVT::i32;
1170  }
1171  } else if (VT == MVT::i8) {
1172  if (ExtType == ISD::SEXTLOAD) {
1173  if (DstVT == MVT::i64)
1174  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1175  else
1176  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1177  } else {
1178  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1179  InsertTo64 = DstVT == MVT::i64;
1180  // The result of the load is only i32. It's the subreg_to_reg that makes
1181  // it into an i64.
1182  DstVT = MVT::i32;
1183  }
1184  } else if (VT == MVT::f16) {
1185  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1186  } else if (VT == MVT::f32) {
1187  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1188  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1189  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1190  } else if (VT.is128BitVector()) {
1191  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1192  } else
1193  return false;
1194  SDValue Chain = LD->getChain();
1195  SDValue Base = LD->getBasePtr();
1196  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1197  int OffsetVal = (int)OffsetOp->getZExtValue();
1198  SDLoc dl(N);
1199  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1200  SDValue Ops[] = { Base, Offset, Chain };
1201  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1202  MVT::Other, Ops);
1203  // Either way, we're replacing the node, so tell the caller that.
1204  SDValue LoadedVal = SDValue(Res, 1);
1205  if (InsertTo64) {
1206  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1207  LoadedVal =
1208  SDValue(CurDAG->getMachineNode(
1209  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1210  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1211  SubReg),
1212  0);
1213  }
1214 
1215  ReplaceUses(SDValue(N, 0), LoadedVal);
1216  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1217  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1218  CurDAG->RemoveDeadNode(N);
1219  return true;
1220 }
1221 
1222 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1223  unsigned SubRegIdx) {
1224  SDLoc dl(N);
1225  EVT VT = N->getValueType(0);
1226  SDValue Chain = N->getOperand(0);
1227 
1228  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1229  Chain};
1230 
1231  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1232 
1233  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1234  SDValue SuperReg = SDValue(Ld, 0);
1235  for (unsigned i = 0; i < NumVecs; ++i)
1236  ReplaceUses(SDValue(N, i),
1237  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1238 
1239  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1240 
1241  // Transfer memoperands.
1242  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1243  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1244 
1245  CurDAG->RemoveDeadNode(N);
1246 }
1247 
1248 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1249  unsigned Opc, unsigned SubRegIdx) {
1250  SDLoc dl(N);
1251  EVT VT = N->getValueType(0);
1252  SDValue Chain = N->getOperand(0);
1253 
1254  SDValue Ops[] = {N->getOperand(1), // Mem operand
1255  N->getOperand(2), // Incremental
1256  Chain};
1257 
1258  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1259  MVT::Untyped, MVT::Other};
1260 
1261  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1262 
1263  // Update uses of write back register
1264  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1265 
1266  // Update uses of vector list
1267  SDValue SuperReg = SDValue(Ld, 1);
1268  if (NumVecs == 1)
1269  ReplaceUses(SDValue(N, 0), SuperReg);
1270  else
1271  for (unsigned i = 0; i < NumVecs; ++i)
1272  ReplaceUses(SDValue(N, i),
1273  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1274 
1275  // Update the chain
1276  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1277  CurDAG->RemoveDeadNode(N);
1278 }
1279 
1280 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1281  unsigned Opc) {
1282  SDLoc dl(N);
1283  EVT VT = N->getOperand(2)->getValueType(0);
1284 
1285  // Form a REG_SEQUENCE to force register allocation.
1286  bool Is128Bit = VT.getSizeInBits() == 128;
1287  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1288  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1289 
1290  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1291  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1292 
1293  // Transfer memoperands.
1294  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1295  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1296 
1297  ReplaceNode(N, St);
1298 }
1299 
1300 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1301  unsigned Opc) {
1302  SDLoc dl(N);
1303  EVT VT = N->getOperand(2)->getValueType(0);
1304  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1305  MVT::Other}; // Type for the Chain
1306 
1307  // Form a REG_SEQUENCE to force register allocation.
1308  bool Is128Bit = VT.getSizeInBits() == 128;
1309  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1310  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1311 
1312  SDValue Ops[] = {RegSeq,
1313  N->getOperand(NumVecs + 1), // base register
1314  N->getOperand(NumVecs + 2), // Incremental
1315  N->getOperand(0)}; // Chain
1316  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1317 
1318  ReplaceNode(N, St);
1319 }
1320 
1321 namespace {
1322 /// WidenVector - Given a value in the V64 register class, produce the
1323 /// equivalent value in the V128 register class.
1324 class WidenVector {
1325  SelectionDAG &DAG;
1326 
1327 public:
1328  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1329 
1330  SDValue operator()(SDValue V64Reg) {
1331  EVT VT = V64Reg.getValueType();
1332  unsigned NarrowSize = VT.getVectorNumElements();
1333  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1334  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1335  SDLoc DL(V64Reg);
1336 
1337  SDValue Undef =
1338  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1339  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1340  }
1341 };
1342 } // namespace
1343 
1344 /// NarrowVector - Given a value in the V128 register class, produce the
1345 /// equivalent value in the V64 register class.
1346 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1347  EVT VT = V128Reg.getValueType();
1348  unsigned WideSize = VT.getVectorNumElements();
1349  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1350  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1351 
1352  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1353  V128Reg);
1354 }
1355 
1356 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1357  unsigned Opc) {
1358  SDLoc dl(N);
1359  EVT VT = N->getValueType(0);
1360  bool Narrow = VT.getSizeInBits() == 64;
1361 
1362  // Form a REG_SEQUENCE to force register allocation.
1363  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1364 
1365  if (Narrow)
1366  transform(Regs, Regs.begin(),
1367  WidenVector(*CurDAG));
1368 
1369  SDValue RegSeq = createQTuple(Regs);
1370 
1371  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1372 
1373  unsigned LaneNo =
1374  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1375 
1376  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1377  N->getOperand(NumVecs + 3), N->getOperand(0)};
1378  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1379  SDValue SuperReg = SDValue(Ld, 0);
1380 
1381  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1382  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1383  AArch64::qsub2, AArch64::qsub3 };
1384  for (unsigned i = 0; i < NumVecs; ++i) {
1385  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1386  if (Narrow)
1387  NV = NarrowVector(NV, *CurDAG);
1388  ReplaceUses(SDValue(N, i), NV);
1389  }
1390 
1391  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1392  CurDAG->RemoveDeadNode(N);
1393 }
1394 
1395 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1396  unsigned Opc) {
1397  SDLoc dl(N);
1398  EVT VT = N->getValueType(0);
1399  bool Narrow = VT.getSizeInBits() == 64;
1400 
1401  // Form a REG_SEQUENCE to force register allocation.
1402  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1403 
1404  if (Narrow)
1405  transform(Regs, Regs.begin(),
1406  WidenVector(*CurDAG));
1407 
1408  SDValue RegSeq = createQTuple(Regs);
1409 
1410  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1411  RegSeq->getValueType(0), MVT::Other};
1412 
1413  unsigned LaneNo =
1414  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1415 
1416  SDValue Ops[] = {RegSeq,
1417  CurDAG->getTargetConstant(LaneNo, dl,
1418  MVT::i64), // Lane Number
1419  N->getOperand(NumVecs + 2), // Base register
1420  N->getOperand(NumVecs + 3), // Incremental
1421  N->getOperand(0)};
1422  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1423 
1424  // Update uses of the write back register
1425  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1426 
1427  // Update uses of the vector list
1428  SDValue SuperReg = SDValue(Ld, 1);
1429  if (NumVecs == 1) {
1430  ReplaceUses(SDValue(N, 0),
1431  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1432  } else {
1433  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1434  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1435  AArch64::qsub2, AArch64::qsub3 };
1436  for (unsigned i = 0; i < NumVecs; ++i) {
1437  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1438  SuperReg);
1439  if (Narrow)
1440  NV = NarrowVector(NV, *CurDAG);
1441  ReplaceUses(SDValue(N, i), NV);
1442  }
1443  }
1444 
1445  // Update the Chain
1446  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1447  CurDAG->RemoveDeadNode(N);
1448 }
1449 
1450 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1451  unsigned Opc) {
1452  SDLoc dl(N);
1453  EVT VT = N->getOperand(2)->getValueType(0);
1454  bool Narrow = VT.getSizeInBits() == 64;
1455 
1456  // Form a REG_SEQUENCE to force register allocation.
1457  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1458 
1459  if (Narrow)
1460  transform(Regs, Regs.begin(),
1461  WidenVector(*CurDAG));
1462 
1463  SDValue RegSeq = createQTuple(Regs);
1464 
1465  unsigned LaneNo =
1466  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1467 
1468  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1469  N->getOperand(NumVecs + 3), N->getOperand(0)};
1470  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1471 
1472  // Transfer memoperands.
1473  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1474  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1475 
1476  ReplaceNode(N, St);
1477 }
1478 
1479 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1480  unsigned Opc) {
1481  SDLoc dl(N);
1482  EVT VT = N->getOperand(2)->getValueType(0);
1483  bool Narrow = VT.getSizeInBits() == 64;
1484 
1485  // Form a REG_SEQUENCE to force register allocation.
1486  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1487 
1488  if (Narrow)
1489  transform(Regs, Regs.begin(),
1490  WidenVector(*CurDAG));
1491 
1492  SDValue RegSeq = createQTuple(Regs);
1493 
1494  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1495  MVT::Other};
1496 
1497  unsigned LaneNo =
1498  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1499 
1500  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1501  N->getOperand(NumVecs + 2), // Base Register
1502  N->getOperand(NumVecs + 3), // Incremental
1503  N->getOperand(0)};
1504  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1505 
1506  // Transfer memoperands.
1507  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1508  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1509 
1510  ReplaceNode(N, St);
1511 }
1512 
1513 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1514  unsigned &Opc, SDValue &Opd0,
1515  unsigned &LSB, unsigned &MSB,
1516  unsigned NumberOfIgnoredLowBits,
1517  bool BiggerPattern) {
1518  assert(N->getOpcode() == ISD::AND &&
1519  "N must be a AND operation to call this function");
1520 
1521  EVT VT = N->getValueType(0);
1522 
1523  // Here we can test the type of VT and return false when the type does not
1524  // match, but since it is done prior to that call in the current context
1525  // we turned that into an assert to avoid redundant code.
1526  assert((VT == MVT::i32 || VT == MVT::i64) &&
1527  "Type checking must have been done before calling this function");
1528 
1529  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1530  // changed the AND node to a 32-bit mask operation. We'll have to
1531  // undo that as part of the transform here if we want to catch all
1532  // the opportunities.
1533  // Currently the NumberOfIgnoredLowBits argument helps to recover
1534  // from these situations when matching the bigger pattern (bitfield insert).
1535 
1536  // For unsigned extracts, check for a shift right and mask
1537  uint64_t AndImm = 0;
1538  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1539  return false;
1540 
1541  const SDNode *Op0 = N->getOperand(0).getNode();
1542 
1543  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1544  // simplified. Try to undo that
1545  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1546 
1547  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
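// Editor's example (not from the original source): 0xFF & 0x100 == 0, so 0xFF
// is a low-bit mask; 0xF0 & 0xF1 == 0xF0 != 0, so 0xF0 is not.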
1548  if (AndImm & (AndImm + 1))
1549  return false;
1550 
1551  bool ClampMSB = false;
1552  uint64_t SrlImm = 0;
1553  // Handle the SRL + ANY_EXTEND case.
1554  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1555  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1556  // Extend the incoming operand of the SRL to 64-bit.
1557  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1558  // Make sure to clamp the MSB so that we preserve the semantics of the
1559  // original operations.
1560  ClampMSB = true;
1561  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1562  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1563  SrlImm)) {
1564  // If the shift result was truncated, we can still combine them.
1565  Opd0 = Op0->getOperand(0).getOperand(0);
1566 
1567  // Use the type of SRL node.
1568  VT = Opd0->getValueType(0);
1569  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1570  Opd0 = Op0->getOperand(0);
1571  } else if (BiggerPattern) {
1572  // Let's pretend a 0 shift right has been performed.
1573  // The resulting code will be at least as good as the original one
1574  // plus it may expose more opportunities for bitfield insert pattern.
1575  // FIXME: Currently we limit this to the bigger pattern, because
1576  // some optimizations expect AND and not UBFM.
1577  Opd0 = N->getOperand(0);
1578  } else
1579  return false;
1580 
1581  // Bail out on large immediates. This happens when no proper
1582  // combining/constant folding was performed.
1583  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1584  LLVM_DEBUG(
1585  (dbgs() << N
1586  << ": Found large shift immediate, this should not happen\n"));
1587  return false;
1588  }
1589 
1590  LSB = SrlImm;
1591  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1592  : countTrailingOnes<uint64_t>(AndImm)) -
1593  1;
1594  if (ClampMSB)
1595  // Since we're moving the extend before the right shift operation, we need
1596  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1597  // the zeros which would get shifted in with the original right shift
1598  // operation.
1599  MSB = MSB > 31 ? 31 : MSB;
1600 
1601  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1602  return true;
1603 }
1604 
1605 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1606  SDValue &Opd0, unsigned &Immr,
1607  unsigned &Imms) {
1608  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1609 
1610  EVT VT = N->getValueType(0);
1611  unsigned BitWidth = VT.getSizeInBits();
1612  assert((VT == MVT::i32 || VT == MVT::i64) &&
1613  "Type checking must have been done before calling this function");
1614 
1615  SDValue Op = N->getOperand(0);
1616  if (Op->getOpcode() == ISD::TRUNCATE) {
1617  Op = Op->getOperand(0);
1618  VT = Op->getValueType(0);
1619  BitWidth = VT.getSizeInBits();
1620  }
1621 
1622  uint64_t ShiftImm;
1623  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1624  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1625  return false;
1626 
1627  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1628  if (ShiftImm + Width > BitWidth)
1629  return false;
1630 
1631  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1632  Opd0 = Op.getOperand(0);
1633  Immr = ShiftImm;
1634  Imms = ShiftImm + Width - 1;
1635  return true;
1636 }
1637 
1638 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1639  SDValue &Opd0, unsigned &LSB,
1640  unsigned &MSB) {
1641  // We are looking for the following pattern which basically extracts several
1642  // continuous bits from the source value and places them at the LSB of the
1643  // destination value; all other bits of the destination value are set to zero:
1644  //
1645  // Value2 = AND Value, MaskImm
1646  // SRL Value2, ShiftImm
1647  //
1648  // with MaskImm >> ShiftImm to search for the bit width.
1649  //
1650  // This gets selected into a single UBFM:
1651  //
1652  // UBFM Value, ShiftImm, BitWide + SrlImm -1
1653  //
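  // E.g., on i32, (srl (and x, 0xff0), 4) has MaskImm = 0xff0 and ShiftImm = 4,
  // so BitWide = 8 and the node is selected as UBFMWri x, #4, #11, which
  // extracts bits [11:4] of x.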
1654 
1655  if (N->getOpcode() != ISD::SRL)
1656  return false;
1657 
1658  uint64_t AndMask = 0;
1659  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1660  return false;
1661 
1662  Opd0 = N->getOperand(0).getOperand(0);
1663 
1664  uint64_t SrlImm = 0;
1665  if (!isIntImmediate(N->getOperand(1), SrlImm))
1666  return false;
1667 
1668  // Check whether we really have several bits extract here.
1669  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1670  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1671  if (N->getValueType(0) == MVT::i32)
1672  Opc = AArch64::UBFMWri;
1673  else
1674  Opc = AArch64::UBFMXri;
1675 
1676  LSB = SrlImm;
1677  MSB = BitWide + SrlImm - 1;
1678  return true;
1679  }
1680 
1681  return false;
1682 }
1683 
1684 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1685  unsigned &Immr, unsigned &Imms,
1686  bool BiggerPattern) {
1687  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1688  "N must be a SHR/SRA operation to call this function");
1689 
1690  EVT VT = N->getValueType(0);
1691 
1692  // Here we can test the type of VT and return false when the type does not
1693  // match, but since it is done prior to that call in the current context
1694  // we turned that into an assert to avoid redundant code.
1695  assert((VT == MVT::i32 || VT == MVT::i64) &&
1696  "Type checking must have been done before calling this function");
1697 
1698  // Check for AND + SRL doing several bits extract.
1699  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1700  return true;
1701 
1702  // We're looking for a shift of a shift.
1703  uint64_t ShlImm = 0;
1704  uint64_t TruncBits = 0;
1705  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1706  Opd0 = N->getOperand(0).getOperand(0);
1707  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1708  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1709  // We are looking for a shift of truncate. Truncate from i64 to i32 could
1710  // be considered as setting high 32 bits as zero. Our strategy here is to
1711  // always generate 64bit UBFM. This consistency will help the CSE pass
1712  // later find more redundancy.
1713  Opd0 = N->getOperand(0).getOperand(0);
1714  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1715  VT = Opd0.getValueType();
1716  assert(VT == MVT::i64 && "the promoted type should be i64");
1717  } else if (BiggerPattern) {
1718  // Let's pretend a 0 shift left has been performed.
1719  // FIXME: Currently we limit this to the bigger pattern case,
1720  // because some optimizations expect AND and not UBFM
1721  Opd0 = N->getOperand(0);
1722  } else
1723  return false;
1724 
1725  // Missing combines/constant folding may have left us with strange
1726  // constants.
1727  if (ShlImm >= VT.getSizeInBits()) {
1728  LLVM_DEBUG(
1729  (dbgs() << N
1730  << ": Found large shift immediate, this should not happen\n"));
1731  return false;
1732  }
1733 
1734  uint64_t SrlImm = 0;
1735  if (!isIntImmediate(N->getOperand(1), SrlImm))
1736  return false;
1737 
1738  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1739  "bad amount in shift node!");
1740  int immr = SrlImm - ShlImm;
1741  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1742  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1743  // SRA requires a signed extraction
1744  if (VT == MVT::i32)
1745  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1746  else
1747  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1748  return true;
1749 }
1750 
1751 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1752  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1753 
1754  EVT VT = N->getValueType(0);
1755  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1756  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1757  return false;
1758 
1759  uint64_t ShiftImm;
1760  SDValue Op = N->getOperand(0);
1761  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1762  return false;
1763 
1764  SDLoc dl(N);
1765  // Extend the incoming operand of the shift to 64-bits.
1766  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1767  unsigned Immr = ShiftImm;
1768  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1769  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1770  CurDAG->getTargetConstant(Imms, dl, VT)};
1771  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1772  return true;
1773 }
1774 
1775 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1776  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1777  unsigned NumberOfIgnoredLowBits = 0,
1778  bool BiggerPattern = false) {
1779  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1780  return false;
1781 
1782  switch (N->getOpcode()) {
1783  default:
1784  if (!N->isMachineOpcode())
1785  return false;
1786  break;
1787  case ISD::AND:
1788  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1789  NumberOfIgnoredLowBits, BiggerPattern);
1790  case ISD::SRL:
1791  case ISD::SRA:
1792  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1793 
1794  case ISD::SIGN_EXTEND_INREG:
1795  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1796  }
1797 
1798  unsigned NOpc = N->getMachineOpcode();
1799  switch (NOpc) {
1800  default:
1801  return false;
1802  case AArch64::SBFMWri:
1803  case AArch64::UBFMWri:
1804  case AArch64::SBFMXri:
1805  case AArch64::UBFMXri:
1806  Opc = NOpc;
1807  Opd0 = N->getOperand(0);
1808  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1809  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1810  return true;
1811  }
1812  // Unreachable
1813  return false;
1814 }
1815 
1816 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1817  unsigned Opc, Immr, Imms;
1818  SDValue Opd0;
1819  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1820  return false;
1821 
1822  EVT VT = N->getValueType(0);
1823  SDLoc dl(N);
1824 
1825  // If the bit extract operation is 64bit but the original type is 32bit, we
1826  // need to add one EXTRACT_SUBREG.
1827  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1828  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1829  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1830 
1831  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1832  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1833  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1834  MVT::i32, SDValue(BFM, 0), SubReg));
1835  return true;
1836  }
1837 
1838  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1839  CurDAG->getTargetConstant(Imms, dl, VT)};
1840  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1841  return true;
1842 }
1843 
1844 /// Does DstMask form a complementary pair with the mask provided by
1845 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1846 /// this asks whether DstMask zeroes precisely those bits that will be set by
1847 /// the other half.
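/// For example, on i32 a DstMask of 0xffff0000 is complementary to inserted
/// bits confined to 0x0000ffff: the mask zeroes exactly the bits the insertion
/// will provide, so the masking AND can be folded into the BFM.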
1848 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1849  unsigned NumberOfIgnoredHighBits, EVT VT) {
1850  assert((VT == MVT::i32 || VT == MVT::i64) &&
1851  "i32 or i64 mask type expected!");
1852  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1853 
1854  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1855  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1856 
1857  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1858  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1859 }
1860 
1861 // Look for bits that will be useful for later uses.
1862 // A bit is considered useless as soon as it is dropped and never used
1863 // before it has been dropped.
1864 // E.g., looking for the useful bits of x:
1865 // 1. y = x & 0x7
1866 // 2. z = y >> 2
1867 // After #1, the useful bits of x are 0x7; those useful bits then live through
1868 // y.
1869 // After #2, the useful bits of x are 0x4.
1870 // However, if x is used by an unpredictable instruction, then all its bits
1871 // are useful.
1872 // E.g.
1873 // 1. y = x & 0x7
1874 // 2. z = y >> 2
1875 // 3. str x, [@x]
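// In the second example the store at #3 consumes the whole of x, so every bit
// of x is considered useful and the narrowing suggested by #1 and #2 no longer
// applies.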
1876 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1877 
1878 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1879  unsigned Depth) {
1880  uint64_t Imm =
1881  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1882  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1883  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1884  getUsefulBits(Op, UsefulBits, Depth + 1);
1885 }
1886 
1887 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1888  uint64_t Imm, uint64_t MSB,
1889  unsigned Depth) {
1890  // inherit the bitwidth value
1891  APInt OpUsefulBits(UsefulBits);
1892  OpUsefulBits = 1;
1893 
1894  if (MSB >= Imm) {
1895  OpUsefulBits <<= MSB - Imm + 1;
1896  --OpUsefulBits;
1897  // The interesting part will be in the lower part of the result
1898  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1899  // The interesting part was starting at Imm in the argument
1900  OpUsefulBits <<= Imm;
1901  } else {
1902  OpUsefulBits <<= MSB + 1;
1903  --OpUsefulBits;
1904  // The interesting part will be shifted in the result
1905  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1906  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1907  // The interesting part was at zero in the argument
1908  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1909  }
1910 
1911  UsefulBits &= OpUsefulBits;
1912 }
1913 
1914 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1915  unsigned Depth) {
1916  uint64_t Imm =
1917  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1918  uint64_t MSB =
1919  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1920 
1921  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1922 }
1923 
1924 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1925  unsigned Depth) {
1926  uint64_t ShiftTypeAndValue =
1927  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1928  APInt Mask(UsefulBits);
1929  Mask.clearAllBits();
1930  Mask.flipAllBits();
1931 
1932  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1933  // Shift Left
1934  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1935  Mask <<= ShiftAmt;
1936  getUsefulBits(Op, Mask, Depth + 1);
1937  Mask.lshrInPlace(ShiftAmt);
1938  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1939  // Shift Right
1940  // We do not handle AArch64_AM::ASR, because the sign will change the
1941  // number of useful bits
1942  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1943  Mask.lshrInPlace(ShiftAmt);
1944  getUsefulBits(Op, Mask, Depth + 1);
1945  Mask <<= ShiftAmt;
1946  } else
1947  return;
1948 
1949  UsefulBits &= Mask;
1950 }
1951 
1952 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1953  unsigned Depth) {
1954  uint64_t Imm =
1955  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1956  uint64_t MSB =
1957  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1958 
1959  APInt OpUsefulBits(UsefulBits);
1960  OpUsefulBits = 1;
1961 
1962  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1963  ResultUsefulBits.flipAllBits();
1964  APInt Mask(UsefulBits.getBitWidth(), 0);
1965 
1966  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1967 
1968  if (MSB >= Imm) {
1969  // The instruction is a BFXIL.
1970  uint64_t Width = MSB - Imm + 1;
1971  uint64_t LSB = Imm;
1972 
1973  OpUsefulBits <<= Width;
1974  --OpUsefulBits;
1975 
1976  if (Op.getOperand(1) == Orig) {
1977  // Copy the low bits from the result to bits starting from LSB.
1978  Mask = ResultUsefulBits & OpUsefulBits;
1979  Mask <<= LSB;
1980  }
1981 
1982  if (Op.getOperand(0) == Orig)
1983  // Bits starting from LSB in the input contribute to the result.
1984  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1985  } else {
1986  // The instruction is a BFI.
1987  uint64_t Width = MSB + 1;
1988  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1989 
1990  OpUsefulBits <<= Width;
1991  --OpUsefulBits;
1992  OpUsefulBits <<= LSB;
1993 
1994  if (Op.getOperand(1) == Orig) {
1995  // Copy the bits from the result to the zero bits.
1996  Mask = ResultUsefulBits & OpUsefulBits;
1997  Mask.lshrInPlace(LSB);
1998  }
1999 
2000  if (Op.getOperand(0) == Orig)
2001  Mask |= (ResultUsefulBits & ~OpUsefulBits);
2002  }
2003 
2004  UsefulBits &= Mask;
2005 }
2006 
2007 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2008  SDValue Orig, unsigned Depth) {
2009 
2010  // Users of this node should have already been instruction selected
2011  // FIXME: Can we turn that into an assert?
2012  if (!UserNode->isMachineOpcode())
2013  return;
2014 
2015  switch (UserNode->getMachineOpcode()) {
2016  default:
2017  return;
2018  case AArch64::ANDSWri:
2019  case AArch64::ANDSXri:
2020  case AArch64::ANDWri:
2021  case AArch64::ANDXri:
2022  // We increment Depth only when we call the getUsefulBits
2023  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2024  Depth);
2025  case AArch64::UBFMWri:
2026  case AArch64::UBFMXri:
2027  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2028 
2029  case AArch64::ORRWrs:
2030  case AArch64::ORRXrs:
2031  if (UserNode->getOperand(1) != Orig)
2032  return;
2033  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2034  Depth);
2035  case AArch64::BFMWri:
2036  case AArch64::BFMXri:
2037  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2038 
2039  case AArch64::STRBBui:
2040  case AArch64::STURBBi:
2041  if (UserNode->getOperand(0) != Orig)
2042  return;
2043  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2044  return;
2045 
2046  case AArch64::STRHHui:
2047  case AArch64::STURHHi:
2048  if (UserNode->getOperand(0) != Orig)
2049  return;
2050  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2051  return;
2052  }
2053 }
2054 
2055 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2056  if (Depth >= 6)
2057  return;
2058  // Initialize UsefulBits
2059  if (!Depth) {
2060  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2061  // At the beginning, assume every produced bit is useful.
2062  UsefulBits = APInt(Bitwidth, 0);
2063  UsefulBits.flipAllBits();
2064  }
2065  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2066 
2067  for (SDNode *Node : Op.getNode()->uses()) {
2068  // A use cannot produce useful bits
2069  APInt UsefulBitsForUse = APInt(UsefulBits);
2070  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2071  UsersUsefulBits |= UsefulBitsForUse;
2072  }
2073  // UsefulBits contains the produced bits that are meaningful for the
2074  // current definition, thus a user cannot make a bit meaningful at
2075  // this point
2076  UsefulBits &= UsersUsefulBits;
2077 }
2078 
2079 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2080 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2081 /// 0, return Op unchanged.
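/// For example, with a 32-bit Op and ShlAmount == 3 this emits
/// UBFMWri Op, #29, #28 (an LSL by 3); with ShlAmount == -3 it emits
/// UBFMWri Op, #3, #31 (an LSR by 3).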
2082 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2083  if (ShlAmount == 0)
2084  return Op;
2085 
2086  EVT VT = Op.getValueType();
2087  SDLoc dl(Op);
2088  unsigned BitWidth = VT.getSizeInBits();
2089  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2090 
2091  SDNode *ShiftNode;
2092  if (ShlAmount > 0) {
2093  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2094  ShiftNode = CurDAG->getMachineNode(
2095  UBFMOpc, dl, VT, Op,
2096  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2097  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2098  } else {
2099  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2100  assert(ShlAmount < 0 && "expected right shift");
2101  int ShrAmount = -ShlAmount;
2102  ShiftNode = CurDAG->getMachineNode(
2103  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2104  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2105  }
2106 
2107  return SDValue(ShiftNode, 0);
2108 }
2109 
2110 /// Does this tree qualify as an attempt to move a bitfield into position,
2111 /// essentially "(and (shl VAL, N), Mask)".
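/// For example, on i32, (and (shl x, 3), 0xf8) positions a 5-bit field of x at
/// bit 3 (assuming the shift has a single use): Src becomes x, ShiftAmount
/// becomes 3 and MaskWidth becomes 5.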
2112 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2113  bool BiggerPattern,
2114  SDValue &Src, int &ShiftAmount,
2115  int &MaskWidth) {
2116  EVT VT = Op.getValueType();
2117  unsigned BitWidth = VT.getSizeInBits();
2118  (void)BitWidth;
2119  assert(BitWidth == 32 || BitWidth == 64);
2120 
2121  KnownBits Known = CurDAG->computeKnownBits(Op);
2122 
2123  // Non-zero in the sense that they're not provably zero, which is the key
2124  // point if we want to use this value
2125  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2126 
2127  // Discard a constant AND mask if present. It's safe because the node will
2128  // already have been factored into the computeKnownBits calculation above.
2129  uint64_t AndImm;
2130  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2131  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2132  Op = Op.getOperand(0);
2133  }
2134 
2135  // Don't match if the SHL has more than one use, since then we'll end up
2136  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2137  if (!BiggerPattern && !Op.hasOneUse())
2138  return false;
2139 
2140  uint64_t ShlImm;
2141  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2142  return false;
2143  Op = Op.getOperand(0);
2144 
2145  if (!isShiftedMask_64(NonZeroBits))
2146  return false;
2147 
2148  ShiftAmount = countTrailingZeros(NonZeroBits);
2149  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2150 
2151  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2152  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2153  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2154  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2155  // which case it is not profitable to insert an extra shift.
2156  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2157  return false;
2158  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2159 
2160  return true;
2161 }
2162 
2163 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2164  assert(VT == MVT::i32 || VT == MVT::i64);
2165  if (VT == MVT::i32)
2166  return isShiftedMask_32(Mask);
2167  return isShiftedMask_64(Mask);
2168 }
2169 
2170 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2171 // inserted only sets known zero bits.
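// For example, on i32, ((x & 0xffff00ff) | 0x4400) only sets bits that the AND
// has already cleared, so (assuming the AND has a single use) it is selected as
// a MOVi32imm of 0x4400 >> 8 == 0x44 followed by a BFM with ImmR = 24 and
// ImmS = 7, i.e. a BFI of that constant into bits [15:8] of x.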
2172 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2173  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2174 
2175  EVT VT = N->getValueType(0);
2176  if (VT != MVT::i32 && VT != MVT::i64)
2177  return false;
2178 
2179  unsigned BitWidth = VT.getSizeInBits();
2180 
2181  uint64_t OrImm;
2182  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2183  return false;
2184 
2185  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2186  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2187  // performance neutral.
2188  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2189  return false;
2190 
2191  uint64_t MaskImm;
2192  SDValue And = N->getOperand(0);
2193  // Must be a single use AND with an immediate operand.
2194  if (!And.hasOneUse() ||
2195  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2196  return false;
2197 
2198  // Compute the Known Zero for the AND as this allows us to catch more general
2199  // cases than just looking for AND with imm.
2200  KnownBits Known = CurDAG->computeKnownBits(And);
2201 
2202  // Non-zero in the sense that they're not provably zero, which is the key
2203  // point if we want to use this value.
2204  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2205 
2206  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2207  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2208  return false;
2209 
2210  // The bits being inserted must only set those bits that are known to be zero.
2211  if ((OrImm & NotKnownZero) != 0) {
2212  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2213  // currently handle this case.
2214  return false;
2215  }
2216 
2217  // BFI/BFXIL dst, src, #lsb, #width.
2218  int LSB = countTrailingOnes(NotKnownZero);
2219  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2220 
2221  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2222  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2223  unsigned ImmS = Width - 1;
2224 
2225  // If we're creating a BFI instruction avoid cases where we need more
2226  // instructions to materialize the BFI constant as compared to the original
2227  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2228  // should be no worse in this case.
2229  bool IsBFI = LSB != 0;
2230  uint64_t BFIImm = OrImm >> LSB;
2231  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2232  // We have a BFI instruction and we know the constant can't be materialized
2233  // with a ORR-immediate with the zero register.
2234  unsigned OrChunks = 0, BFIChunks = 0;
2235  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2236  if (((OrImm >> Shift) & 0xFFFF) != 0)
2237  ++OrChunks;
2238  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2239  ++BFIChunks;
2240  }
2241  if (BFIChunks > OrChunks)
2242  return false;
2243  }
2244 
2245  // Materialize the constant to be inserted.
2246  SDLoc DL(N);
2247  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2248  SDNode *MOVI = CurDAG->getMachineNode(
2249  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2250 
2251  // Create the BFI/BFXIL instruction.
2252  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2253  CurDAG->getTargetConstant(ImmR, DL, VT),
2254  CurDAG->getTargetConstant(ImmS, DL, VT)};
2255  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2256  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2257  return true;
2258 }
2259 
2260 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2261  SelectionDAG *CurDAG) {
2262  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2263 
2264  EVT VT = N->getValueType(0);
2265  if (VT != MVT::i32 && VT != MVT::i64)
2266  return false;
2267 
2268  unsigned BitWidth = VT.getSizeInBits();
2269 
2270  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2271  // have the expected shape. Try to undo that.
2272 
2273  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2274  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2275 
2276  // Given a OR operation, check if we have the following pattern
2277  // ubfm c, b, imm, imm2 (or something that does the same job, see
2278  // isBitfieldExtractOp)
2279  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2280  // countTrailingZeros(mask2) == imm2 - imm + 1
2281  // f = d | c
2282  // if yes, replace the OR instruction with:
2283  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2284 
2285  // OR is commutative, check all combinations of operand order and values of
2286  // BiggerPattern, i.e.
2287  // Opd0, Opd1, BiggerPattern=false
2288  // Opd1, Opd0, BiggerPattern=false
2289  // Opd0, Opd1, BiggerPattern=true
2290  // Opd1, Opd0, BiggerPattern=true
2291  // Several of these combinations may match, so check with BiggerPattern=false
2292  // first since that will produce better results by matching more instructions
2293  // and/or inserting fewer extra instructions.
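  // E.g., on i32, (or (and y, 0xffffff00), (and (srl x, 8), 0xff)) matches with
  // Src = x extracted by UBFMWri #8, #15 and Dst = y (assuming all bits of the
  // OR result are used), and is selected as BFMWri y, x, #8, #15, i.e.
  // BFXIL y, x, #8, #8.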
2294  for (int I = 0; I < 4; ++I) {
2295 
2296  SDValue Dst, Src;
2297  unsigned ImmR, ImmS;
2298  bool BiggerPattern = I / 2;
2299  SDValue OrOpd0Val = N->getOperand(I % 2);
2300  SDNode *OrOpd0 = OrOpd0Val.getNode();
2301  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2302  SDNode *OrOpd1 = OrOpd1Val.getNode();
2303 
2304  unsigned BFXOpc;
2305  int DstLSB, Width;
2306  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2307  NumberOfIgnoredLowBits, BiggerPattern)) {
2308  // Check that the returned opcode is compatible with the pattern,
2309  // i.e., same type and zero extended (U and not S)
2310  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2311  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2312  continue;
2313 
2314  // Compute the width of the bitfield insertion
2315  DstLSB = 0;
2316  Width = ImmS - ImmR + 1;
2317  // FIXME: This constraint is to catch bitfield insertion; we may
2318  // want to widen the pattern if we want to grab the general bitfield
2319  // move case.
2320  if (Width <= 0)
2321  continue;
2322 
2323  // If the mask on the insertee is correct, we have a BFXIL operation. We
2324  // can share the ImmR and ImmS values from the already-computed UBFM.
2325  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2326  BiggerPattern,
2327  Src, DstLSB, Width)) {
2328  ImmR = (BitWidth - DstLSB) % BitWidth;
2329  ImmS = Width - 1;
2330  } else
2331  continue;
2332 
2333  // Check the second part of the pattern
2334  EVT VT = OrOpd1Val.getValueType();
2335  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2336 
2337  // Compute the Known Zero for the candidate of the first operand.
2338  // This allows us to catch more general cases than just looking for
2339  // an AND with an immediate. Indeed, simplify-demanded-bits may have removed
2340  // the AND instruction because it proves it was useless.
2341  KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2342 
2343  // Check if there is enough room for the second operand to appear
2344  // in the first one
2345  APInt BitsToBeInserted =
2346  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2347 
2348  if ((BitsToBeInserted & ~Known.Zero) != 0)
2349  continue;
2350 
2351  // Set the first operand
2352  uint64_t Imm;
2353  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2354  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2355  // In that case, we can eliminate the AND
2356  Dst = OrOpd1->getOperand(0);
2357  else
2358  // Maybe the AND has been removed by simplify-demanded-bits
2359  // or is useful because it discards more bits
2360  Dst = OrOpd1Val;
2361 
2362  // both parts match
2363  SDLoc DL(N);
2364  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2365  CurDAG->getTargetConstant(ImmS, DL, VT)};
2366  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2367  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2368  return true;
2369  }
2370 
2371  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2372  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2373  // mask (e.g., 0x000ffff0).
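  // E.g., on i32, with Mask0Imm = 0xffff000f and Mask1Imm = 0x0000fff0, LSB is
  // 4 and Width is 12, so Y is shifted right by 4 and merged into X with
  // BFMWri X, (Y >> 4), #28, #11, copying bits [15:4] of Y into bits [15:4]
  // of X.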
2374  uint64_t Mask0Imm, Mask1Imm;
2375  SDValue And0 = N->getOperand(0);
2376  SDValue And1 = N->getOperand(1);
2377  if (And0.hasOneUse() && And1.hasOneUse() &&
2378  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2379  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2380  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2381  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2382 
2383  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2384  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2385  // bits to be inserted.
2386  if (isShiftedMask(Mask0Imm, VT)) {
2387  std::swap(And0, And1);
2388  std::swap(Mask0Imm, Mask1Imm);
2389  }
2390 
2391  SDValue Src = And1->getOperand(0);
2392  SDValue Dst = And0->getOperand(0);
2393  unsigned LSB = countTrailingZeros(Mask1Imm);
2394  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2395 
2396  // The BFXIL inserts the low-order bits from a source register, so right
2397  // shift the needed bits into place.
2398  SDLoc DL(N);
2399  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2400  SDNode *LSR = CurDAG->getMachineNode(
2401  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2402  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2403 
2404  // BFXIL is an alias of BFM, so translate to BFM operands.
2405  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2406  unsigned ImmS = Width - 1;
2407 
2408  // Create the BFXIL instruction.
2409  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2410  CurDAG->getTargetConstant(ImmR, DL, VT),
2411  CurDAG->getTargetConstant(ImmS, DL, VT)};
2412  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2413  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2414  return true;
2415  }
2416 
2417  return false;
2418 }
2419 
2420 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2421  if (N->getOpcode() != ISD::OR)
2422  return false;
2423 
2424  APInt NUsefulBits;
2425  getUsefulBits(SDValue(N, 0), NUsefulBits);
2426 
2427  // If all bits are not useful, just return UNDEF.
2428  if (!NUsefulBits) {
2429  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2430  return true;
2431  }
2432 
2433  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2434  return true;
2435 
2436  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2437 }
2438 
2439 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2440 /// equivalent of a left shift by a constant amount followed by an and masking
2441 /// out a contiguous set of bits.
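/// For example, on i32, (and (shl x, 4), 0xff0) becomes UBFMWri x, #28, #7,
/// i.e. UBFIZ x, #4, #8 (assuming the shift has a single use).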
2442 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2443  if (N->getOpcode() != ISD::AND)
2444  return false;
2445 
2446  EVT VT = N->getValueType(0);
2447  if (VT != MVT::i32 && VT != MVT::i64)
2448  return false;
2449 
2450  SDValue Op0;
2451  int DstLSB, Width;
2452  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2453  Op0, DstLSB, Width))
2454  return false;
2455 
2456  // ImmR is the rotate right amount.
2457  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2458  // ImmS is the most significant bit of the source to be moved.
2459  unsigned ImmS = Width - 1;
2460 
2461  SDLoc DL(N);
2462  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2463  CurDAG->getTargetConstant(ImmS, DL, VT)};
2464  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2465  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2466  return true;
2467 }
2468 
2469 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2470 /// variable shift/rotate instructions.
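/// For example, (srl x, (and amt, 63)) on i64 can drop the AND because LSRVXr
/// already uses only the low 6 bits of the amount, and (shl x, (sub 64, amt))
/// can use a NEG of amt instead of materializing the constant for the SUB.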
2471 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2472  EVT VT = N->getValueType(0);
2473 
2474  unsigned Opc;
2475  switch (N->getOpcode()) {
2476  case ISD::ROTR:
2477  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2478  break;
2479  case ISD::SHL:
2480  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2481  break;
2482  case ISD::SRL:
2483  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2484  break;
2485  case ISD::SRA:
2486  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2487  break;
2488  default:
2489  return false;
2490  }
2491 
2492  uint64_t Size;
2493  uint64_t Bits;
2494  if (VT == MVT::i32) {
2495  Bits = 5;
2496  Size = 32;
2497  } else if (VT == MVT::i64) {
2498  Bits = 6;
2499  Size = 64;
2500  } else
2501  return false;
2502 
2503  SDValue ShiftAmt = N->getOperand(1);
2504  SDLoc DL(N);
2505  SDValue NewShiftAmt;
2506 
2507  // Skip over an extend of the shift amount.
2508  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2509  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2510  ShiftAmt = ShiftAmt->getOperand(0);
2511 
2512  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2513  SDValue Add0 = ShiftAmt->getOperand(0);
2514  SDValue Add1 = ShiftAmt->getOperand(1);
2515  uint64_t Add0Imm;
2516  uint64_t Add1Imm;
2517  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2518  // to avoid the ADD/SUB.
2519  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2520  NewShiftAmt = Add0;
2521  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2522  // generate a NEG instead of a SUB of a constant.
2523  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2524  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2525  (Add0Imm % Size == 0)) {
2526  unsigned NegOpc;
2527  unsigned ZeroReg;
2528  EVT SubVT = ShiftAmt->getValueType(0);
2529  if (SubVT == MVT::i32) {
2530  NegOpc = AArch64::SUBWrr;
2531  ZeroReg = AArch64::WZR;
2532  } else {
2533  assert(SubVT == MVT::i64);
2534  NegOpc = AArch64::SUBXrr;
2535  ZeroReg = AArch64::XZR;
2536  }
2537  SDValue Zero =
2538  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2539  MachineSDNode *Neg =
2540  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2541  NewShiftAmt = SDValue(Neg, 0);
2542  } else
2543  return false;
2544  } else {
2545  // If the shift amount is masked with an AND, check that the mask covers the
2546  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2547  // the AND.
2548  uint64_t MaskImm;
2549  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2550  return false;
2551 
2552  if (countTrailingOnes(MaskImm) < Bits)
2553  return false;
2554 
2555  NewShiftAmt = ShiftAmt->getOperand(0);
2556  }
2557 
2558  // Narrow/widen the shift amount to match the size of the shift operation.
2559  if (VT == MVT::i32)
2560  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2561  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2562  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2563  MachineSDNode *Ext = CurDAG->getMachineNode(
2564  AArch64::SUBREG_TO_REG, DL, VT,
2565  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2566  NewShiftAmt = SDValue(Ext, 0);
2567  }
2568 
2569  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2570  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2571  return true;
2572 }
2573 
2574 bool
2575 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2576  unsigned RegWidth) {
2577  APFloat FVal(0.0);
2578  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2579  FVal = CN->getValueAPF();
2580  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2581  // Some otherwise illegal constants are allowed in this case.
2582  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2583  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2584  return false;
2585 
2586  ConstantPoolSDNode *CN =
2587  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2588  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2589  } else
2590  return false;
2591 
2592  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2593  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2594  // x-register.
2595  //
2596  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2597  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2598  // integers.
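  // For example, (fp_to_sint (fmul x, 65536.0)) targeting a w-register yields
  // FBits == 16, i.e. a fixed-point convert with 16 fractional bits.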
2599  bool IsExact;
2600 
2601  // fbits is between 1 and 64 in the worst-case, which means the fmul
2602  // could have 2^64 as an actual operand. Need 65 bits of precision.
2603  APSInt IntVal(65, true);
2604  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2605 
2606  // N.b. isPowerOf2 also checks for > 0.
2607  if (!IsExact || !IntVal.isPowerOf2()) return false;
2608  unsigned FBits = IntVal.logBase2();
2609 
2610  // Checks above should have guaranteed that we haven't lost information in
2611  // finding FBits, but it must still be in range.
2612  if (FBits == 0 || FBits > RegWidth) return false;
2613 
2614  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2615  return true;
2616 }
2617 
2618 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
2619 // of the string, obtains the integer values from them, and combines these
2620 // into a single value to be used in the MRS/MSR instruction.
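// For example, the string "3:0:4:2:0" packs to
// (3 << 14) | (0 << 11) | (4 << 7) | (2 << 3) | 0 == 0xC210.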
2621 static int getIntOperandFromRegisterString(StringRef RegString) {
2622  SmallVector<StringRef, 5> Fields;
2623  RegString.split(Fields, ':');
2624 
2625  if (Fields.size() == 1)
2626  return -1;
2627 
2628  assert(Fields.size() == 5
2629  && "Invalid number of fields in read register string");
2630 
2631  SmallVector<int, 5> Ops;
2632  bool AllIntFields = true;
2633 
2634  for (StringRef Field : Fields) {
2635  unsigned IntField;
2636  AllIntFields &= !Field.getAsInteger(10, IntField);
2637  Ops.push_back(IntField);
2638  }
2639 
2640  assert(AllIntFields &&
2641  "Unexpected non-integer value in special register string.");
2642 
2643  // Need to combine the integer fields of the string into a single value
2644  // based on the bit encoding of MRS/MSR instruction.
2645  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2646  (Ops[3] << 3) | (Ops[4]);
2647 }
2648 
2649 // Lower the read_register intrinsic to an MRS instruction node if the special
2650 // register string argument is either of the form detailed in the ACLE (the
2651 // form described in getIntOperandFromRegisterString) or is a named register
2652 // known by the MRS SysReg mapper.
2653 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2654  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2655  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2656  SDLoc DL(N);
2657 
2658  int Reg = getIntOperandFromRegisterString(RegString->getString());
2659  if (Reg != -1) {
2660  ReplaceNode(N, CurDAG->getMachineNode(
2661  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2662  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2663  N->getOperand(0)));
2664  return true;
2665  }
2666 
2667  // Use the sysreg mapper to map the remaining possible strings to the
2668  // value for the register to be used for the instruction operand.
2669  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2670  if (TheReg && TheReg->Readable &&
2671  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2672  Reg = TheReg->Encoding;
2673  else
2674  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2675 
2676  if (Reg != -1) {
2677  ReplaceNode(N, CurDAG->getMachineNode(
2678  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2679  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2680  N->getOperand(0)));
2681  return true;
2682  }
2683 
2684  if (RegString->getString() == "pc") {
2685  ReplaceNode(N, CurDAG->getMachineNode(
2686  AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
2687  CurDAG->getTargetConstant(0, DL, MVT::i32),
2688  N->getOperand(0)));
2689  return true;
2690  }
2691 
2692  return false;
2693 }
2694 
2695 // Lower the write_register intrinsic to an MSR instruction node if the special
2696 // register string argument is either of the form detailed in the ACLE (the
2697 // form described in getIntOperandFromRegisterString) or is a named register
2698 // known by the MSR SysReg mapper.
2699 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2700  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2701  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2702  SDLoc DL(N);
2703 
2704  int Reg = getIntOperandFromRegisterString(RegString->getString());
2705  if (Reg != -1) {
2706  ReplaceNode(
2707  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2708  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2709  N->getOperand(2), N->getOperand(0)));
2710  return true;
2711  }
2712 
2713  // Check if the register was one of those allowed as the pstatefield value in
2714  // the MSR (immediate) instruction. To accept the values allowed in the
2715 // pstatefield for the MSR (immediate) instruction, we also require that an
2716 // immediate value has been provided as an argument; we know that this is
2717 // the case, as it has been ensured by semantic checking.
2718  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2719  if (PMapper) {
2720  assert (isa<ConstantSDNode>(N->getOperand(2))
2721  && "Expected a constant integer expression.");
2722  unsigned Reg = PMapper->Encoding;
2723  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2724  unsigned State;
2725  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2726  assert(Immed < 2 && "Bad imm");
2727  State = AArch64::MSRpstateImm1;
2728  } else {
2729  assert(Immed < 16 && "Bad imm");
2730  State = AArch64::MSRpstateImm4;
2731  }
2732  ReplaceNode(N, CurDAG->getMachineNode(
2733  State, DL, MVT::Other,
2734  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2735  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2736  N->getOperand(0)));
2737  return true;
2738  }
2739 
2740  // Use the sysreg mapper to attempt to map the remaining possible strings
2741  // to the value for the register to be used for the MSR (register)
2742  // instruction operand.
2743  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2744  if (TheReg && TheReg->Writeable &&
2745  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2746  Reg = TheReg->Encoding;
2747  else
2748  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2749  if (Reg != -1) {
2750  ReplaceNode(N, CurDAG->getMachineNode(
2751  AArch64::MSR, DL, MVT::Other,
2752  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2753  N->getOperand(2), N->getOperand(0)));
2754  return true;
2755  }
2756 
2757  return false;
2758 }
2759 
2760 /// We've got special pseudo-instructions for these
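/// For example, a 32-bit ATOMIC_CMP_SWAP on a subtarget without LSE is selected
/// to the CMP_SWAP_32 pseudo here, which is later expanded to an exclusive
/// load/store (LDAXR/STLXR) loop.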
2761 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2762  unsigned Opcode;
2763  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2764 
2765  // Leave IR for LSE if subtarget supports it.
2766  if (Subtarget->hasLSE()) return false;
2767 
2768  if (MemTy == MVT::i8)
2769  Opcode = AArch64::CMP_SWAP_8;
2770  else if (MemTy == MVT::i16)
2771  Opcode = AArch64::CMP_SWAP_16;
2772  else if (MemTy == MVT::i32)
2773  Opcode = AArch64::CMP_SWAP_32;
2774  else if (MemTy == MVT::i64)
2775  Opcode = AArch64::CMP_SWAP_64;
2776  else
2777  llvm_unreachable("Unknown AtomicCmpSwap type");
2778 
2779  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2780  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2781  N->getOperand(0)};
2782  SDNode *CmpSwap = CurDAG->getMachineNode(
2783  Opcode, SDLoc(N),
2784  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2785 
2786  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2787  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2788 
2789  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2790  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2791  CurDAG->RemoveDeadNode(N);
2792 
2793  return true;
2794 }
2795 
2796 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
2797  // tagp(FrameIndex, IRGstack, tag_offset):
2798  // since the offset between FrameIndex and IRGstack is a compile-time
2799  // constant, this can be lowered to a single ADDG instruction.
2800  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
2801  return false;
2802  }
2803 
2804  SDValue IRG_SP = N->getOperand(2);
2805  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
2806  cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
2807  Intrinsic::aarch64_irg_sp) {
2808  return false;
2809  }
2810 
2811  const TargetLowering *TLI = getTargetLowering();
2812  SDLoc DL(N);
2813  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
2814  SDValue FiOp = CurDAG->getTargetFrameIndex(
2815  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2816  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2817 
2818  SDNode *Out = CurDAG->getMachineNode(
2819  AArch64::TAGPstack, DL, MVT::i64,
2820  {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
2821  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2822  ReplaceNode(N, Out);
2823  return true;
2824 }
2825 
2826 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
2827  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
2828  "llvm.aarch64.tagp third argument must be an immediate");
2829  if (trySelectStackSlotTagP(N))
2830  return;
2831  // FIXME: above applies in any case when offset between Op1 and Op2 is a
2832  // compile-time constant, not just for stack allocations.
2833 
2834  // General case for unrelated pointers in Op1 and Op2.
2835  SDLoc DL(N);
2836  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2837  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
2838  {N->getOperand(1), N->getOperand(2)});
2839  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
2840  {SDValue(N1, 0), N->getOperand(2)});
2841  SDNode *N3 = CurDAG->getMachineNode(
2842  AArch64::ADDG, DL, MVT::i64,
2843  {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
2844  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2845  ReplaceNode(N, N3);
2846 }
2847 
2848 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2849  // If we have a custom node, we already have selected!
2850  if (Node->isMachineOpcode()) {
2851  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2852  Node->setNodeId(-1);
2853  return;
2854  }
2855 
2856  // A few custom selection cases.
2857  EVT VT = Node->getValueType(0);
2858 
2859  switch (Node->getOpcode()) {
2860  default:
2861  break;
2862 
2863  case ISD::ATOMIC_CMP_SWAP:
2864  if (SelectCMP_SWAP(Node))
2865  return;
2866  break;
2867 
2868  case ISD::READ_REGISTER:
2869  if (tryReadRegister(Node))
2870  return;
2871  break;
2872 
2873  case ISD::WRITE_REGISTER:
2874  if (tryWriteRegister(Node))
2875  return;
2876  break;
2877 
2878  case ISD::ADD:
2879  if (tryMLAV64LaneV128(Node))
2880  return;
2881  break;
2882 
2883  case ISD::LOAD: {
2884  // Try to select as an indexed load. Fall through to normal processing
2885  // if we can't.
2886  if (tryIndexedLoad(Node))
2887  return;
2888  break;
2889  }
2890 
2891  case ISD::SRL:
2892  case ISD::AND:
2893  case ISD::SRA:
2894  case ISD::SIGN_EXTEND_INREG:
2895  if (tryBitfieldExtractOp(Node))
2896  return;
2897  if (tryBitfieldInsertInZeroOp(Node))
2898  return;
2899  LLVM_FALLTHROUGH;
2900  case ISD::ROTR:
2901  case ISD::SHL:
2902  if (tryShiftAmountMod(Node))
2903  return;
2904  break;
2905 
2906  case ISD::SIGN_EXTEND:
2907  if (tryBitfieldExtractOpFromSExt(Node))
2908  return;
2909  break;
2910 
2911  case ISD::OR:
2912  if (tryBitfieldInsertOp(Node))
2913  return;
2914  break;
2915 
2916  case ISD::EXTRACT_VECTOR_ELT: {
2917  // Extracting lane zero is a special case where we can just use a plain
2918  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2919  // the rest of the compiler, especially the register allocator and copy
2920  // propagation, to reason about, so is preferred when it's possible to
2921  // use it.
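  // E.g., extracting element 0 of a v2f64 becomes an EXTRACT_SUBREG of dsub,
  // which the register allocator can usually turn into a plain copy or FMOV.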
2922  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2923  // Bail and use the default Select() for non-zero lanes.
2924  if (LaneNode->getZExtValue() != 0)
2925  break;
2926  // If the element type is not the same as the result type, likewise
2927  // bail and use the default Select(), as there's more to do than just
2928  // a cross-class COPY. This catches extracts of i8 and i16 elements
2929  // since they will need an explicit zext.
2930  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2931  break;
2932  unsigned SubReg;
2933  switch (Node->getOperand(0)
2934  .getValueType()
2935  .getVectorElementType()
2936  .getSizeInBits()) {
2937  default:
2938  llvm_unreachable("Unexpected vector element type!");
2939  case 64:
2940  SubReg = AArch64::dsub;
2941  break;
2942  case 32:
2943  SubReg = AArch64::ssub;
2944  break;
2945  case 16:
2946  SubReg = AArch64::hsub;
2947  break;
2948  case 8:
2949  llvm_unreachable("unexpected zext-requiring extract element!");
2950  }
2951  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2952  Node->getOperand(0));
2953  LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2954  LLVM_DEBUG(Extract->dumpr(CurDAG));
2955  LLVM_DEBUG(dbgs() << "\n");
2956  ReplaceNode(Node, Extract.getNode());
2957  return;
2958  }
2959  case ISD::Constant: {
2960  // Materialize zero constants as copies from WZR/XZR. This allows
2961  // the coalescer to propagate these into other instructions.
2962  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2963  if (ConstNode->isNullValue()) {
2964  if (VT == MVT::i32) {
2965  SDValue New = CurDAG->getCopyFromReg(
2966  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2967  ReplaceNode(Node, New.getNode());
2968  return;
2969  } else if (VT == MVT::i64) {
2970  SDValue New = CurDAG->getCopyFromReg(
2971  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2972  ReplaceNode(Node, New.getNode());
2973  return;
2974  }
2975  }
2976  break;
2977  }
2978 
2979  case ISD::FrameIndex: {
2980  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2981  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2982  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2983  const TargetLowering *TLI = getTargetLowering();
2984  SDValue TFI = CurDAG->getTargetFrameIndex(
2985  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2986  SDLoc DL(Node);
2987  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2988  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2989  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2990  return;
2991  }
2992  case ISD::INTRINSIC_W_CHAIN: {
2993  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2994  switch (IntNo) {
2995  default:
2996  break;
2997  case Intrinsic::aarch64_ldaxp:
2998  case Intrinsic::aarch64_ldxp: {
2999  unsigned Op =
3000  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3001  SDValue MemAddr = Node->getOperand(2);
3002  SDLoc DL(Node);
3003  SDValue Chain = Node->getOperand(0);
3004 
3005  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
3006  MVT::Other, MemAddr, Chain);
3007 
3008  // Transfer memoperands.
3009  MachineMemOperand *MemOp =
3010  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3011  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3012  ReplaceNode(Node, Ld);
3013  return;
3014  }
3015  case Intrinsic::aarch64_stlxp:
3016  case Intrinsic::aarch64_stxp: {
3017  unsigned Op =
3018  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3019  SDLoc DL(Node);
3020  SDValue Chain = Node->getOperand(0);
3021  SDValue ValLo = Node->getOperand(2);
3022  SDValue ValHi = Node->getOperand(3);
3023  SDValue MemAddr = Node->getOperand(4);
3024 
3025  // Place arguments in the right order.
3026  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3027 
3028  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
3029  // Transfer memoperands.
3030  MachineMemOperand *MemOp =
3031  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3032  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3033 
3034  ReplaceNode(Node, St);
3035  return;
3036  }
3037  case Intrinsic::aarch64_neon_ld1x2:
3038  if (VT == MVT::v8i8) {
3039  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3040  return;
3041  } else if (VT == MVT::v16i8) {
3042  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3043  return;
3044  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3045  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3046  return;
3047  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3048  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3049  return;
3050  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3051  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3052  return;
3053  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3054  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3055  return;
3056  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3057  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3058  return;
3059  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3060  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3061  return;
3062  }
3063  break;
3064  case Intrinsic::aarch64_neon_ld1x3:
3065  if (VT == MVT::v8i8) {
3066  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3067  return;
3068  } else if (VT == MVT::v16i8) {
3069  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3070  return;
3071  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3072  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3073  return;
3074  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3075  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3076  return;
3077  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3078  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3079  return;
3080  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3081  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3082  return;
3083  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3084  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3085  return;
3086  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3087  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3088  return;
3089  }
3090  break;
3091  case Intrinsic::aarch64_neon_ld1x4:
3092  if (VT == MVT::v8i8) {
3093  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3094  return;
3095  } else if (VT == MVT::v16i8) {
3096  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3097  return;
3098  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3099  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3100  return;
3101  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3102  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3103  return;
3104  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3105  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3106  return;
3107  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3108  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3109  return;
3110  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3111  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3112  return;
3113  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3114  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3115  return;
3116  }
3117  break;
3118  case Intrinsic::aarch64_neon_ld2:
3119  if (VT == MVT::v8i8) {
3120  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3121  return;
3122  } else if (VT == MVT::v16i8) {
3123  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3124  return;
3125  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3126  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3127  return;
3128  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3129  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3130  return;
3131  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3132  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3133  return;
3134  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3135  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3136  return;
3137  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3138  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3139  return;
3140  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3141  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3142  return;
3143  }
3144  break;
3145  case Intrinsic::aarch64_neon_ld3:
3146  if (VT == MVT::v8i8) {
3147  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3148  return;
3149  } else if (VT == MVT::v16i8) {
3150  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3151  return;
3152  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3153  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3154  return;
3155  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3156  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3157  return;
3158  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3159  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3160  return;
3161  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3162  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3163  return;
3164  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3165  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3166  return;
3167  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3168  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3169  return;
3170  }
3171  break;
3172  case Intrinsic::aarch64_neon_ld4:
3173  if (VT == MVT::v8i8) {
3174  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3175  return;
3176  } else if (VT == MVT::v16i8) {
3177  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3178  return;
3179  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3180  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3181  return;
3182  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3183  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3184  return;
3185  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3186  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3187  return;
3188  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3189  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3190  return;
3191  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3192  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3193  return;
3194  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3195  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3196  return;
3197  }
3198  break;
3199  case Intrinsic::aarch64_neon_ld2r:
3200  if (VT == MVT::v8i8) {
3201  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3202  return;
3203  } else if (VT == MVT::v16i8) {
3204  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3205  return;
3206  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3207  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3208  return;
3209  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3210  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3211  return;
3212  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3213  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3214  return;
3215  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3216  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3217  return;
3218  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3219  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3220  return;
3221  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3222  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3223  return;
3224  }
3225  break;
3226  case Intrinsic::aarch64_neon_ld3r:
3227  if (VT == MVT::v8i8) {
3228  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3229  return;
3230  } else if (VT == MVT::v16i8) {
3231  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3232  return;
3233  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3234  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3235  return;
3236  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3237  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3238  return;
3239  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3240  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3241  return;
3242  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3243  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3244  return;
3245  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3246  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3247  return;
3248  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3249  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3250  return;
3251  }
3252  break;
3253  case Intrinsic::aarch64_neon_ld4r:
3254  if (VT == MVT::v8i8) {
3255  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3256  return;
3257  } else if (VT == MVT::v16i8) {
3258  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3259  return;
3260  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3261  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3262  return;
3263  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3264  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3265  return;
3266  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3267  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3268  return;
3269  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3270  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3271  return;
3272  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3273  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3274  return;
3275  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3276  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3277  return;
3278  }
3279  break;
3280  case Intrinsic::aarch64_neon_ld2lane:
3281  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3282  SelectLoadLane(Node, 2, AArch64::LD2i8);
3283  return;
3284  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3285  VT == MVT::v8f16) {
3286  SelectLoadLane(Node, 2, AArch64::LD2i16);
3287  return;
3288  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3289  VT == MVT::v2f32) {
3290  SelectLoadLane(Node, 2, AArch64::LD2i32);
3291  return;
3292  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3293  VT == MVT::v1f64) {
3294  SelectLoadLane(Node, 2, AArch64::LD2i64);
3295  return;
3296  }
3297  break;
3298  case Intrinsic::aarch64_neon_ld3lane:
3299  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3300  SelectLoadLane(Node, 3, AArch64::LD3i8);
3301  return;
3302  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3303  VT == MVT::v8f16) {
3304  SelectLoadLane(Node, 3, AArch64::LD3i16);
3305  return;
3306  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3307  VT == MVT::v2f32) {
3308  SelectLoadLane(Node, 3, AArch64::LD3i32);
3309  return;
3310  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3311  VT == MVT::v1f64) {
3312  SelectLoadLane(Node, 3, AArch64::LD3i64);
3313  return;
3314  }
3315  break;
3316  case Intrinsic::aarch64_neon_ld4lane:
3317  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3318  SelectLoadLane(Node, 4, AArch64::LD4i8);
3319  return;
3320  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3321  VT == MVT::v8f16) {
3322  SelectLoadLane(Node, 4, AArch64::LD4i16);
3323  return;
3324  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3325  VT == MVT::v2f32) {
3326  SelectLoadLane(Node, 4, AArch64::LD4i32);
3327  return;
3328  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3329  VT == MVT::v1f64) {
3330  SelectLoadLane(Node, 4, AArch64::LD4i64);
3331  return;
3332  }
3333  break;
3334  }
3335  } break;
3336  case ISD::INTRINSIC_WO_CHAIN: {
3337  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3338  switch (IntNo) {
3339  default:
3340  break;
3341  case Intrinsic::aarch64_tagp:
3342  SelectTagP(Node);
3343  return;
3344  case Intrinsic::aarch64_neon_tbl2:
3345  SelectTable(Node, 2,
3346  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3347  false);
3348  return;
3349  case Intrinsic::aarch64_neon_tbl3:
3350  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3351  : AArch64::TBLv16i8Three,
3352  false);
3353  return;
3354  case Intrinsic::aarch64_neon_tbl4:
3355  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3356  : AArch64::TBLv16i8Four,
3357  false);
3358  return;
3359  case Intrinsic::aarch64_neon_tbx2:
3360  SelectTable(Node, 2,
3361  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3362  true);
3363  return;
3364  case Intrinsic::aarch64_neon_tbx3:
3365  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3366  : AArch64::TBXv16i8Three,
3367  true);
3368  return;
3369  case Intrinsic::aarch64_neon_tbx4:
3370  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3371  : AArch64::TBXv16i8Four,
3372  true);
3373  return;
3374  case Intrinsic::aarch64_neon_smull:
3375  case Intrinsic::aarch64_neon_umull:
3376  if (tryMULLV64LaneV128(IntNo, Node))
3377  return;
3378  break;
3379  }
3380  break;
3381  }
3382  case ISD::INTRINSIC_VOID: {
3383  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3384  if (Node->getNumOperands() >= 3)
3385  VT = Node->getOperand(2)->getValueType(0);
3386  switch (IntNo) {
3387  default:
3388  break;
3389  case Intrinsic::aarch64_neon_st1x2: {
3390  if (VT == MVT::v8i8) {
3391  SelectStore(Node, 2, AArch64::ST1Twov8b);
3392  return;
3393  } else if (VT == MVT::v16i8) {
3394  SelectStore(Node, 2, AArch64::ST1Twov16b);
3395  return;
3396  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3397  SelectStore(Node, 2, AArch64::ST1Twov4h);
3398  return;
3399  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3400  SelectStore(Node, 2, AArch64::ST1Twov8h);
3401  return;
3402  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3403  SelectStore(Node, 2, AArch64::ST1Twov2s);
3404  return;
3405  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3406  SelectStore(Node, 2, AArch64::ST1Twov4s);
3407  return;
3408  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3409  SelectStore(Node, 2, AArch64::ST1Twov2d);
3410  return;
3411  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3412  SelectStore(Node, 2, AArch64::ST1Twov1d);
3413  return;
3414  }
3415  break;
3416  }
3417  case Intrinsic::aarch64_neon_st1x3: {
3418  if (VT == MVT::v8i8) {
3419  SelectStore(Node, 3, AArch64::ST1Threev8b);
3420  return;
3421  } else if (VT == MVT::v16i8) {
3422  SelectStore(Node, 3, AArch64::ST1Threev16b);
3423  return;
3424  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3425  SelectStore(Node, 3, AArch64::ST1Threev4h);
3426  return;
3427  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3428  SelectStore(Node, 3, AArch64::ST1Threev8h);
3429  return;
3430  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3431  SelectStore(Node, 3, AArch64::ST1Threev2s);
3432  return;
3433  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3434  SelectStore(Node, 3, AArch64::ST1Threev4s);
3435  return;
3436  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3437  SelectStore(Node, 3, AArch64::ST1Threev2d);
3438  return;
3439  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3440  SelectStore(Node, 3, AArch64::ST1Threev1d);
3441  return;
3442  }
3443  break;
3444  }
3445  case Intrinsic::aarch64_neon_st1x4: {
3446  if (VT == MVT::v8i8) {
3447  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3448  return;
3449  } else if (VT == MVT::v16i8) {
3450  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3451  return;
3452  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3453  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3454  return;
3455  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3456  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3457  return;
3458  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3459  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3460  return;
3461  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3462  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3463  return;
3464  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3465  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3466  return;
3467  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3468  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3469  return;
3470  }
3471  break;
3472  }
3473  case Intrinsic::aarch64_neon_st2: {
3474  if (VT == MVT::v8i8) {
3475  SelectStore(Node, 2, AArch64::ST2Twov8b);
3476  return;
3477  } else if (VT == MVT::v16i8) {
3478  SelectStore(Node, 2, AArch64::ST2Twov16b);
3479  return;
3480  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3481  SelectStore(Node, 2, AArch64::ST2Twov4h);
3482  return;
3483  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3484  SelectStore(Node, 2, AArch64::ST2Twov8h);
3485  return;
3486  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3487  SelectStore(Node, 2, AArch64::ST2Twov2s);
3488  return;
3489  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3490  SelectStore(Node, 2, AArch64::ST2Twov4s);
3491  return;
3492  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3493  SelectStore(Node, 2, AArch64::ST2Twov2d);
3494  return;
3495  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3496  SelectStore(Node, 2, AArch64::ST1Twov1d);
3497  return;
3498  }
3499  break;
3500  }
3501  case Intrinsic::aarch64_neon_st3: {
3502  if (VT == MVT::v8i8) {
3503  SelectStore(Node, 3, AArch64::ST3Threev8b);
3504  return;
3505  } else if (VT == MVT::v16i8) {
3506  SelectStore(Node, 3, AArch64::ST3Threev16b);
3507  return;
3508  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3509  SelectStore(Node, 3, AArch64::ST3Threev4h);
3510  return;
3511  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3512  SelectStore(Node, 3, AArch64::ST3Threev8h);
3513  return;
3514  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3515  SelectStore(Node, 3, AArch64::ST3Threev2s);
3516  return;
3517  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3518  SelectStore(Node, 3, AArch64::ST3Threev4s);
3519  return;
3520  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3521  SelectStore(Node, 3, AArch64::ST3Threev2d);
3522  return;
3523  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3524  SelectStore(Node, 3, AArch64::ST1Threev1d);
3525  return;
3526  }
3527  break;
3528  }
3529  case Intrinsic::aarch64_neon_st4: {
3530  if (VT == MVT::v8i8) {
3531  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3532  return;
3533  } else if (VT == MVT::v16i8) {
3534  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3535  return;
3536  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3537  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3538  return;
3539  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3540  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3541  return;
3542  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3543  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3544  return;
3545  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3546  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3547  return;
3548  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3549  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3550  return;
3551  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3552  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3553  return;
3554  }
3555  break;
3556  }
3557  case Intrinsic::aarch64_neon_st2lane: {
3558  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3559  SelectStoreLane(Node, 2, AArch64::ST2i8);
3560  return;
3561  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3562  VT == MVT::v8f16) {
3563  SelectStoreLane(Node, 2, AArch64::ST2i16);
3564  return;
3565  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3566  VT == MVT::v2f32) {
3567  SelectStoreLane(Node, 2, AArch64::ST2i32);
3568  return;
3569  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3570  VT == MVT::v1f64) {
3571  SelectStoreLane(Node, 2, AArch64::ST2i64);
3572  return;
3573  }
3574  break;
3575  }
3576  case Intrinsic::aarch64_neon_st3lane: {
3577  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3578  SelectStoreLane(Node, 3, AArch64::ST3i8);
3579  return;
3580  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3581  VT == MVT::v8f16) {
3582  SelectStoreLane(Node, 3, AArch64::ST3i16);
3583  return;
3584  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3585  VT == MVT::v2f32) {
3586  SelectStoreLane(Node, 3, AArch64::ST3i32);
3587  return;
3588  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3589  VT == MVT::v1f64) {
3590  SelectStoreLane(Node, 3, AArch64::ST3i64);
3591  return;
3592  }
3593  break;
3594  }
3595  case Intrinsic::aarch64_neon_st4lane: {
3596  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3597  SelectStoreLane(Node, 4, AArch64::ST4i8);
3598  return;
3599  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3600  VT == MVT::v8f16) {
3601  SelectStoreLane(Node, 4, AArch64::ST4i16);
3602  return;
3603  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3604  VT == MVT::v2f32) {
3605  SelectStoreLane(Node, 4, AArch64::ST4i32);
3606  return;
3607  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3608  VT == MVT::v1f64) {
3609  SelectStoreLane(Node, 4, AArch64::ST4i64);
3610  return;
3611  }
3612  break;
3613  }
3614  }
3615  break;
3616  }
3617  case AArch64ISD::LD2post: {
3618  if (VT == MVT::v8i8) {
3619  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3620  return;
3621  } else if (VT == MVT::v16i8) {
3622  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3623  return;
3624  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3625  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3626  return;
3627  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3628  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3629  return;
3630  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3631  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3632  return;
3633  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3634  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3635  return;
3636  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3637  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3638  return;
3639  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3640  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3641  return;
3642  }
3643  break;
3644  }
3645  case AArch64ISD::LD3post: {
3646  if (VT == MVT::v8i8) {
3647  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3648  return;
3649  } else if (VT == MVT::v16i8) {
3650  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3651  return;
3652  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3653  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3654  return;
3655  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3656  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3657  return;
3658  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3659  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3660  return;
3661  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3662  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3663  return;
3664  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3665  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3666  return;
3667  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3668  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3669  return;
3670  }
3671  break;
3672  }
3673  case AArch64ISD::LD4post: {
3674  if (VT == MVT::v8i8) {
3675  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3676  return;
3677  } else if (VT == MVT::v16i8) {
3678  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3679  return;
3680  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3681  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3682  return;
3683  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3684  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3685  return;
3686  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3687  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3688  return;
3689  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3690  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3691  return;
3692  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3693  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3694  return;
3695  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3696  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3697  return;
3698  }
3699  break;
3700  }
3701  case AArch64ISD::LD1x2post: {
3702  if (VT == MVT::v8i8) {
3703  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3704  return;
3705  } else if (VT == MVT::v16i8) {
3706  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3707  return;
3708  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3709  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3710  return;
3711  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3712  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3713  return;
3714  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3715  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3716  return;
3717  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3718  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3719  return;
3720  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3721  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3722  return;
3723  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3724  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3725  return;
3726  }
3727  break;
3728  }
3729  case AArch64ISD::LD1x3post: {
3730  if (VT == MVT::v8i8) {
3731  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3732  return;
3733  } else if (VT == MVT::v16i8) {
3734  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3735  return;
3736  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3737  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3738  return;
3739  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3740  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3741  return;
3742  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3743  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3744  return;
3745  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3746  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3747  return;
3748  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3749  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3750  return;
3751  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3752  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3753  return;
3754  }
3755  break;
3756  }
3757  case AArch64ISD::LD1x4post: {
3758  if (VT == MVT::v8i8) {
3759  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3760  return;
3761  } else if (VT == MVT::v16i8) {
3762  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3763  return;
3764  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3765  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3766  return;
3767  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3768  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3769  return;
3770  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3771  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3772  return;
3773  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3774  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3775  return;
3776  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3777  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3778  return;
3779  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3780  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3781  return;
3782  }
3783  break;
3784  }
3785  case AArch64ISD::LD1DUPpost: {
3786  if (VT == MVT::v8i8) {
3787  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3788  return;
3789  } else if (VT == MVT::v16i8) {
3790  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3791  return;
3792  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3793  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3794  return;
3795  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3796  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3797  return;
3798  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3799  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3800  return;
3801  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3802  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3803  return;
3804  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3805  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3806  return;
3807  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3808  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3809  return;
3810  }
3811  break;
3812  }
3813  case AArch64ISD::LD2DUPpost: {
3814  if (VT == MVT::v8i8) {
3815  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3816  return;
3817  } else if (VT == MVT::v16i8) {
3818  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3819  return;
3820  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3821  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3822  return;
3823  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3824  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3825  return;
3826  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3827  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3828  return;
3829  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3830  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3831  return;
3832  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3833  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3834  return;
3835  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3836  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3837  return;
3838  }
3839  break;
3840  }
3841  case AArch64ISD::LD3DUPpost: {
3842  if (VT == MVT::v8i8) {
3843  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3844  return;
3845  } else if (VT == MVT::v16i8) {
3846  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3847  return;
3848  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3849  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3850  return;
3851  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3852  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3853  return;
3854  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3855  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3856  return;
3857  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3858  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3859  return;
3860  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3861  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3862  return;
3863  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3864  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3865  return;
3866  }
3867  break;
3868  }
3869  case AArch64ISD::LD4DUPpost: {
3870  if (VT == MVT::v8i8) {
3871  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3872  return;
3873  } else if (VT == MVT::v16i8) {
3874  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3875  return;
3876  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3877  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3878  return;
3879  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3880  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3881  return;
3882  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3883  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3884  return;
3885  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3886  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3887  return;
3888  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3889  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3890  return;
3891  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3892  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3893  return;
3894  }
3895  break;
3896  }
3897  case AArch64ISD::LD1LANEpost: {
3898  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3899  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3900  return;
3901  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3902  VT == MVT::v8f16) {
3903  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3904  return;
3905  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3906  VT == MVT::v2f32) {
3907  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3908  return;
3909  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3910  VT == MVT::v1f64) {
3911  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3912  return;
3913  }
3914  break;
3915  }
3916  case AArch64ISD::LD2LANEpost: {
3917  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3918  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3919  return;
3920  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3921  VT == MVT::v8f16) {
3922  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3923  return;
3924  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3925  VT == MVT::v2f32) {
3926  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3927  return;
3928  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3929  VT == MVT::v1f64) {
3930  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3931  return;
3932  }
3933  break;
3934  }
3935  case AArch64ISD::LD3LANEpost: {
3936  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3937  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3938  return;
3939  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3940  VT == MVT::v8f16) {
3941  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3942  return;
3943  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3944  VT == MVT::v2f32) {
3945  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3946  return;
3947  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3948  VT == MVT::v1f64) {
3949  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3950  return;
3951  }
3952  break;
3953  }
3954  case AArch64ISD::LD4LANEpost: {
3955  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3956  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3957  return;
3958  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3959  VT == MVT::v8f16) {
3960  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3961  return;
3962  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3963  VT == MVT::v2f32) {
3964  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3965  return;
3966  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3967  VT == MVT::v1f64) {
3968  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3969  return;
3970  }
3971  break;
3972  }
3973  case AArch64ISD::ST2post: {
3974  VT = Node->getOperand(1).getValueType();
3975  if (VT == MVT::v8i8) {
3976  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3977  return;
3978  } else if (VT == MVT::v16i8) {
3979  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3980  return;
3981  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3982  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3983  return;
3984  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3985  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3986  return;
3987  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3988  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3989  return;
3990  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3991  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3992  return;
3993  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3994  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3995  return;
3996  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3997  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3998  return;
3999  }
4000  break;
4001  }
4002  case AArch64ISD::ST3post: {
4003  VT = Node->getOperand(1).getValueType();
4004  if (VT == MVT::v8i8) {
4005  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
4006  return;
4007  } else if (VT == MVT::v16i8) {
4008  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
4009  return;
4010  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4011  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
4012  return;
4013  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4014  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
4015  return;
4016  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4017  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
4018  return;
4019  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4020  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
4021  return;
4022  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4023  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
4024  return;
4025  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4026  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4027  return;
4028  }
4029  break;
4030  }
4031  case AArch64ISD::ST4post: {
4032  VT = Node->getOperand(1).getValueType();
4033  if (VT == MVT::v8i8) {
4034  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
4035  return;
4036  } else if (VT == MVT::v16i8) {
4037  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
4038  return;
4039  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4040  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
4041  return;
4042  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4043  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4044  return;
4045  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4046  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4047  return;
4048  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4049  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4050  return;
4051  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4052  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4053  return;
4054  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4055  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4056  return;
4057  }
4058  break;
4059  }
4060  case AArch64ISD::ST1x2post: {
4061  VT = Node->getOperand(1).getValueType();
4062  if (VT == MVT::v8i8) {
4063  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4064  return;
4065  } else if (VT == MVT::v16i8) {
4066  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4067  return;
4068  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4069  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4070  return;
4071  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4072  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4073  return;
4074  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4075  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4076  return;
4077  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4078  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4079  return;
4080  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4081  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4082  return;
4083  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4084  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4085  return;
4086  }
4087  break;
4088  }
4089  case AArch64ISD::ST1x3post: {
4090  VT = Node->getOperand(1).getValueType();
4091  if (VT == MVT::v8i8) {
4092  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4093  return;
4094  } else if (VT == MVT::v16i8) {
4095  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4096  return;
4097  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4098  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4099  return;
4100  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4101  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4102  return;
4103  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4104  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4105  return;
4106  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4107  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4108  return;
4109  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4110  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4111  return;
4112  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4113  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4114  return;
4115  }
4116  break;
4117  }
4118  case AArch64ISD::ST1x4post: {
4119  VT = Node->getOperand(1).getValueType();
4120  if (VT == MVT::v8i8) {
4121  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4122  return;
4123  } else if (VT == MVT::v16i8) {
4124  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4125  return;
4126  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4127  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4128  return;
4129  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4130  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4131  return;
4132  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4133  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4134  return;
4135  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4136  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4137  return;
4138  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4139  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4140  return;
4141  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4142  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4143  return;
4144  }
4145  break;
4146  }
4147  case AArch64ISD::ST2LANEpost: {
4148  VT = Node->getOperand(1).getValueType();
4149  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4150  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4151  return;
4152  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4153  VT == MVT::v8f16) {
4154  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4155  return;
4156  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4157  VT == MVT::v2f32) {
4158  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4159  return;
4160  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4161  VT == MVT::v1f64) {
4162  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4163  return;
4164  }
4165  break;
4166  }
4167  case AArch64ISD::ST3LANEpost: {
4168  VT = Node->getOperand(1).getValueType();
4169  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4170  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4171  return;
4172  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4173  VT == MVT::v8f16) {
4174  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4175  return;
4176  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4177  VT == MVT::v2f32) {
4178  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4179  return;
4180  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4181  VT == MVT::v1f64) {
4182  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4183  return;
4184  }
4185  break;
4186  }
4187  case AArch64ISD::ST4LANEpost: {
4188  VT = Node->getOperand(1).getValueType();
4189  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4190  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4191  return;
4192  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4193  VT == MVT::v8f16) {
4194  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4195  return;
4196  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4197  VT == MVT::v2f32) {
4198  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4199  return;
4200  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4201  VT == MVT::v1f64) {
4202  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4203  return;
4204  }
4205  break;
4206  }
4207  }
4208 
4209  // Select the default instruction
4210  SelectCode(Node);
4211 }
4212 
4213 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4214 /// AArch64-specific DAG, ready for instruction scheduling.
4215 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4216                                          CodeGenOpt::Level OptLevel) {
4217  return new AArch64DAGToDAGISel(TM, OptLevel);
4218 }
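For context, this factory is the backend's entry point to the selector defined above: the target's pass configuration calls it when it installs instruction selection. The following is a minimal sketch of that call site, assuming the usual TargetPassConfig pattern; the AArch64PassConfig class and its accessors live in AArch64TargetMachine.cpp and are not part of this file.

bool AArch64PassConfig::addInstSelector() {
  // Create the SelectionDAG-based ISel pass defined in this file and add it
  // to the codegen pipeline at the instruction-selection step.
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
  // TargetPassConfig convention: returning false signals success.
  return false;
}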
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1451
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
uint64_t CallInst * C
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &ShiftAmount, int &MaskWidth)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL...
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1483
const GlobalValue * getGlobal() const
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1569
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
DiagnosticInfoOptimizationBase::Argument NV
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:382
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:264
iterator begin() const
Definition: ArrayRef.h:136
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:622
unsigned countLeadingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the most significant bit to the first zero bit.
Definition: MathExtras.h:461
const SDValue & getBasePtr() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isWorthFoldingADDlow(SDValue N)
If there&#39;s a use of this ADDlow that&#39;s not itself a load/store then we&#39;ll need to create a real ADD i...
unsigned Reg
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
const SDValue & getChain() const
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
unsigned const TargetRegisterInfo * TRI
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:878
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:377
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:39
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1515
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:158
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1079
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1638
A description of a memory reference used in the backend.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, SDValue &LaneOp, int &LaneIdx)
static bool isPreferredADD(int64_t ImmOff)
Shift and rotation operations.
Definition: ISDOpcodes.h:434
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
unsigned SubReg
const MDNode * getMD() const
unsigned getScalarValueSizeInBits() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:977
unsigned getID() const
Return the register class ID number.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
int64_t getSExtValue() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:410
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
op_iterator op_begin() const
unsigned getAlignment() const
Definition: Globals.cpp:97
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:592
bool isStrongerThanMonotonic(AtomicOrdering ao)
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:150
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:165
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:84
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1664
Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:272
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:987
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32...
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:827
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Extended Value Type.
Definition: ValueTypes.h:33
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:52
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
void dump() const
Dump this node, for debugging.
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:363
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:264
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:752
An SDNode that represents everything that will be needed to construct a MachineInstr.
const Constant * getConstVal() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
unsigned logBase2() const
Definition: APInt.h:1754
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
Definition: APInt.h:69
iterator_range< use_iterator > uses()
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:463
const SysReg * lookupSysRegByName(StringRef)
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:492
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:495
uint32_t parseGenericRegister(StringRef Name)
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOpt::Level OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG...
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:606
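A short sketch of APInt::getBitsSet, assuming LLVM headers (bitsSetExample is a hypothetical name); the hiBit bound is treated as exclusive here:

#include "llvm/ADT/APInt.h"
#include <cassert>

static void bitsSetExample() {
  // Sets bits 2, 3 and 4 of a 16-bit value: 0b11100 == 0x1C.
  llvm::APInt Mask = llvm::APInt::getBitsSet(16, 2, 5);
  assert(Mask.getZExtValue() == 0x1C);
}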
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:176
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:422
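To make the "non-empty sequence of ones" wording concrete, a small sketch assuming LLVM headers (shiftedMaskExamples is hypothetical); the 32-bit variant listed further down behaves the same way on uint32_t values:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void shiftedMaskExamples() {
  assert(llvm::isShiftedMask_64(0xF0));   // one contiguous run of ones, shifted left
  assert(llvm::isShiftedMask_64(0x1));    // a single set bit also qualifies
  assert(!llvm::isShiftedMask_64(0));     // the empty sequence is rejected
  assert(!llvm::isShiftedMask_64(0x109)); // 0b100001001: more than one run of ones
}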
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx)
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:411
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:181
static int getIntOperandFromRegisterString(StringRef RegString)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:510
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:642
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted, suitable for use in a BFI instruction.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
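A hedged sketch of the dyn_cast pattern as it is typically applied to DAG nodes (getConstOrZero is a made-up helper, not part of this file), assuming LLVM headers:

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstdint>

// Returns the zero-extended value if N is a ConstantSDNode, otherwise 0.
static uint64_t getConstOrZero(const llvm::SDNode *N) {
  if (const auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(N))
    return C->getZExtValue();
  return 0;
}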
Type * getValueType() const
Definition: GlobalValue.h:279
uint32_t Size
Definition: Profile.cpp:46
unsigned getOpcode() const
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1259
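A minimal sketch of the range-based transform wrapper, assuming LLVM headers (transformExample is hypothetical):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static void transformExample() {
  llvm::SmallVector<int, 4> In = {1, 2, 3, 4};
  llvm::SmallVector<int, 4> Out(In.size());
  // Equivalent to std::transform(In.begin(), In.end(), Out.begin(), ...).
  llvm::transform(In, Out.begin(), [](int X) { return X * 2; });
}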
static bool isShiftedMask(uint64_t Mask, EVT VT)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:477
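A short sketch of countTrailingOnes on 32-bit values, assuming LLVM headers (the wrapping function name is hypothetical):

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void countTrailingOnesExamples() {
  assert(llvm::countTrailingOnes(0x0000000Fu) == 4);  // the low four bits are ones
  assert(llvm::countTrailingOnes(0x00000010u) == 0);  // bit 0 is already zero
  assert(llvm::countTrailingOnes(0xFFFFFFFFu) == 32); // default ZB_Width behaviour on all-ones
}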
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:258
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:80
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:416
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:48
A single uniqued string.
Definition: Metadata.h:603
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1602
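A compact sketch that exercises countLeadingZeros together with the isPowerOf2 and logBase2 entries listed above, assuming LLVM headers (apintBitQueries is a hypothetical name):

#include "llvm/ADT/APInt.h"
#include <cassert>

static void apintBitQueries() {
  llvm::APInt V(32, 0x00010000);       // only bit 16 is set
  assert(V.countLeadingZeros() == 15); // bits 31..17 are zero
  assert(V.isPowerOf2());
  assert(V.logBase2() == 16);
}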
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
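computeKnownBits itself needs a live SelectionDAG, so the sketch below only shows how its KnownBits result is typically interpreted; highBitsKnownZero is a hypothetical helper, assuming LLVM headers:

#include "llvm/Support/KnownBits.h"

// Given Known = CurDAG->computeKnownBits(Op) for a 32-bit Op, check whether
// the top NumHighBits are provably zero.
static bool highBitsKnownZero(const llvm::KnownBits &Known, unsigned NumHighBits) {
  return Known.Zero.countLeadingOnes() >= NumHighBits;
}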
Conversion operators.
Definition: ISDOpcodes.h:489
const SDValue & getOperand(unsigned i) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:820
uint64_t getZExtValue() const
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:498
#define LLVM_DEBUG(X)
Definition: Debug.h:122
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:816
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
virtual const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const
Returns a TargetRegisterClass used for pointer values.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:950
This class is used to represent ISD::LOAD nodes.