//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
        ForCodeSize(false) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    ForCodeSize = MF.getFunction().hasOptSize();
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the constant's value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the value is a constant
// operand. If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
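
// For example (illustrative, not taken from the original source): #0x123
// selects as Val = 0x123 with LSL #0, #0x123000 as Val = 0x123 with LSL #12,
// and #0x123456 is rejected since it fits neither form.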

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}
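
// Illustrative example (not from the original source): for i32, #-16 negates
// to #16, which SelectArithImmed accepts, so an "add w0, w1, #-16" style DAG
// can be selected as "sub w0, w1, #16"; #0 is rejected to preserve the C-flag
// semantics of cmp/cmn noted above.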

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding a logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}
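
// Sketch of the intent (hypothetical IR, not from the original source): in
//   %idx = shl i64 %i, 3
//   %p   = add i64 %base, %idx
//   %v   = load i64, i64* %p
// every transitive use of the shl feeds a memory operation, so folding it to
//   ldr x0, [xBase, xI, lsl #3]
// costs nothing; if %idx also fed an arithmetic use, the shift would be
// computed anyway and folding it would only duplicate work.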

/// Determine whether it is worth it to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (ForCodeSize || V.hasOneUse())
    return true;
  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the addressing mode and save a cycle.
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
      isWorthFoldingSHL(V))
    return true;
  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}
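
// Illustrative results (not from the original source): (and x, (srl y, 4))
// can select as "and x0, x0, x1, lsr #4", while (add x, (rotr y, 8)) cannot
// fold the rotate, because arithmetic users pass AllowROR = false.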

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}
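
// For instance (illustrative): (and x, #0xff) maps to UXTB as an arithmetic
// operand, but with IsLoadStore = true only the word forms UXTW/SXTW are
// valid addressing-mode extends, so the byte/halfword masks are rejected.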

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return false;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
  return true;
}
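
// Illustrative selection (hypothetical): an add of (mul %x, (duplane16 of a
// subvector extracted from the high half of %q at index 4, dup lane 2))
// becomes a single
//   mla v0.4h, v1.4h, v2.h[6]
// where lane 6 = extract index 4 + dup lane 2, instead of a separate lane
// extract, mul, and add.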

bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return false;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
  return true;
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}


/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead.
    if (Ext == AArch64_AM::UXTW &&
        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
      return false;
  }

  return true;
}

/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
  // signed addressing mode selected here doesn't support labels/immediates,
  // only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (IsSignedImm) {
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      } else {
        // Unsigned immediate.
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
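
// Worked example (illustrative): for an LDP-style access with BW = 7 and
// Size = 8 (Scale = 3), offsets must be 8-byte aligned and lie in
// [-64*8, 63*8] = [-512, 504]; a byte offset of #16 encodes as OffImm = 2.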

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0) {
      const GlobalValue *GV = GAN->getGlobal();
      unsigned Alignment = GV->getAlignment();
      Type *Ty = GV->getValueType();
      if (Alignment == 0 && Ty->isSized())
        Alignment = DL.getABITypeAlignment(Ty);

      if (Alignment >= Size)
        return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}
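
// Example (illustrative): with Size = 8, the address (add x1, #16) matches
// with Base = x1 and OffImm = 2 (16 >> 3), selecting "ldr x0, [x1, #16]";
// an unaligned offset like #17 instead falls through to the unscaled form.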

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address. This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode. The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}
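
// Example (illustrative): with Size = 8 and offset #-8, the scaled mode
// cannot encode a negative offset, but -8 lies in [-256, 256), so this
// matches and selects the unscaled form "ldur x0, [x1, #-8]".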

static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFolding(N);
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}
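
// Illustrative match (hypothetical): for a 4-byte load of
//   (add x1, (shl (sext i32 w2 to i64), #2))
// this yields Base = x1, Offset = w2, SignExtend = 1, DoShift = 1, i.e.
//   ldr w0, [x1, w2, sxtw #2]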

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD of LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
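
// Worked examples (illustrative): 0xfff -> true (plain ADD); 0x123000 -> true
// (ADD LSL #12 and not a single MOVZ); 0x30000 -> false, since "MOVZ #3,
// LSL #16" materializes it in one instruction anyway.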

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable into an
  // ADD/SUB either. In that case [BaseReg + 0] would be used and instructions
  // like the following generated:
  //   MOV  X0, WideImmediate
  //   ADD  X1, BaseReg, X0
  //   LDR  X2, [X1, 0]
  // For such a situation, using [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //   MOV  X0, WideImmediate
  //   LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip immediates that can be selected in the load/store addressing
    // mode, and also immediates that can be encoded by a single ADD
    // (SUB is also checked by using -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}
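
// Illustrative expansion (not from the original source): createDTuple({v0, v1})
// builds roughly
//   REG_SEQUENCE DDRegClassID, v0, dsub0, v1, dsub1
// a single untyped value that the register allocator must place in a
// consecutive D-register pair, as vector-list instructions require.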

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
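
// Example outcome (illustrative): a post-incremented i64 load, i.e.
//   %v = load i64, i64* %p;  %p.next = getelementptr i64, i64* %p, i64 1
// recognized as a post-indexed load selects to a single
//   ldr x0, [x1], #8
// where machine-node result 0 is the written-back base and result 1 the value.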

void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list.
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
                  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  // Update the chain.
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(NumVecs + 1), // base register
                   N->getOperand(NumVecs + 2), // Incremental
                   N->getOperand(0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  ReplaceNode(N, St);
}

namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace

/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}

void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl,
                                             MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),          // Base register
                   N->getOperand(NumVecs + 3),          // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register.
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list.
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain.
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(),
              WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo =
      cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}
1512 
1514  unsigned &Opc, SDValue &Opd0,
1515  unsigned &LSB, unsigned &MSB,
1516  unsigned NumberOfIgnoredLowBits,
1517  bool BiggerPattern) {
1518  assert(N->getOpcode() == ISD::AND &&
1519  "N must be a AND operation to call this function");
1520 
1521  EVT VT = N->getValueType(0);
1522 
1523  // Here we can test the type of VT and return false when the type does not
1524  // match, but since it is done prior to that call in the current context
1525  // we turned that into an assert to avoid redundant code.
1526  assert((VT == MVT::i32 || VT == MVT::i64) &&
1527  "Type checking must have been done before calling this function");
1528 
1529  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1530  // changed the AND node to a 32-bit mask operation. We'll have to
1531  // undo that as part of the transform here if we want to catch all
1532  // the opportunities.
1533  // Currently the NumberOfIgnoredLowBits argument helps to recover
1534  // form these situations when matching bigger pattern (bitfield insert).
1535 
1536  // For unsigned extracts, check for a shift right and mask
1537  uint64_t AndImm = 0;
1538  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1539  return false;
1540 
1541  const SDNode *Op0 = N->getOperand(0).getNode();
1542 
1543  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1544  // simplified. Try to undo that
1545  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1546 
1547  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1548  if (AndImm & (AndImm + 1))
1549  return false;
1550 
1551  bool ClampMSB = false;
1552  uint64_t SrlImm = 0;
1553  // Handle the SRL + ANY_EXTEND case.
1554  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1555  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1556  // Extend the incoming operand of the SRL to 64-bit.
1557  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1558  // Make sure to clamp the MSB so that we preserve the semantics of the
1559  // original operations.
1560  ClampMSB = true;
1561  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1562  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1563  SrlImm)) {
1564  // If the shift result was truncated, we can still combine them.
1565  Opd0 = Op0->getOperand(0).getOperand(0);
1566 
1567  // Use the type of the SRL node.
1568  VT = Opd0->getValueType(0);
1569  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1570  Opd0 = Op0->getOperand(0);
1571  } else if (BiggerPattern) {
1572  // Let's pretend a 0 shift right has been performed.
1573  // The resulting code will be at least as good as the original one,
1574  // and it may expose more opportunities for the bitfield insert pattern.
1575  // FIXME: Currently we limit this to the bigger pattern, because
1576  // some optimizations expect AND and not UBFM.
1577  Opd0 = N->getOperand(0);
1578  } else
1579  return false;
1580 
1581  // Bail out on large immediates. This happens when no proper
1582  // combining/constant folding was performed.
1583  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1584  LLVM_DEBUG(
1585  (dbgs() << N
1586  << ": Found large shift immediate, this should not happen\n"));
1587  return false;
1588  }
1589 
1590  LSB = SrlImm;
1591  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1592  : countTrailingOnes<uint64_t>(AndImm)) -
1593  1;
1594  if (ClampMSB)
1595  // Since we're moving the extend before the right shift operation, we need
1596  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1597  // the zeros which would get shifted in with the original right shift
1598  // operation.
1599  MSB = MSB > 31 ? 31 : MSB;
1600 
1601  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1602  return true;
1603 }
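// A worked example (illustrative, i32): for
//   t = (srl x, 3)
//   n = (and t, 0x1f)
// we get SrlImm = 3 and AndImm = 0x1f, hence LSB = 3 and
// MSB = 3 + countTrailingOnes(0x1f) - 1 = 7, and the pair is selected as
// UBFMWri x, #3, #7 (the alias UBFX x, #3, #5).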
1604 
1605 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1606  SDValue &Opd0, unsigned &Immr,
1607  unsigned &Imms) {
1608  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1609 
1610  EVT VT = N->getValueType(0);
1611  unsigned BitWidth = VT.getSizeInBits();
1612  assert((VT == MVT::i32 || VT == MVT::i64) &&
1613  "Type checking must have been done before calling this function");
1614 
1615  SDValue Op = N->getOperand(0);
1616  if (Op->getOpcode() == ISD::TRUNCATE) {
1617  Op = Op->getOperand(0);
1618  VT = Op->getValueType(0);
1619  BitWidth = VT.getSizeInBits();
1620  }
1621 
1622  uint64_t ShiftImm;
1623  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1624  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1625  return false;
1626 
1627  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1628  if (ShiftImm + Width > BitWidth)
1629  return false;
1630 
1631  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1632  Opd0 = Op.getOperand(0);
1633  Immr = ShiftImm;
1634  Imms = ShiftImm + Width - 1;
1635  return true;
1636 }
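// A worked example (illustrative, i32): for
//   t = (srl x, 8)
//   n = (sign_extend_inreg t, i8)
// ShiftImm = 8 and Width = 8, hence Immr = 8 and Imms = 15, and the pair
// is selected as SBFMWri x, #8, #15 (the alias SBFX x, #8, #8).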
1637 
1638 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1639  SDValue &Opd0, unsigned &LSB,
1640  unsigned &MSB) {
1641  // We are looking for the following pattern, which extracts several
1642  // contiguous bits from the source value and places them starting at the LSB
1643  // of the destination value; all other bits of the destination are set to zero:
1644  //
1645  // Value2 = AND Value, MaskImm
1646  // SRL Value2, ShiftImm
1647  //
1648  // where MaskImm >> ShiftImm determines the bit width.
1649  //
1650  // This gets selected into a single UBFM:
1651  //
1652  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1653  //
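// A worked example (illustrative, i64): with MaskImm = 0xff0 and
// ShiftImm = 4, MaskImm >> ShiftImm = 0xff, so BitWide = 8 and the pair
// is selected as UBFMXri Value, #4, #11 (the alias UBFX Value, #4, #8).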
1654 
1655  if (N->getOpcode() != ISD::SRL)
1656  return false;
1657 
1658  uint64_t AndMask = 0;
1659  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1660  return false;
1661 
1662  Opd0 = N->getOperand(0).getOperand(0);
1663 
1664  uint64_t SrlImm = 0;
1665  if (!isIntImmediate(N->getOperand(1), SrlImm))
1666  return false;
1667 
1668  // Check whether we really have a several-bit extract here.
1669  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1670  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1671  if (N->getValueType(0) == MVT::i32)
1672  Opc = AArch64::UBFMWri;
1673  else
1674  Opc = AArch64::UBFMXri;
1675 
1676  LSB = SrlImm;
1677  MSB = BitWide + SrlImm - 1;
1678  return true;
1679  }
1680 
1681  return false;
1682 }
1683 
1684 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1685  unsigned &Immr, unsigned &Imms,
1686  bool BiggerPattern) {
1687  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1688  "N must be a SHR/SRA operation to call this function");
1689 
1690  EVT VT = N->getValueType(0);
1691 
1692  // We could test the type of VT and return false when the type does not
1693  // match, but since that check is done prior to this call in the current
1694  // context, we turned it into an assert to avoid redundant code.
1695  assert((VT == MVT::i32 || VT == MVT::i64) &&
1696  "Type checking must have been done before calling this function");
1697 
1698  // Check for AND + SRL doing several bits extract.
1699  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1700  return true;
1701 
1702  // We're looking for a shift of a shift.
1703  uint64_t ShlImm = 0;
1704  uint64_t TruncBits = 0;
1705  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1706  Opd0 = N->getOperand(0).getOperand(0);
1707  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1708  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1709  // We are looking for a shift of a truncate. A truncate from i64 to i32
1710  // can be considered as setting the high 32 bits to zero. Our strategy here
1711  // is to always generate a 64-bit UBFM. This consistency will help the CSE
1712  // pass later find more redundancy.
1713  Opd0 = N->getOperand(0).getOperand(0);
1714  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1715  VT = Opd0.getValueType();
1716  assert(VT == MVT::i64 && "the promoted type should be i64");
1717  } else if (BiggerPattern) {
1718  // Let's pretend a 0 shift left has been performed.
1719  // FIXME: Currently we limit this to the bigger pattern case,
1720  // because some optimizations expect AND and not UBFM
1721  Opd0 = N->getOperand(0);
1722  } else
1723  return false;
1724 
1725  // Missing combines/constant folding may have left us with strange
1726  // constants.
1727  if (ShlImm >= VT.getSizeInBits()) {
1728  LLVM_DEBUG(
1729  (dbgs() << N
1730  << ": Found large shift immediate, this should not happen\n"));
1731  return false;
1732  }
1733 
1734  uint64_t SrlImm = 0;
1735  if (!isIntImmediate(N->getOperand(1), SrlImm))
1736  return false;
1737 
1738  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1739  "bad amount in shift node!");
1740  int immr = SrlImm - ShlImm;
1741  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1742  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1743  // SRA requires a signed extraction
1744  if (VT == MVT::i32)
1745  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1746  else
1747  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1748  return true;
1749 }
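// A worked example (illustrative, i64): for
//   t = (shl x, 4)
//   n = (srl t, 8)
// ShlImm = 4 and SrlImm = 8, hence Immr = 8 - 4 = 4 and
// Imms = 64 - 4 - 0 - 1 = 59, and the pair is selected as
// UBFMXri x, #4, #59 (the alias UBFX x, #4, #56).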
1750 
1751 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1752  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1753 
1754  EVT VT = N->getValueType(0);
1755  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1756  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1757  return false;
1758 
1759  uint64_t ShiftImm;
1760  SDValue Op = N->getOperand(0);
1761  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1762  return false;
1763 
1764  SDLoc dl(N);
1765  // Extend the incoming operand of the shift to 64-bits.
1766  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1767  unsigned Immr = ShiftImm;
1768  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1769  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1770  CurDAG->getTargetConstant(Imms, dl, VT)};
1771  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1772  return true;
1773 }
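// A worked example (illustrative): for
//   t = (sra x:i32, 5)
//   n = (sext t to i64)
// Immr = 5 and Imms = 31, so this selects SBFMXri widen(x), #5, #31,
// which sign-extends bits x[31:5] into the 64-bit result.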
1774 
1775 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1776  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1777  unsigned NumberOfIgnoredLowBits = 0,
1778  bool BiggerPattern = false) {
1779  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1780  return false;
1781 
1782  switch (N->getOpcode()) {
1783  default:
1784  if (!N->isMachineOpcode())
1785  return false;
1786  break;
1787  case ISD::AND:
1788  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1789  NumberOfIgnoredLowBits, BiggerPattern);
1790  case ISD::SRL:
1791  case ISD::SRA:
1792  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1793 
1794  case ISD::SIGN_EXTEND_INREG:
1795  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1796  }
1797 
1798  unsigned NOpc = N->getMachineOpcode();
1799  switch (NOpc) {
1800  default:
1801  return false;
1802  case AArch64::SBFMWri:
1803  case AArch64::UBFMWri:
1804  case AArch64::SBFMXri:
1805  case AArch64::UBFMXri:
1806  Opc = NOpc;
1807  Opd0 = N->getOperand(0);
1808  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1809  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1810  return true;
1811  }
1812  // Unreachable
1813  return false;
1814 }
1815 
1816 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1817  unsigned Opc, Immr, Imms;
1818  SDValue Opd0;
1819  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1820  return false;
1821 
1822  EVT VT = N->getValueType(0);
1823  SDLoc dl(N);
1824 
1825  // If the bit extract operation is 64bit but the original type is 32bit, we
1826  // need to add one EXTRACT_SUBREG.
1827  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1828  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1829  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1830 
1831  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1832  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1833  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1834  MVT::i32, SDValue(BFM, 0), SubReg));
1835  return true;
1836  }
1837 
1838  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1839  CurDAG->getTargetConstant(Imms, dl, VT)};
1840  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1841  return true;
1842 }
1843 
1844 /// Does DstMask form a complementary pair with the mask provided by
1845 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
1846 /// this asks whether DstMask zeroes precisely those bits that will be set by
1847 /// the other half.
1848 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1849  unsigned NumberOfIgnoredHighBits, EVT VT) {
1850  assert((VT == MVT::i32 || VT == MVT::i64) &&
1851  "i32 or i64 mask type expected!");
1852  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1853 
1854  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1855  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1856 
1857  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1858  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1859 }
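// A worked example (illustrative, i32): DstMask = 0xffff00ff together with
// BitsToBeInserted = 0x0000ff00 neither overlaps nor leaves a hole
// (their AND is 0 and their OR is all ones), so the pair qualifies for BFI.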
1860 
1861 // Look for bits that will be useful for later uses.
1862 // A bit is considered useless as soon as it is dropped and never used
1863 // before it has been dropped.
1864 // E.g., looking for the useful bits of x:
1865 // 1. y = x & 0x7
1866 // 2. z = y >> 2
1867 // After #1, the useful bits of x are 0x7, and they live through
1868 // y.
1869 // After #2, the useful bits of x are 0x4.
1870 // However, if x is used by an unpredictable instruction, then all its bits
1871 // are useful.
1872 // E.g.
1873 // 1. y = x & 0x7
1874 // 2. z = y >> 2
1875 // 3. str x, [@x]
1876 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1877 
1878 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1879  unsigned Depth) {
1880  uint64_t Imm =
1881  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1882  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1883  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1884  getUsefulBits(Op, UsefulBits, Depth + 1);
1885 }
1886 
1887 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1888  uint64_t Imm, uint64_t MSB,
1889  unsigned Depth) {
1890  // Inherit the bit width from UsefulBits.
1891  APInt OpUsefulBits(UsefulBits);
1892  OpUsefulBits = 1;
1893 
1894  if (MSB >= Imm) {
1895  OpUsefulBits <<= MSB - Imm + 1;
1896  --OpUsefulBits;
1897  // The interesting part will be in the lower part of the result
1898  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1899  // The interesting part was starting at Imm in the argument
1900  OpUsefulBits <<= Imm;
1901  } else {
1902  OpUsefulBits <<= MSB + 1;
1903  --OpUsefulBits;
1904  // The interesting part will be shifted in the result
1905  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1906  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1907  // The interesting part was at zero in the argument
1908  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1909  }
1910 
1911  UsefulBits &= OpUsefulBits;
1912 }
1913 
1914 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1915  unsigned Depth) {
1916  uint64_t Imm =
1917  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1918  uint64_t MSB =
1919  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1920 
1921  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1922 }
1923 
1924 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1925  unsigned Depth) {
1926  uint64_t ShiftTypeAndValue =
1927  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1928  APInt Mask(UsefulBits);
1929  Mask.clearAllBits();
1930  Mask.flipAllBits();
1931 
1932  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1933  // Shift Left
1934  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1935  Mask <<= ShiftAmt;
1936  getUsefulBits(Op, Mask, Depth + 1);
1937  Mask.lshrInPlace(ShiftAmt);
1938  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1939  // Shift Right
1940  // We do not handle AArch64_AM::ASR, because the sign will change the
1941  // number of useful bits
1942  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1943  Mask.lshrInPlace(ShiftAmt);
1944  getUsefulBits(Op, Mask, Depth + 1);
1945  Mask <<= ShiftAmt;
1946  } else
1947  return;
1948 
1949  UsefulBits &= Mask;
1950 }
1951 
1952 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1953  unsigned Depth) {
1954  uint64_t Imm =
1955  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1956  uint64_t MSB =
1957  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1958 
1959  APInt OpUsefulBits(UsefulBits);
1960  OpUsefulBits = 1;
1961 
1962  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1963  ResultUsefulBits.flipAllBits();
1964  APInt Mask(UsefulBits.getBitWidth(), 0);
1965 
1966  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1967 
1968  if (MSB >= Imm) {
1969  // The instruction is a BFXIL.
1970  uint64_t Width = MSB - Imm + 1;
1971  uint64_t LSB = Imm;
1972 
1973  OpUsefulBits <<= Width;
1974  --OpUsefulBits;
1975 
1976  if (Op.getOperand(1) == Orig) {
1977  // Copy the low bits from the result to bits starting from LSB.
1978  Mask = ResultUsefulBits & OpUsefulBits;
1979  Mask <<= LSB;
1980  }
1981 
1982  if (Op.getOperand(0) == Orig)
1983  // Bits starting from LSB in the input contribute to the result.
1984  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1985  } else {
1986  // The instruction is a BFI.
1987  uint64_t Width = MSB + 1;
1988  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1989 
1990  OpUsefulBits <<= Width;
1991  --OpUsefulBits;
1992  OpUsefulBits <<= LSB;
1993 
1994  if (Op.getOperand(1) == Orig) {
1995  // Copy the bits from the result to the zero bits.
1996  Mask = ResultUsefulBits & OpUsefulBits;
1997  Mask.lshrInPlace(LSB);
1998  }
1999 
2000  if (Op.getOperand(0) == Orig)
2001  Mask |= (ResultUsefulBits & ~OpUsefulBits);
2002  }
2003 
2004  UsefulBits &= Mask;
2005 }
2006 
2007 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2008  SDValue Orig, unsigned Depth) {
2009 
2010  // Users of this node should have already been instruction selected
2011  // FIXME: Can we turn that into an assert?
2012  if (!UserNode->isMachineOpcode())
2013  return;
2014 
2015  switch (UserNode->getMachineOpcode()) {
2016  default:
2017  return;
2018  case AArch64::ANDSWri:
2019  case AArch64::ANDSXri:
2020  case AArch64::ANDWri:
2021  case AArch64::ANDXri:
2022  // We increment Depth only when we call the getUsefulBits
2023  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2024  Depth);
2025  case AArch64::UBFMWri:
2026  case AArch64::UBFMXri:
2027  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2028 
2029  case AArch64::ORRWrs:
2030  case AArch64::ORRXrs:
2031  if (UserNode->getOperand(1) != Orig)
2032  return;
2033  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2034  Depth);
2035  case AArch64::BFMWri:
2036  case AArch64::BFMXri:
2037  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2038 
2039  case AArch64::STRBBui:
2040  case AArch64::STURBBi:
2041  if (UserNode->getOperand(0) != Orig)
2042  return;
2043  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2044  return;
2045 
2046  case AArch64::STRHHui:
2047  case AArch64::STURHHi:
2048  if (UserNode->getOperand(0) != Orig)
2049  return;
2050  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2051  return;
2052  }
2053 }
2054 
2055 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2056  if (Depth >= SelectionDAG::MaxRecursionDepth)
2057  return;
2058  // Initialize UsefulBits
2059  if (!Depth) {
2060  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2061  // At the beginning, assume every produced bit is useful.
2062  UsefulBits = APInt(Bitwidth, 0);
2063  UsefulBits.flipAllBits();
2064  }
2065  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2066 
2067  for (SDNode *Node : Op.getNode()->uses()) {
2068  // A use cannot produce useful bits
2069  APInt UsefulBitsForUse = APInt(UsefulBits);
2070  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2071  UsersUsefulBits |= UsefulBitsForUse;
2072  }
2073  // UsefulBits contains the produced bits that are meaningful for the
2074  // current definition, thus a user cannot make a bit meaningful at
2075  // this point
2076  UsefulBits &= UsersUsefulBits;
2077 }
2078 
2079 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2080 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2081 /// 0, return Op unchanged.
2082 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2083  if (ShlAmount == 0)
2084  return Op;
2085 
2086  EVT VT = Op.getValueType();
2087  SDLoc dl(Op);
2088  unsigned BitWidth = VT.getSizeInBits();
2089  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2090 
2091  SDNode *ShiftNode;
2092  if (ShlAmount > 0) {
2093  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2094  ShiftNode = CurDAG->getMachineNode(
2095  UBFMOpc, dl, VT, Op,
2096  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2097  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2098  } else {
2099  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2100  assert(ShlAmount < 0 && "expected right shift");
2101  int ShrAmount = -ShlAmount;
2102  ShiftNode = CurDAG->getMachineNode(
2103  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2104  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2105  }
2106 
2107  return SDValue(ShiftNode, 0);
2108 }
2109 
2110 /// Does this tree qualify as an attempt to move a bitfield into position,
2111 /// essentially "(and (shl VAL, N), Mask)".
2112 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2113  bool BiggerPattern,
2114  SDValue &Src, int &ShiftAmount,
2115  int &MaskWidth) {
2116  EVT VT = Op.getValueType();
2117  unsigned BitWidth = VT.getSizeInBits();
2118  (void)BitWidth;
2119  assert(BitWidth == 32 || BitWidth == 64);
2120 
2121  KnownBits Known = CurDAG->computeKnownBits(Op);
2122 
2123  // Non-zero in the sense that they're not provably zero, which is the key
2124  // point if we want to use this value
2125  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2126 
2127  // Discard a constant AND mask if present. It's safe because the node will
2128  // already have been factored into the computeKnownBits calculation above.
2129  uint64_t AndImm;
2130  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2131  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2132  Op = Op.getOperand(0);
2133  }
2134 
2135  // Don't match if the SHL has more than one use, since then we'll end up
2136  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2137  if (!BiggerPattern && !Op.hasOneUse())
2138  return false;
2139 
2140  uint64_t ShlImm;
2141  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2142  return false;
2143  Op = Op.getOperand(0);
2144 
2145  if (!isShiftedMask_64(NonZeroBits))
2146  return false;
2147 
2148  ShiftAmount = countTrailingZeros(NonZeroBits);
2149  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2150 
2151  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2152  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2153  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2154  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2155  // which case it is not profitable to insert an extra shift.
2156  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2157  return false;
2158  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2159 
2160  return true;
2161 }
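// A worked example (illustrative, i32): for
//   t = (shl x, 3)
//   n = (and t, 0xf8)
// the not-provably-zero bits are 0xf8, a shifted mask, giving Src = x,
// ShiftAmount = 3 and MaskWidth = 5, with no extra shift needed since
// ShlImm == ShiftAmount.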
2162 
2163 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2164  assert(VT == MVT::i32 || VT == MVT::i64);
2165  if (VT == MVT::i32)
2166  return isShiftedMask_32(Mask);
2167  return isShiftedMask_64(Mask);
2168 }
2169 
2170 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2171 // inserted only sets known zero bits.
2172 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2173  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2174 
2175  EVT VT = N->getValueType(0);
2176  if (VT != MVT::i32 && VT != MVT::i64)
2177  return false;
2178 
2179  unsigned BitWidth = VT.getSizeInBits();
2180 
2181  uint64_t OrImm;
2182  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2183  return false;
2184 
2185  // Skip this transformation if the OR immediate can be encoded directly in
2186  // an ORR; in that case we would only trade an AND+ORR for an ORR+BFI/BFXIL,
2187  // which is most likely performance-neutral.
2188  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2189  return false;
2190 
2191  uint64_t MaskImm;
2192  SDValue And = N->getOperand(0);
2193  // Must be a single use AND with an immediate operand.
2194  if (!And.hasOneUse() ||
2195  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2196  return false;
2197 
2198  // Compute the known zero bits for the AND, as this allows us to catch more
2199  // general cases than just looking for an AND with an immediate.
2200  KnownBits Known = CurDAG->computeKnownBits(And);
2201 
2202  // Non-zero in the sense that they're not provably zero, which is the key
2203  // point if we want to use this value.
2204  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2205 
2206  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2207  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2208  return false;
2209 
2210  // The bits being inserted must only set those bits that are known to be zero.
2211  if ((OrImm & NotKnownZero) != 0) {
2212  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2213  // currently handle this case.
2214  return false;
2215  }
2216 
2217  // BFI/BFXIL dst, src, #lsb, #width.
2218  int LSB = countTrailingOnes(NotKnownZero);
2219  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2220 
2221  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2222  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2223  unsigned ImmS = Width - 1;
2224 
2225  // If we're creating a BFI instruction avoid cases where we need more
2226  // instructions to materialize the BFI constant as compared to the original
2227  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2228  // should be no worse in this case.
2229  bool IsBFI = LSB != 0;
2230  uint64_t BFIImm = OrImm >> LSB;
2231  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2232  // We have a BFI instruction and we know the constant can't be materialized
2233  // with a ORR-immediate with the zero register.
2234  unsigned OrChunks = 0, BFIChunks = 0;
2235  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2236  if (((OrImm >> Shift) & 0xFFFF) != 0)
2237  ++OrChunks;
2238  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2239  ++BFIChunks;
2240  }
2241  if (BFIChunks > OrChunks)
2242  return false;
2243  }
2244 
2245  // Materialize the constant to be inserted.
2246  SDLoc DL(N);
2247  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2248  SDNode *MOVI = CurDAG->getMachineNode(
2249  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2250 
2251  // Create the BFI/BFXIL instruction.
2252  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2253  CurDAG->getTargetConstant(ImmR, DL, VT),
2254  CurDAG->getTargetConstant(ImmS, DL, VT)};
2255  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2256  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2257  return true;
2258 }
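// A worked example (illustrative, i32): for
//   t = (and x, 0xffff000f)
//   n = (or t, 0x1230)
// the known-zero mask is 0x0000fff0, a shifted mask with LSB = 4 and
// Width = 12; 0x1230 is not a valid logical immediate, so 0x123 is
// materialized with MOVi32imm and inserted with
// BFMWri x, movi, #28, #11 (the alias BFI x, movi, #4, #12).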
2259 
2260 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2261  SelectionDAG *CurDAG) {
2262  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2263 
2264  EVT VT = N->getValueType(0);
2265  if (VT != MVT::i32 && VT != MVT::i64)
2266  return false;
2267 
2268  unsigned BitWidth = VT.getSizeInBits();
2269 
2270  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2271  // have the expected shape. Try to undo that.
2272 
2273  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2274  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2275 
2276  // Given an OR operation, check if we have the following pattern:
2277  // ubfm c, b, imm, imm2 (or something that does the same job, see
2278  // isBitfieldExtractOp)
2279  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2280  // countTrailingZeros(mask2) == imm2 - imm + 1
2281  // f = d | c
2282  // if yes, replace the OR instruction with:
2283  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2284 
2285  // OR is commutative, check all combinations of operand order and values of
2286  // BiggerPattern, i.e.
2287  // Opd0, Opd1, BiggerPattern=false
2288  // Opd1, Opd0, BiggerPattern=false
2289  // Opd0, Opd1, BiggerPattern=true
2290  // Opd1, Opd0, BiggerPattern=true
2291  // Several of these combinations may match, so check with BiggerPattern=false
2292  // first since that will produce better results by matching more instructions
2293  // and/or inserting fewer extra instructions.
2294  for (int I = 0; I < 4; ++I) {
2295 
2296  SDValue Dst, Src;
2297  unsigned ImmR, ImmS;
2298  bool BiggerPattern = I / 2;
2299  SDValue OrOpd0Val = N->getOperand(I % 2);
2300  SDNode *OrOpd0 = OrOpd0Val.getNode();
2301  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2302  SDNode *OrOpd1 = OrOpd1Val.getNode();
2303 
2304  unsigned BFXOpc;
2305  int DstLSB, Width;
2306  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2307  NumberOfIgnoredLowBits, BiggerPattern)) {
2308  // Check that the returned opcode is compatible with the pattern,
2309  // i.e., same type and zero extended (U and not S)
2310  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2311  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2312  continue;
2313 
2314  // Compute the width of the bitfield insertion
2315  DstLSB = 0;
2316  Width = ImmS - ImmR + 1;
2317  // FIXME: This constraint is to catch bitfield insertion; we may
2318  // want to widen the pattern if we want to grab the general bitfield
2319  // move case.
2320  if (Width <= 0)
2321  continue;
2322 
2323  // If the mask on the insertee is correct, we have a BFXIL operation. We
2324  // can share the ImmR and ImmS values from the already-computed UBFM.
2325  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2326  BiggerPattern,
2327  Src, DstLSB, Width)) {
2328  ImmR = (BitWidth - DstLSB) % BitWidth;
2329  ImmS = Width - 1;
2330  } else
2331  continue;
2332 
2333  // Check the second part of the pattern
2334  EVT VT = OrOpd1Val.getValueType();
2335  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2336 
2337  // Compute the Known Zero for the candidate of the first operand.
2338  // This allows us to catch more general cases than just looking for
2339  // an AND with an immediate. Indeed, simplify-demanded-bits may have removed
2340  // the AND instruction because it proved it was useless.
2341  KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2342 
2343  // Check if there is enough room for the second operand to appear
2344  // in the first one
2345  APInt BitsToBeInserted =
2346  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2347 
2348  if ((BitsToBeInserted & ~Known.Zero) != 0)
2349  continue;
2350 
2351  // Set the first operand
2352  uint64_t Imm;
2353  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2354  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2355  // In that case, we can eliminate the AND
2356  Dst = OrOpd1->getOperand(0);
2357  else
2358  // Maybe the AND has been removed by simplify-demanded-bits
2359  // or is useful because it discards more bits
2360  Dst = OrOpd1Val;
2361 
2362  // both parts match
2363  SDLoc DL(N);
2364  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2365  CurDAG->getTargetConstant(ImmS, DL, VT)};
2366  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2367  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2368  return true;
2369  }
2370 
2371  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2372  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2373  // mask (e.g., 0x000ffff0).
2374  uint64_t Mask0Imm, Mask1Imm;
2375  SDValue And0 = N->getOperand(0);
2376  SDValue And1 = N->getOperand(1);
2377  if (And0.hasOneUse() && And1.hasOneUse() &&
2378  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2379  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2380  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2381  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2382 
2383  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2384  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2385  // bits to be inserted.
2386  if (isShiftedMask(Mask0Imm, VT)) {
2387  std::swap(And0, And1);
2388  std::swap(Mask0Imm, Mask1Imm);
2389  }
2390 
2391  SDValue Src = And1->getOperand(0);
2392  SDValue Dst = And0->getOperand(0);
2393  unsigned LSB = countTrailingZeros(Mask1Imm);
2394  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2395 
2396  // The BFXIL inserts the low-order bits from a source register, so right
2397  // shift the needed bits into place.
2398  SDLoc DL(N);
2399  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2400  SDNode *LSR = CurDAG->getMachineNode(
2401  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2402  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2403 
2404  // BFXIL is an alias of BFM, so translate to BFM operands.
2405  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2406  unsigned ImmS = Width - 1;
2407 
2408  // Create the BFXIL instruction.
2409  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2410  CurDAG->getTargetConstant(ImmR, DL, VT),
2411  CurDAG->getTargetConstant(ImmS, DL, VT)};
2412  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2413  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2414  return true;
2415  }
2416 
2417  return false;
2418 }
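// A worked example (illustrative, i32): for
//   n = (or (and x, 0xfff0000f), (and y, 0x000ffff0))
// Mask1Imm = 0x000ffff0 is the shifted mask, so LSB = 4 and Width = 16;
// y is first shifted right by 4 with UBFMWri y, #4, #31, and the result
// is inserted with BFMWri x, lsr, #28, #15 (the alias BFI x, lsr, #4, #16).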
2419 
2420 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2421  if (N->getOpcode() != ISD::OR)
2422  return false;
2423 
2424  APInt NUsefulBits;
2425  getUsefulBits(SDValue(N, 0), NUsefulBits);
2426 
2427  // If no bits are useful, just return UNDEF.
2428  if (!NUsefulBits) {
2429  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2430  return true;
2431  }
2432 
2433  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2434  return true;
2435 
2436  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2437 }
2438 
2439 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2440 /// equivalent of a left shift by a constant amount followed by an and masking
2441 /// out a contiguous set of bits.
2442 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2443  if (N->getOpcode() != ISD::AND)
2444  return false;
2445 
2446  EVT VT = N->getValueType(0);
2447  if (VT != MVT::i32 && VT != MVT::i64)
2448  return false;
2449 
2450  SDValue Op0;
2451  int DstLSB, Width;
2452  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2453  Op0, DstLSB, Width))
2454  return false;
2455 
2456  // ImmR is the rotate right amount.
2457  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2458  // ImmS is the most significant bit of the source to be moved.
2459  unsigned ImmS = Width - 1;
2460 
2461  SDLoc DL(N);
2462  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2463  CurDAG->getTargetConstant(ImmS, DL, VT)};
2464  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2465  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2466  return true;
2467 }
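// A worked example (illustrative, i32): for
//   n = (and (shl x, 4), 0xff0)
// DstLSB = 4 and Width = 8, hence ImmR = 28 and ImmS = 7, and the node is
// selected as UBFMWri x, #28, #7 (the alias UBFIZ x, #4, #8).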
2468 
2469 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2470 /// variable shift/rotate instructions.
2471 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2472  EVT VT = N->getValueType(0);
2473 
2474  unsigned Opc;
2475  switch (N->getOpcode()) {
2476  case ISD::ROTR:
2477  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2478  break;
2479  case ISD::SHL:
2480  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2481  break;
2482  case ISD::SRL:
2483  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2484  break;
2485  case ISD::SRA:
2486  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2487  break;
2488  default:
2489  return false;
2490  }
2491 
2492  uint64_t Size;
2493  uint64_t Bits;
2494  if (VT == MVT::i32) {
2495  Bits = 5;
2496  Size = 32;
2497  } else if (VT == MVT::i64) {
2498  Bits = 6;
2499  Size = 64;
2500  } else
2501  return false;
2502 
2503  SDValue ShiftAmt = N->getOperand(1);
2504  SDLoc DL(N);
2505  SDValue NewShiftAmt;
2506 
2507  // Skip over an extend of the shift amount.
2508  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2509  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2510  ShiftAmt = ShiftAmt->getOperand(0);
2511 
2512  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2513  SDValue Add0 = ShiftAmt->getOperand(0);
2514  SDValue Add1 = ShiftAmt->getOperand(1);
2515  uint64_t Add0Imm;
2516  uint64_t Add1Imm;
2517  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2518  // to avoid the ADD/SUB.
2519  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2520  NewShiftAmt = Add0;
2521  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2522  // generate a NEG instead of a SUB of a constant.
2523  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2524  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2525  (Add0Imm % Size == 0)) {
2526  unsigned NegOpc;
2527  unsigned ZeroReg;
2528  EVT SubVT = ShiftAmt->getValueType(0);
2529  if (SubVT == MVT::i32) {
2530  NegOpc = AArch64::SUBWrr;
2531  ZeroReg = AArch64::WZR;
2532  } else {
2533  assert(SubVT == MVT::i64);
2534  NegOpc = AArch64::SUBXrr;
2535  ZeroReg = AArch64::XZR;
2536  }
2537  SDValue Zero =
2538  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2539  MachineSDNode *Neg =
2540  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2541  NewShiftAmt = SDValue(Neg, 0);
2542  } else
2543  return false;
2544  } else {
2545  // If the shift amount is masked with an AND, check that the mask covers the
2546  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2547  // the AND.
2548  uint64_t MaskImm;
2549  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2550  return false;
2551 
2552  if (countTrailingOnes(MaskImm) < Bits)
2553  return false;
2554 
2555  NewShiftAmt = ShiftAmt->getOperand(0);
2556  }
2557 
2558  // Narrow/widen the shift amount to match the size of the shift operation.
2559  if (VT == MVT::i32)
2560  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2561  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2562  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2563  MachineSDNode *Ext = CurDAG->getMachineNode(
2564  AArch64::SUBREG_TO_REG, DL, VT,
2565  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2566  NewShiftAmt = SDValue(Ext, 0);
2567  }
2568 
2569  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2570  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2571  return true;
2572 }
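// Illustrative examples (not from the source): "(srl x, (and amt, 63))"
// on i64 drops the AND, since LSRVXr itself only reads the low 6 bits of
// the shift amount, and "(shl x, (sub 64, y))" shifts by a NEG of y,
// because 64 - y and -y are congruent modulo the register size.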
2573 
2574 bool
2575 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2576  unsigned RegWidth) {
2577  APFloat FVal(0.0);
2578  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2579  FVal = CN->getValueAPF();
2580  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2581  // Some otherwise illegal constants are allowed in this case.
2582  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2583  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2584  return false;
2585 
2586  ConstantPoolSDNode *CN =
2587  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2588  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2589  } else
2590  return false;
2591 
2592  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2593  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2594  // x-register.
2595  //
2596  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2597  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2598  // integers.
2599  bool IsExact;
2600 
2601  // fbits is between 1 and 64 in the worst-case, which means the fmul
2602  // could have 2^64 as an actual operand. Need 65 bits of precision.
2603  APSInt IntVal(65, true);
2604  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2605 
2606  // N.b. isPowerOf2 also checks for > 0.
2607  if (!IsExact || !IntVal.isPowerOf2()) return false;
2608  unsigned FBits = IntVal.logBase2();
2609 
2610  // Checks above should have guaranteed that we haven't lost information in
2611  // finding FBits, but it must still be in range.
2612  if (FBits == 0 || FBits > RegWidth) return false;
2613 
2614  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2615  return true;
2616 }
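// A worked example (illustrative): for (fp_to_sint (fmul x, 16.0)) with a
// 32-bit destination, IntVal = 16 is an exact power of two, so FBits = 4
// and FixedPos feeds the #fbits operand of the fixed-point conversion
// (an FCVTZS with #4 in this case).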
2617 
2618 // Inspects a register string of the form o0:op1:CRn:CRm:op2, splits it into
2619 // its fields, obtains the integer value of each field, and combines these
2620 // into a single value to be used in the MRS/MSR instruction.
2621 static int getIntOperandFromRegisterString(StringRef RegString) {
2622  SmallVector<StringRef, 5> Fields;
2623  RegString.split(Fields, ':');
2624 
2625  if (Fields.size() == 1)
2626  return -1;
2627 
2628  assert(Fields.size() == 5
2629  && "Invalid number of fields in read register string");
2630 
2631  SmallVector<int, 5> Ops;
2632  bool AllIntFields = true;
2633 
2634  for (StringRef Field : Fields) {
2635  unsigned IntField;
2636  AllIntFields &= !Field.getAsInteger(10, IntField);
2637  Ops.push_back(IntField);
2638  }
2639 
2640  assert(AllIntFields &&
2641  "Unexpected non-integer value in special register string.");
2642 
2643  // Need to combine the integer fields of the string into a single value
2644  // based on the bit encoding of the MRS/MSR instruction.
2645  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2646  (Ops[3] << 3) | (Ops[4]);
2647 }
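// A worked example (illustrative): the string "1:2:7:4:5" yields
// (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 = 0x53a5.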
2648 
2649 // Lower the read_register intrinsic to an MRS instruction node if the special
2650 // register string argument is either of the form detailed in the ACLE (the
2651 // form described in getIntOperandFromRegisterString) or is a named register
2652 // known by the MRS SysReg mapper.
2653 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2654  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2655  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2656  SDLoc DL(N);
2657 
2658  int Reg = getIntOperandFromRegisterString(RegString->getString());
2659  if (Reg != -1) {
2660  ReplaceNode(N, CurDAG->getMachineNode(
2661  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2662  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2663  N->getOperand(0)));
2664  return true;
2665  }
2666 
2667  // Use the sysreg mapper to map the remaining possible strings to the
2668  // value for the register to be used for the instruction operand.
2669  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2670  if (TheReg && TheReg->Readable &&
2671  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2672  Reg = TheReg->Encoding;
2673  else
2674  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2675 
2676  if (Reg != -1) {
2677  ReplaceNode(N, CurDAG->getMachineNode(
2678  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2679  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2680  N->getOperand(0)));
2681  return true;
2682  }
2683 
2684  if (RegString->getString() == "pc") {
2685  ReplaceNode(N, CurDAG->getMachineNode(
2686  AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
2687  CurDAG->getTargetConstant(0, DL, MVT::i32),
2688  N->getOperand(0)));
2689  return true;
2690  }
2691 
2692  return false;
2693 }
2694 
2695 // Lower the write_register intrinsic to an MSR instruction node if the special
2696 // register string argument is either of the form detailed in the ACLE (the
2697 // form described in getIntOperandFromRegisterString) or is a named register
2698 // known by the MSR SysReg mapper.
2699 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2700  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2701  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2702  SDLoc DL(N);
2703 
2704  int Reg = getIntOperandFromRegisterString(RegString->getString());
2705  if (Reg != -1) {
2706  ReplaceNode(
2707  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2708  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2709  N->getOperand(2), N->getOperand(0)));
2710  return true;
2711  }
2712 
2713  // Check if the register was one of those allowed as the pstatefield value in
2714  // the MSR (immediate) instruction. To accept the values allowed in the
2715  // pstatefield for the MSR (immediate) instruction, we also require that an
2716  // immediate value has been provided as an argument; we know that this is
2717  // the case, as it has been ensured by semantic checking.
2718  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2719  if (PMapper) {
2720  assert(isa<ConstantSDNode>(N->getOperand(2)) &&
2721  "Expected a constant integer expression.");
2722  unsigned Reg = PMapper->Encoding;
2723  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2724  unsigned State;
2725  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2726  assert(Immed < 2 && "Bad imm");
2727  State = AArch64::MSRpstateImm1;
2728  } else {
2729  assert(Immed < 16 && "Bad imm");
2730  State = AArch64::MSRpstateImm4;
2731  }
2732  ReplaceNode(N, CurDAG->getMachineNode(
2733  State, DL, MVT::Other,
2734  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2735  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2736  N->getOperand(0)));
2737  return true;
2738  }
2739 
2740  // Use the sysreg mapper to attempt to map the remaining possible strings
2741  // to the value for the register to be used for the MSR (register)
2742  // instruction operand.
2743  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2744  if (TheReg && TheReg->Writeable &&
2745  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2746  Reg = TheReg->Encoding;
2747  else
2748  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2749  if (Reg != -1) {
2750  ReplaceNode(N, CurDAG->getMachineNode(
2751  AArch64::MSR, DL, MVT::Other,
2752  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2753  N->getOperand(2), N->getOperand(0)));
2754  return true;
2755  }
2756 
2757  return false;
2758 }
2759 
2760 /// We've got special pseudo-instructions for these
2761 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2762  unsigned Opcode;
2763  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2764 
2765  // Leave IR for LSE if subtarget supports it.
2766  if (Subtarget->hasLSE()) return false;
2767 
2768  if (MemTy == MVT::i8)
2769  Opcode = AArch64::CMP_SWAP_8;
2770  else if (MemTy == MVT::i16)
2771  Opcode = AArch64::CMP_SWAP_16;
2772  else if (MemTy == MVT::i32)
2773  Opcode = AArch64::CMP_SWAP_32;
2774  else if (MemTy == MVT::i64)
2775  Opcode = AArch64::CMP_SWAP_64;
2776  else
2777  llvm_unreachable("Unknown AtomicCmpSwap type");
2778 
2779  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2780  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2781  N->getOperand(0)};
2782  SDNode *CmpSwap = CurDAG->getMachineNode(
2783  Opcode, SDLoc(N),
2784  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2785 
2786  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2787  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2788 
2789  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2790  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2791  CurDAG->RemoveDeadNode(N);
2792 
2793  return true;
2794 }
2795 
2796 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
2797  // tagp(FrameIndex, IRGstack, tag_offset):
2798  // since the offset between FrameIndex and IRGstack is a compile-time
2799  // constant, this can be lowered to a single ADDG instruction.
2800  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
2801  return false;
2802  }
2803 
2804  SDValue IRG_SP = N->getOperand(2);
2805  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
2806  cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
2807  Intrinsic::aarch64_irg_sp) {
2808  return false;
2809  }
2810 
2811  const TargetLowering *TLI = getTargetLowering();
2812  SDLoc DL(N);
2813  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
2814  SDValue FiOp = CurDAG->getTargetFrameIndex(
2815  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2816  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2817 
2818  SDNode *Out = CurDAG->getMachineNode(
2819  AArch64::TAGPstack, DL, MVT::i64,
2820  {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
2821  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2822  ReplaceNode(N, Out);
2823  return true;
2824 }
2825 
2826 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
2827  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
2828  "llvm.aarch64.tagp third argument must be an immediate");
2829  if (trySelectStackSlotTagP(N))
2830  return;
2831  // FIXME: the above applies in any case when the offset between Op1 and Op2
2832  // is a compile-time constant, not just for stack allocations.
2833 
2834  // General case for unrelated pointers in Op1 and Op2.
2835  SDLoc DL(N);
2836  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2837  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
2838  {N->getOperand(1), N->getOperand(2)});
2839  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
2840  {SDValue(N1, 0), N->getOperand(2)});
2841  SDNode *N3 = CurDAG->getMachineNode(
2842  AArch64::ADDG, DL, MVT::i64,
2843  {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
2844  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
2845  ReplaceNode(N, N3);
2846 }
2847 
2848 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2849  // If we have a custom node, we already have selected!
2850  if (Node->isMachineOpcode()) {
2851  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2852  Node->setNodeId(-1);
2853  return;
2854  }
2855 
2856  // A few custom selection cases.
2857  EVT VT = Node->getValueType(0);
2858 
2859  switch (Node->getOpcode()) {
2860  default:
2861  break;
2862 
2863  case ISD::ATOMIC_CMP_SWAP:
2864  if (SelectCMP_SWAP(Node))
2865  return;
2866  break;
2867 
2868  case ISD::READ_REGISTER:
2869  if (tryReadRegister(Node))
2870  return;
2871  break;
2872 
2873  case ISD::WRITE_REGISTER:
2874  if (tryWriteRegister(Node))
2875  return;
2876  break;
2877 
2878  case ISD::ADD:
2879  if (tryMLAV64LaneV128(Node))
2880  return;
2881  break;
2882 
2883  case ISD::LOAD: {
2884  // Try to select as an indexed load. Fall through to normal processing
2885  // if we can't.
2886  if (tryIndexedLoad(Node))
2887  return;
2888  break;
2889  }
2890 
2891  case ISD::SRL:
2892  case ISD::AND:
2893  case ISD::SRA:
2894  case ISD::SIGN_EXTEND_INREG:
2895  if (tryBitfieldExtractOp(Node))
2896  return;
2897  if (tryBitfieldInsertInZeroOp(Node))
2898  return;
2899  LLVM_FALLTHROUGH;
2900  case ISD::ROTR:
2901  case ISD::SHL:
2902  if (tryShiftAmountMod(Node))
2903  return;
2904  break;
2905 
2906  case ISD::SIGN_EXTEND:
2907  if (tryBitfieldExtractOpFromSExt(Node))
2908  return;
2909  break;
2910 
2911  case ISD::OR:
2912  if (tryBitfieldInsertOp(Node))
2913  return;
2914  break;
2915 
2916  case ISD::Constant: {
2917  // Materialize zero constants as copies from WZR/XZR. This allows
2918  // the coalescer to propagate these into other instructions.
2919  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2920  if (ConstNode->isNullValue()) {
2921  if (VT == MVT::i32) {
2922  SDValue New = CurDAG->getCopyFromReg(
2923  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2924  ReplaceNode(Node, New.getNode());
2925  return;
2926  } else if (VT == MVT::i64) {
2927  SDValue New = CurDAG->getCopyFromReg(
2928  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2929  ReplaceNode(Node, New.getNode());
2930  return;
2931  }
2932  }
2933  break;
2934  }
2935 
2936  case ISD::FrameIndex: {
2937  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2938  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2939  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2940  const TargetLowering *TLI = getTargetLowering();
2941  SDValue TFI = CurDAG->getTargetFrameIndex(
2942  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2943  SDLoc DL(Node);
2944  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2945  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2946  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2947  return;
2948  }
2949  case ISD::INTRINSIC_W_CHAIN: {
2950  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2951  switch (IntNo) {
2952  default:
2953  break;
2954  case Intrinsic::aarch64_ldaxp:
2955  case Intrinsic::aarch64_ldxp: {
2956  unsigned Op =
2957  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2958  SDValue MemAddr = Node->getOperand(2);
2959  SDLoc DL(Node);
2960  SDValue Chain = Node->getOperand(0);
2961 
2962  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2963  MVT::Other, MemAddr, Chain);
2964 
2965  // Transfer memoperands.
2966  MachineMemOperand *MemOp =
2967  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2968  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
2969  ReplaceNode(Node, Ld);
2970  return;
2971  }
2972  case Intrinsic::aarch64_stlxp:
2973  case Intrinsic::aarch64_stxp: {
2974  unsigned Op =
2975  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2976  SDLoc DL(Node);
2977  SDValue Chain = Node->getOperand(0);
2978  SDValue ValLo = Node->getOperand(2);
2979  SDValue ValHi = Node->getOperand(3);
2980  SDValue MemAddr = Node->getOperand(4);
2981 
2982  // Place arguments in the right order.
2983  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2984 
2985  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2986  // Transfer memoperands.
2987  MachineMemOperand *MemOp =
2988  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2989  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2990 
2991  ReplaceNode(Node, St);
2992  return;
2993  }
2994  case Intrinsic::aarch64_neon_ld1x2:
2995  if (VT == MVT::v8i8) {
2996  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2997  return;
2998  } else if (VT == MVT::v16i8) {
2999  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3000  return;
3001  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3002  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3003  return;
3004  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3005  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3006  return;
3007  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3008  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3009  return;
3010  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3011  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3012  return;
3013  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3014  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3015  return;
3016  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3017  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3018  return;
3019  }
3020  break;
3021  case Intrinsic::aarch64_neon_ld1x3:
3022  if (VT == MVT::v8i8) {
3023  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3024  return;
3025  } else if (VT == MVT::v16i8) {
3026  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3027  return;
3028  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3029  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3030  return;
3031  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3032  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3033  return;
3034  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3035  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3036  return;
3037  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3038  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3039  return;
3040  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3041  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3042  return;
3043  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3044  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3045  return;
3046  }
3047  break;
3048  case Intrinsic::aarch64_neon_ld1x4:
3049  if (VT == MVT::v8i8) {
3050  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3051  return;
3052  } else if (VT == MVT::v16i8) {
3053  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3054  return;
3055  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3056  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3057  return;
3058  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3059  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3060  return;
3061  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3062  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3063  return;
3064  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3065  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3066  return;
3067  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3068  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3069  return;
3070  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3071  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3072  return;
3073  }
3074  break;
3075  case Intrinsic::aarch64_neon_ld2:
3076  if (VT == MVT::v8i8) {
3077  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3078  return;
3079  } else if (VT == MVT::v16i8) {
3080  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3081  return;
3082  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3083  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3084  return;
3085  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3086  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3087  return;
3088  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3089  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3090  return;
3091  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3092  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3093  return;
3094  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3095  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3096  return;
3097  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3098  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3099  return;
3100  }
3101  break;
3102  case Intrinsic::aarch64_neon_ld3:
3103  if (VT == MVT::v8i8) {
3104  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3105  return;
3106  } else if (VT == MVT::v16i8) {
3107  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3108  return;
3109  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3110  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3111  return;
3112  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3113  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3114  return;
3115  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3116  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3117  return;
3118  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3119  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3120  return;
3121  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3122  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3123  return;
3124  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3125  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3126  return;
3127  }
3128  break;
3129  case Intrinsic::aarch64_neon_ld4:
3130  if (VT == MVT::v8i8) {
3131  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3132  return;
3133  } else if (VT == MVT::v16i8) {
3134  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3135  return;
3136  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3137  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3138  return;
3139  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3140  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3141  return;
3142  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3143  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3144  return;
3145  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3146  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3147  return;
3148  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3149  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3150  return;
3151  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3152  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3153  return;
3154  }
3155  break;
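  // LD2R/LD3R/LD4R: load one structure and replicate it into every lane of
  // each destination register.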
3156  case Intrinsic::aarch64_neon_ld2r:
3157  if (VT == MVT::v8i8) {
3158  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3159  return;
3160  } else if (VT == MVT::v16i8) {
3161  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3162  return;
3163  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3164  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3165  return;
3166  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3167  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3168  return;
3169  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3170  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3171  return;
3172  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3173  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3174  return;
3175  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3176  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3177  return;
3178  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3179  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3180  return;
3181  }
3182  break;
3183  case Intrinsic::aarch64_neon_ld3r:
3184  if (VT == MVT::v8i8) {
3185  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3186  return;
3187  } else if (VT == MVT::v16i8) {
3188  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3189  return;
3190  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3191  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3192  return;
3193  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3194  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3195  return;
3196  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3197  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3198  return;
3199  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3200  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3201  return;
3202  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3203  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3204  return;
3205  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3206  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3207  return;
3208  }
3209  break;
3210  case Intrinsic::aarch64_neon_ld4r:
3211  if (VT == MVT::v8i8) {
3212  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3213  return;
3214  } else if (VT == MVT::v16i8) {
3215  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3216  return;
3217  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3218  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3219  return;
3220  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3221  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3222  return;
3223  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3224  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3225  return;
3226  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3227  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3228  return;
3229  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3230  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3231  return;
3232  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3233  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3234  return;
3235  }
3236  break;
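  // LD2/LD3/LD4 single-lane loads: load one structure into the requested
  // lane. The opcode depends only on the element size, so the 64-bit and
  // 128-bit vector types of each element width share an opcode.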
3237  case Intrinsic::aarch64_neon_ld2lane:
3238  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3239  SelectLoadLane(Node, 2, AArch64::LD2i8);
3240  return;
3241  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3242  VT == MVT::v8f16) {
3243  SelectLoadLane(Node, 2, AArch64::LD2i16);
3244  return;
3245  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3246  VT == MVT::v2f32) {
3247  SelectLoadLane(Node, 2, AArch64::LD2i32);
3248  return;
3249  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3250  VT == MVT::v1f64) {
3251  SelectLoadLane(Node, 2, AArch64::LD2i64);
3252  return;
3253  }
3254  break;
3255  case Intrinsic::aarch64_neon_ld3lane:
3256  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3257  SelectLoadLane(Node, 3, AArch64::LD3i8);
3258  return;
3259  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3260  VT == MVT::v8f16) {
3261  SelectLoadLane(Node, 3, AArch64::LD3i16);
3262  return;
3263  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3264  VT == MVT::v2f32) {
3265  SelectLoadLane(Node, 3, AArch64::LD3i32);
3266  return;
3267  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3268  VT == MVT::v1f64) {
3269  SelectLoadLane(Node, 3, AArch64::LD3i64);
3270  return;
3271  }
3272  break;
3273  case Intrinsic::aarch64_neon_ld4lane:
3274  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3275  SelectLoadLane(Node, 4, AArch64::LD4i8);
3276  return;
3277  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3278  VT == MVT::v8f16) {
3279  SelectLoadLane(Node, 4, AArch64::LD4i16);
3280  return;
3281  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3282  VT == MVT::v2f32) {
3283  SelectLoadLane(Node, 4, AArch64::LD4i32);
3284  return;
3285  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3286  VT == MVT::v1f64) {
3287  SelectLoadLane(Node, 4, AArch64::LD4i64);
3288  return;
3289  }
3290  break;
3291  }
3292  } break;
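  // Chainless intrinsics: MTE pointer arithmetic (tagp) and the TBL/TBX
  // table lookups. TBL writes zero for out-of-range indices, whereas TBX
  // leaves the corresponding destination bytes unchanged; the trailing bool
  // passed to SelectTable distinguishes the two.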
3293  case ISD::INTRINSIC_WO_CHAIN: {
3294  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3295  switch (IntNo) {
3296  default:
3297  break;
3298  case Intrinsic::aarch64_tagp:
3299  SelectTagP(Node);
3300  return;
3301  case Intrinsic::aarch64_neon_tbl2:
3302  SelectTable(Node, 2,
3303  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3304  false);
3305  return;
3306  case Intrinsic::aarch64_neon_tbl3:
3307  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3308  : AArch64::TBLv16i8Three,
3309  false);
3310  return;
3311  case Intrinsic::aarch64_neon_tbl4:
3312  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3313  : AArch64::TBLv16i8Four,
3314  false);
3315  return;
3316  case Intrinsic::aarch64_neon_tbx2:
3317  SelectTable(Node, 2,
3318  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3319  true);
3320  return;
3321  case Intrinsic::aarch64_neon_tbx3:
3322  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3323  : AArch64::TBXv16i8Three,
3324  true);
3325  return;
3326  case Intrinsic::aarch64_neon_tbx4:
3327  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3328  : AArch64::TBXv16i8Four,
3329  true);
3330  return;
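  // Widening multiplies: when one operand is a lane of a 128-bit vector,
  // tryMULLV64LaneV128 can select the lane-indexed SMULL/UMULL form
  // directly; otherwise fall through to the generated matcher.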
3331  case Intrinsic::aarch64_neon_smull:
3332  case Intrinsic::aarch64_neon_umull:
3333  if (tryMULLV64LaneV128(IntNo, Node))
3334  return;
3335  break;
3336  }
3337  break;
3338  }
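  // Side-effecting intrinsics: the NEON structure stores. They produce no
  // vector result, so the type is taken from operand 2, the first data
  // operand. The mapping mirrors the loads above, including the fall-back
  // to ST1 forms for single-element v1i64/v1f64 "structures".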
3339  case ISD::INTRINSIC_VOID: {
3340  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3341  if (Node->getNumOperands() >= 3)
3342  VT = Node->getOperand(2)->getValueType(0);
3343  switch (IntNo) {
3344  default:
3345  break;
3346  case Intrinsic::aarch64_neon_st1x2: {
3347  if (VT == MVT::v8i8) {
3348  SelectStore(Node, 2, AArch64::ST1Twov8b);
3349  return;
3350  } else if (VT == MVT::v16i8) {
3351  SelectStore(Node, 2, AArch64::ST1Twov16b);
3352  return;
3353  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3354  SelectStore(Node, 2, AArch64::ST1Twov4h);
3355  return;
3356  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3357  SelectStore(Node, 2, AArch64::ST1Twov8h);
3358  return;
3359  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3360  SelectStore(Node, 2, AArch64::ST1Twov2s);
3361  return;
3362  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3363  SelectStore(Node, 2, AArch64::ST1Twov4s);
3364  return;
3365  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3366  SelectStore(Node, 2, AArch64::ST1Twov2d);
3367  return;
3368  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3369  SelectStore(Node, 2, AArch64::ST1Twov1d);
3370  return;
3371  }
3372  break;
3373  }
3374  case Intrinsic::aarch64_neon_st1x3: {
3375  if (VT == MVT::v8i8) {
3376  SelectStore(Node, 3, AArch64::ST1Threev8b);
3377  return;
3378  } else if (VT == MVT::v16i8) {
3379  SelectStore(Node, 3, AArch64::ST1Threev16b);
3380  return;
3381  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3382  SelectStore(Node, 3, AArch64::ST1Threev4h);
3383  return;
3384  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3385  SelectStore(Node, 3, AArch64::ST1Threev8h);
3386  return;
3387  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3388  SelectStore(Node, 3, AArch64::ST1Threev2s);
3389  return;
3390  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3391  SelectStore(Node, 3, AArch64::ST1Threev4s);
3392  return;
3393  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3394  SelectStore(Node, 3, AArch64::ST1Threev2d);
3395  return;
3396  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3397  SelectStore(Node, 3, AArch64::ST1Threev1d);
3398  return;
3399  }
3400  break;
3401  }
3402  case Intrinsic::aarch64_neon_st1x4: {
3403  if (VT == MVT::v8i8) {
3404  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3405  return;
3406  } else if (VT == MVT::v16i8) {
3407  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3408  return;
3409  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3410  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3411  return;
3412  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3413  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3414  return;
3415  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3416  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3417  return;
3418  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3419  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3420  return;
3421  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3422  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3423  return;
3424  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3425  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3426  return;
3427  }
3428  break;
3429  }
3430  case Intrinsic::aarch64_neon_st2: {
3431  if (VT == MVT::v8i8) {
3432  SelectStore(Node, 2, AArch64::ST2Twov8b);
3433  return;
3434  } else if (VT == MVT::v16i8) {
3435  SelectStore(Node, 2, AArch64::ST2Twov16b);
3436  return;
3437  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3438  SelectStore(Node, 2, AArch64::ST2Twov4h);
3439  return;
3440  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3441  SelectStore(Node, 2, AArch64::ST2Twov8h);
3442  return;
3443  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3444  SelectStore(Node, 2, AArch64::ST2Twov2s);
3445  return;
3446  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3447  SelectStore(Node, 2, AArch64::ST2Twov4s);
3448  return;
3449  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3450  SelectStore(Node, 2, AArch64::ST2Twov2d);
3451  return;
3452  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3453  SelectStore(Node, 2, AArch64::ST1Twov1d);
3454  return;
3455  }
3456  break;
3457  }
3458  case Intrinsic::aarch64_neon_st3: {
3459  if (VT == MVT::v8i8) {
3460  SelectStore(Node, 3, AArch64::ST3Threev8b);
3461  return;
3462  } else if (VT == MVT::v16i8) {
3463  SelectStore(Node, 3, AArch64::ST3Threev16b);
3464  return;
3465  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3466  SelectStore(Node, 3, AArch64::ST3Threev4h);
3467  return;
3468  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3469  SelectStore(Node, 3, AArch64::ST3Threev8h);
3470  return;
3471  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3472  SelectStore(Node, 3, AArch64::ST3Threev2s);
3473  return;
3474  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3475  SelectStore(Node, 3, AArch64::ST3Threev4s);
3476  return;
3477  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3478  SelectStore(Node, 3, AArch64::ST3Threev2d);
3479  return;
3480  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3481  SelectStore(Node, 3, AArch64::ST1Threev1d);
3482  return;
3483  }
3484  break;
3485  }
3486  case Intrinsic::aarch64_neon_st4: {
3487  if (VT == MVT::v8i8) {
3488  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3489  return;
3490  } else if (VT == MVT::v16i8) {
3491  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3492  return;
3493  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3494  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3495  return;
3496  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3497  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3498  return;
3499  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3500  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3501  return;
3502  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3503  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3504  return;
3505  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3506  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3507  return;
3508  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3509  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3510  return;
3511  }
3512  break;
3513  }
3514  case Intrinsic::aarch64_neon_st2lane: {
3515  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3516  SelectStoreLane(Node, 2, AArch64::ST2i8);
3517  return;
3518  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3519  VT == MVT::v8f16) {
3520  SelectStoreLane(Node, 2, AArch64::ST2i16);
3521  return;
3522  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3523  VT == MVT::v2f32) {
3524  SelectStoreLane(Node, 2, AArch64::ST2i32);
3525  return;
3526  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3527  VT == MVT::v1f64) {
3528  SelectStoreLane(Node, 2, AArch64::ST2i64);
3529  return;
3530  }
3531  break;
3532  }
3533  case Intrinsic::aarch64_neon_st3lane: {
3534  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3535  SelectStoreLane(Node, 3, AArch64::ST3i8);
3536  return;
3537  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3538  VT == MVT::v8f16) {
3539  SelectStoreLane(Node, 3, AArch64::ST3i16);
3540  return;
3541  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3542  VT == MVT::v2f32) {
3543  SelectStoreLane(Node, 3, AArch64::ST3i32);
3544  return;
3545  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3546  VT == MVT::v1f64) {
3547  SelectStoreLane(Node, 3, AArch64::ST3i64);
3548  return;
3549  }
3550  break;
3551  }
3552  case Intrinsic::aarch64_neon_st4lane: {
3553  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3554  SelectStoreLane(Node, 4, AArch64::ST4i8);
3555  return;
3556  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3557  VT == MVT::v8f16) {
3558  SelectStoreLane(Node, 4, AArch64::ST4i16);
3559  return;
3560  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3561  VT == MVT::v2f32) {
3562  SelectStoreLane(Node, 4, AArch64::ST4i32);
3563  return;
3564  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3565  VT == MVT::v1f64) {
3566  SelectStoreLane(Node, 4, AArch64::ST4i64);
3567  return;
3568  }
3569  break;
3570  }
3571  }
3572  break;
3573  }
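  // Post-indexed (write-back) structure loads. These AArch64ISD nodes are
  // formed when a DAG combine folds the base-pointer increment into the
  // load; the _POST instructions additionally produce the updated base
  // register.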
3574  case AArch64ISD::LD2post: {
3575  if (VT == MVT::v8i8) {
3576  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3577  return;
3578  } else if (VT == MVT::v16i8) {
3579  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3580  return;
3581  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3582  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3583  return;
3584  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3585  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3586  return;
3587  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3588  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3589  return;
3590  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3591  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3592  return;
3593  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3594  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3595  return;
3596  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3597  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3598  return;
3599  }
3600  break;
3601  }
3602  case AArch64ISD::LD3post: {
3603  if (VT == MVT::v8i8) {
3604  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3605  return;
3606  } else if (VT == MVT::v16i8) {
3607  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3608  return;
3609  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3610  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3611  return;
3612  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3613  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3614  return;
3615  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3616  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3617  return;
3618  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3619  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3620  return;
3621  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3622  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3623  return;
3624  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3625  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3626  return;
3627  }
3628  break;
3629  }
3630  case AArch64ISD::LD4post: {
3631  if (VT == MVT::v8i8) {
3632  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3633  return;
3634  } else if (VT == MVT::v16i8) {
3635  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3636  return;
3637  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3638  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3639  return;
3640  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3641  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3642  return;
3643  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3644  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3645  return;
3646  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3647  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3648  return;
3649  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3650  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3651  return;
3652  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3653  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3654  return;
3655  }
3656  break;
3657  }
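  // Post-indexed LD1x{2,3,4}: consecutive-register loads with write-back,
  // again without de-interleaving.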
3658  case AArch64ISD::LD1x2post: {
3659  if (VT == MVT::v8i8) {
3660  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3661  return;
3662  } else if (VT == MVT::v16i8) {
3663  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3664  return;
3665  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3666  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3667  return;
3668  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3669  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3670  return;
3671  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3672  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3673  return;
3674  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3675  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3676  return;
3677  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3678  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3679  return;
3680  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3681  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3682  return;
3683  }
3684  break;
3685  }
3686  case AArch64ISD::LD1x3post: {
3687  if (VT == MVT::v8i8) {
3688  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3689  return;
3690  } else if (VT == MVT::v16i8) {
3691  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3692  return;
3693  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3694  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3695  return;
3696  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3697  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3698  return;
3699  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3700  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3701  return;
3702  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3703  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3704  return;
3705  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3706  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3707  return;
3708  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3709  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3710  return;
3711  }
3712  break;
3713  }
3714  case AArch64ISD::LD1x4post: {
3715  if (VT == MVT::v8i8) {
3716  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3717  return;
3718  } else if (VT == MVT::v16i8) {
3719  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3720  return;
3721  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3722  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3723  return;
3724  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3725  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3726  return;
3727  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3728  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3729  return;
3730  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3731  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3732  return;
3733  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3734  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3735  return;
3736  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3737  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3738  return;
3739  }
3740  break;
3741  }
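  // Post-indexed load-and-replicate: LD1R through LD4R with write-back.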
3742  case AArch64ISD::LD1DUPpost: {
3743  if (VT == MVT::v8i8) {
3744  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3745  return;
3746  } else if (VT == MVT::v16i8) {
3747  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3748  return;
3749  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3750  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3751  return;
3752  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3753  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3754  return;
3755  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3756  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3757  return;
3758  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3759  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3760  return;
3761  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3762  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3763  return;
3764  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3765  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3766  return;
3767  }
3768  break;
3769  }
3770  case AArch64ISD::LD2DUPpost: {
3771  if (VT == MVT::v8i8) {
3772  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3773  return;
3774  } else if (VT == MVT::v16i8) {
3775  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3776  return;
3777  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3778  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3779  return;
3780  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3781  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3782  return;
3783  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3784  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3785  return;
3786  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3787  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3788  return;
3789  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3790  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3791  return;
3792  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3793  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3794  return;
3795  }
3796  break;
3797  }
3798  case AArch64ISD::LD3DUPpost: {
3799  if (VT == MVT::v8i8) {
3800  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3801  return;
3802  } else if (VT == MVT::v16i8) {
3803  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3804  return;
3805  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3806  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3807  return;
3808  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3809  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3810  return;
3811  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3812  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3813  return;
3814  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3815  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3816  return;
3817  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3818  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3819  return;
3820  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3821  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3822  return;
3823  }
3824  break;
3825  }
3826  case AArch64ISD::LD4DUPpost: {
3827  if (VT == MVT::v8i8) {
3828  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3829  return;
3830  } else if (VT == MVT::v16i8) {
3831  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3832  return;
3833  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3834  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3835  return;
3836  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3837  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3838  return;
3839  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3840  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3841  return;
3842  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3843  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3844  return;
3845  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3846  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3847  return;
3848  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3849  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3850  return;
3851  }
3852  break;
3853  }
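  // Post-indexed single-lane loads.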
3854  case AArch64ISD::LD1LANEpost: {
3855  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3856  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3857  return;
3858  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3859  VT == MVT::v8f16) {
3860  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3861  return;
3862  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3863  VT == MVT::v2f32) {
3864  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3865  return;
3866  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3867  VT == MVT::v1f64) {
3868  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3869  return;
3870  }
3871  break;
3872  }
3873  case AArch64ISD::LD2LANEpost: {
3874  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3875  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3876  return;
3877  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3878  VT == MVT::v8f16) {
3879  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3880  return;
3881  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3882  VT == MVT::v2f32) {
3883  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3884  return;
3885  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3886  VT == MVT::v1f64) {
3887  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3888  return;
3889  }
3890  break;
3891  }
3892  case AArch64ISD::LD3LANEpost: {
3893  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3894  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3895  return;
3896  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3897  VT == MVT::v8f16) {
3898  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3899  return;
3900  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3901  VT == MVT::v2f32) {
3902  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3903  return;
3904  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3905  VT == MVT::v1f64) {
3906  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3907  return;
3908  }
3909  break;
3910  }
3911  case AArch64ISD::LD4LANEpost: {
3912  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3913  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3914  return;
3915  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3916  VT == MVT::v8f16) {
3917  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3918  return;
3919  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3920  VT == MVT::v2f32) {
3921  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3922  return;
3923  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3924  VT == MVT::v1f64) {
3925  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3926  return;
3927  }
3928  break;
3929  }
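  // Post-indexed structure stores. Stores yield no vector result, so the
  // type is read from operand 1, the first stored vector.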
3930  case AArch64ISD::ST2post: {
3931  VT = Node->getOperand(1).getValueType();
3932  if (VT == MVT::v8i8) {
3933  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3934  return;
3935  } else if (VT == MVT::v16i8) {
3936  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3937  return;
3938  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3939  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3940  return;
3941  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3942  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3943  return;
3944  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3945  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3946  return;
3947  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3948  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3949  return;
3950  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3951  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3952  return;
3953  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3954  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3955  return;
3956  }
3957  break;
3958  }
3959  case AArch64ISD::ST3post: {
3960  VT = Node->getOperand(1).getValueType();
3961  if (VT == MVT::v8i8) {
3962  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3963  return;
3964  } else if (VT == MVT::v16i8) {
3965  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3966  return;
3967  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3968  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3969  return;
3970  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3971  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3972  return;
3973  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3974  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3975  return;
3976  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3977  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3978  return;
3979  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3980  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3981  return;
3982  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3983  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3984  return;
3985  }
3986  break;
3987  }
3988  case AArch64ISD::ST4post: {
3989  VT = Node->getOperand(1).getValueType();
3990  if (VT == MVT::v8i8) {
3991  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3992  return;
3993  } else if (VT == MVT::v16i8) {
3994  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3995  return;
3996  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3997  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3998  return;
3999  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4000  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4001  return;
4002  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4003  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4004  return;
4005  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4006  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4007  return;
4008  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4009  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4010  return;
4011  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4012  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4013  return;
4014  }
4015  break;
4016  }
4017  case AArch64ISD::ST1x2post: {
4018  VT = Node->getOperand(1).getValueType();
4019  if (VT == MVT::v8i8) {
4020  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4021  return;
4022  } else if (VT == MVT::v16i8) {
4023  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4024  return;
4025  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4026  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4027  return;
4028  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4029  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4030  return;
4031  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4032  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4033  return;
4034  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4035  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4036  return;
4037  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4038  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4039  return;
4040  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4041  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4042  return;
4043  }
4044  break;
4045  }
4046  case AArch64ISD::ST1x3post: {
4047  VT = Node->getOperand(1).getValueType();
4048  if (VT == MVT::v8i8) {
4049  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4050  return;
4051  } else if (VT == MVT::v16i8) {
4052  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4053  return;
4054  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4055  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4056  return;
4057  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4058  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4059  return;
4060  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4061  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4062  return;
4063  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4064  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4065  return;
4066  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4067  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4068  return;
4069  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4070  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4071  return;
4072  }
4073  break;
4074  }
4075  case AArch64ISD::ST1x4post: {
4076  VT = Node->getOperand(1).getValueType();
4077  if (VT == MVT::v8i8) {
4078  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4079  return;
4080  } else if (VT == MVT::v16i8) {
4081  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4082  return;
4083  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4084  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4085  return;
4086  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4087  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4088  return;
4089  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4090  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4091  return;
4092  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4093  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4094  return;
4095  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4096  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4097  return;
4098  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4099  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4100  return;
4101  }
4102  break;
4103  }
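  // Post-indexed single-lane stores, mirroring the lane loads above.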
4104  case AArch64ISD::ST2LANEpost: {
4105  VT = Node->getOperand(1).getValueType();
4106  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4107  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4108  return;
4109  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4110  VT == MVT::v8f16) {
4111  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4112  return;
4113  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4114  VT == MVT::v2f32) {
4115  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4116  return;
4117  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4118  VT == MVT::v1f64) {
4119  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4120  return;
4121  }
4122  break;
4123  }
4124  case AArch64ISD::ST3LANEpost: {
4125  VT = Node->getOperand(1).getValueType();
4126  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4127  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4128  return;
4129  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4130  VT == MVT::v8f16) {
4131  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4132  return;
4133  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4134  VT == MVT::v2f32) {
4135  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4136  return;
4137  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4138  VT == MVT::v1f64) {
4139  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4140  return;
4141  }
4142  break;
4143  }
4144  case AArch64ISD::ST4LANEpost: {
4145  VT = Node->getOperand(1).getValueType();
4146  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4147  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4148  return;
4149  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4150  VT == MVT::v8f16) {
4151  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4152  return;
4153  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4154  VT == MVT::v2f32) {
4155  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4156  return;
4157  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4158  VT == MVT::v1f64) {
4159  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4160  return;
4161  }
4162  break;
4163  }
4164  }
4165 
4166  // Select the default instruction
4167  SelectCode(Node);
4168 }
4169 
4170 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4171 /// AArch64-specific DAG, ready for instruction scheduling.
4172 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4173                                          CodeGenOpt::Level OptLevel) {
4174  return new AArch64DAGToDAGISel(TM, OptLevel);
4175 }
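
For context, the sketch below shows how this factory function is typically
wired into code generation by the target's pass configuration. It is
illustrative only: AArch64PassConfig, getAArch64TargetMachine() and
getOptLevel() are assumed from AArch64TargetMachine.cpp and are not part of
this file.

bool AArch64PassConfig::addInstSelector() {
  // Install the SelectionDAG instruction selector defined above.
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
  // Returning false signals that instruction selection was set up.
  return false;
}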