AArch64ISelDAGToDAG.cpp
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the AArch64 target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/KnownBits.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "aarch64-isel"
30 
31 //===--------------------------------------------------------------------===//
32 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
33 /// instructions for SelectionDAG operations.
34 ///
35 namespace {
36 
37 class AArch64DAGToDAGISel : public SelectionDAGISel {
38 
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42 
43  bool ForCodeSize;
44 
45 public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47  CodeGenOpt::Level OptLevel)
48  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
49  ForCodeSize(false) {}
50 
51  StringRef getPassName() const override {
52  return "AArch64 Instruction Selection";
53  }
54 
55  bool runOnMachineFunction(MachineFunction &MF) override {
56  ForCodeSize = MF.getFunction().optForSize();
57  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
58  return SelectionDAGISel::runOnMachineFunction(MF);
59  }
60 
61  void Select(SDNode *Node) override;
62 
63  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
64  /// inline asm expressions.
65  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
66  unsigned ConstraintID,
67  std::vector<SDValue> &OutOps) override;
68 
69  bool tryMLAV64LaneV128(SDNode *N);
70  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75  return SelectShiftedRegister(N, false, Reg, Shift);
76  }
77  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78  return SelectShiftedRegister(N, true, Reg, Shift);
79  }
80  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82  }
83  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85  }
86  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88  }
89  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91  }
92  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94  }
95  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
96  return SelectAddrModeIndexed(N, 1, Base, OffImm);
97  }
98  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
99  return SelectAddrModeIndexed(N, 2, Base, OffImm);
100  }
101  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
102  return SelectAddrModeIndexed(N, 4, Base, OffImm);
103  }
104  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
105  return SelectAddrModeIndexed(N, 8, Base, OffImm);
106  }
107  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
108  return SelectAddrModeIndexed(N, 16, Base, OffImm);
109  }
110  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
111  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
112  }
113  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
114  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
115  }
116  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
117  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
118  }
119  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
120  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
121  }
122  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
123  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
124  }
125 
126  template<int Width>
127  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
128  SDValue &SignExtend, SDValue &DoShift) {
129  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
130  }
131 
132  template<int Width>
133  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
134  SDValue &SignExtend, SDValue &DoShift) {
135  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
136  }
137 
138 
139  /// Form sequences of consecutive 64/128-bit registers for use in NEON
140  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
141 /// between 1 and 4 elements. If it contains a single element, that element is
142 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
143  SDValue createDTuple(ArrayRef<SDValue> Vecs);
144  SDValue createQTuple(ArrayRef<SDValue> Vecs);
145 
146  /// Generic helper for the createDTuple/createQTuple
147  /// functions. Those should almost always be called instead.
148  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
149  const unsigned SubRegs[]);
150 
151  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
152 
153  bool tryIndexedLoad(SDNode *N);
154 
155  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
156  unsigned SubRegIdx);
157  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
158  unsigned SubRegIdx);
159  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
161 
162  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
164  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
166 
167  bool tryBitfieldExtractOp(SDNode *N);
168  bool tryBitfieldExtractOpFromSExt(SDNode *N);
169  bool tryBitfieldInsertOp(SDNode *N);
170  bool tryBitfieldInsertInZeroOp(SDNode *N);
171  bool tryShiftAmountMod(SDNode *N);
172 
173  bool tryReadRegister(SDNode *N);
174  bool tryWriteRegister(SDNode *N);
175 
176 // Include the pieces autogenerated from the target description.
177 #include "AArch64GenDAGISel.inc"
178 
179 private:
180  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
181  SDValue &Shift);
182  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
183  SDValue &OffImm);
184  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
185  SDValue &OffImm);
186  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
187  SDValue &OffImm);
188  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
189  SDValue &Offset, SDValue &SignExtend,
190  SDValue &DoShift);
191  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
192  SDValue &Offset, SDValue &SignExtend,
193  SDValue &DoShift);
194  bool isWorthFolding(SDValue V) const;
195  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
196  SDValue &Offset, SDValue &SignExtend);
197 
198  template<unsigned RegWidth>
199  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
200  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
201  }
202 
203  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
204 
205  bool SelectCMP_SWAP(SDNode *N);
206 
207 };
208 } // end anonymous namespace
209 
210 /// isIntImmediate - This method tests to see if the node is a constant
211 /// operand. If so, Imm will receive the 32-bit value.
212 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
213  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
214  Imm = C->getZExtValue();
215  return true;
216  }
217  return false;
218 }
219 
220 // isIntImmediate - This method tests to see if the operand is a constant.
221 // If so, Imm will receive the value.
222 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
223  return isIntImmediate(N.getNode(), Imm);
224 }
225 
226 // isOpcWithIntImmediate - This method tests to see if the node is a specific
227 // opcode and that it has an immediate integer right operand.
228 // If so, Imm will receive the 32-bit value.
229 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
230  uint64_t &Imm) {
231  return N->getOpcode() == Opc &&
232  isIntImmediate(N->getOperand(1).getNode(), Imm);
233 }
234 
235 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
236  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
237  switch(ConstraintID) {
238  default:
239  llvm_unreachable("Unexpected asm memory constraint");
240  case InlineAsm::Constraint_i:
241  case InlineAsm::Constraint_m:
242  case InlineAsm::Constraint_Q:
243  // We need to make sure that this one operand does not end up in XZR, thus
244  // require the address to be in a PointerRegClass register.
245  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
246  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
247  SDLoc dl(Op);
248  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
249  SDValue NewOp =
250  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
251  dl, Op.getValueType(),
252  Op, RC), 0);
253  OutOps.push_back(NewOp);
254  return false;
255  }
256  return true;
257 }
258 
259 /// SelectArithImmed - Select an immediate value that can be represented as
260 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
261 /// Val set to the 12-bit value and Shift set to the shifter operand.
262 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
263  SDValue &Shift) {
264  // This function is called from the addsub_shifted_imm ComplexPattern,
265  // which lists [imm] as the list of opcodes it's interested in; however,
266  // we still need to check whether the operand is actually an immediate
267  // here because the ComplexPattern opcode list is only used in
268  // root-level opcode matching.
269  if (!isa<ConstantSDNode>(N.getNode()))
270  return false;
271 
272  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
273  unsigned ShiftAmt;
274 
275  if (Immed >> 12 == 0) {
276  ShiftAmt = 0;
277  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
278  ShiftAmt = 12;
279  Immed = Immed >> 12;
280  } else
281  return false;
282 
283  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
284  SDLoc dl(N);
285  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
286  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
287  return true;
288 }
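
// Worked example of the check above (illustrative only):
//   add x0, x1, #0xabc          ; Immed = 0xabc    -> Val = 0xabc, LSL #0
//   add x0, x1, #0xabc, lsl #12 ; Immed = 0xabc000 -> Val = 0xabc, LSL #12
// An immediate such as 0x1001 fails both tests (bits above 12 are set and the
// low 12 bits are non-zero), so it has to be materialized into a register
// instead of being folded as a shifted arithmetic immediate.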
289 
290 /// SelectNegArithImmed - As above, but negates the value before trying to
291 /// select it.
292 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
293  SDValue &Shift) {
294  // This function is called from the addsub_shifted_imm ComplexPattern,
295  // which lists [imm] as the list of opcodes it's interested in; however,
296  // we still need to check whether the operand is actually an immediate
297  // here because the ComplexPattern opcode list is only used in
298  // root-level opcode matching.
299  if (!isa<ConstantSDNode>(N.getNode()))
300  return false;
301 
302  // The immediate operand must be a 24-bit zero-extended immediate.
303  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
304 
305  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
306  // have the opposite effect on the C flag, so this pattern mustn't match under
307  // those circumstances.
308  if (Immed == 0)
309  return false;
310 
311  if (N.getValueType() == MVT::i32)
312  Immed = ~((uint32_t)Immed) + 1;
313  else
314  Immed = ~Immed + 1ULL;
315  if (Immed & 0xFFFFFFFFFF000000ULL)
316  return false;
317 
318  Immed &= 0xFFFFFFULL;
319  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
320  Shift);
321 }
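
// For example, an (add x, -16) node can be selected as a subtract of the
// positive immediate 16; the negated value is simply handed back to
// SelectArithImmed above. The Immed == 0 early-out keeps "cmp wN, #0" from
// being rewritten as "cmn wN, #0", which would change the meaning of the
// C flag as noted in the comment above.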
322 
323 /// getShiftTypeForNode - Translate a shift node to the corresponding
324 /// ShiftType value.
325 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
326  switch (N.getOpcode()) {
327  default:
328  return AArch64_AM::InvalidShiftExtend;
329  case ISD::SHL:
330  return AArch64_AM::LSL;
331  case ISD::SRL:
332  return AArch64_AM::LSR;
333  case ISD::SRA:
334  return AArch64_AM::ASR;
335  case ISD::ROTR:
336  return AArch64_AM::ROR;
337  }
338 }
339 
340 /// Determine whether it is worth it to fold SHL into the addressing
341 /// mode.
342 static bool isWorthFoldingSHL(SDValue V) {
343  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
344  // It is worth folding logical shift of up to three places.
345  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
346  if (!CSD)
347  return false;
348  unsigned ShiftVal = CSD->getZExtValue();
349  if (ShiftVal > 3)
350  return false;
351 
352  // Check if this particular node is reused in any non-memory related
353  // operation. If yes, do not try to fold this node into the address
354  // computation, since the computation will be kept.
355  const SDNode *Node = V.getNode();
356  for (SDNode *UI : Node->uses())
357  if (!isa<MemSDNode>(*UI))
358  for (SDNode *UII : UI->uses())
359  if (!isa<MemSDNode>(*UII))
360  return false;
361  return true;
362 }
363 
364 /// Determine whether it is worth folding V into an extended register.
365 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
366  // Trivial if we are optimizing for code size or if there is only
367  // one use of the value.
368  if (ForCodeSize || V.hasOneUse())
369  return true;
370  // If a subtarget has a fastpath LSL we can fold a logical shift into
371  // the addressing mode and save a cycle.
372  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
373  isWorthFoldingSHL(V))
374  return true;
375  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
376  const SDValue LHS = V.getOperand(0);
377  const SDValue RHS = V.getOperand(1);
378  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
379  return true;
380  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
381  return true;
382  }
383 
384  // It hurts otherwise, since the value will be reused.
385  return false;
386 }
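
// Example: when optimizing for size (ForCodeSize) or when the shifted value
// has a single use, (add x0, (shl x1, #3)) can be folded into
//   ldr x2, [x0, x1, lsl #3]
// If the shifted value has other, non-memory uses and the subtarget has no
// fast LSL, the shift is kept as a separate instruction so its result can be
// reused.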
387 
388 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
389 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
390 /// instructions allow the shifted register to be rotated, but the arithmetic
391 /// instructions do not. The AllowROR parameter specifies whether ROR is
392 /// supported.
393 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
394  SDValue &Reg, SDValue &Shift) {
395  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
396  if (ShType == AArch64_AM::InvalidShiftExtend)
397  return false;
398  if (!AllowROR && ShType == AArch64_AM::ROR)
399  return false;
400 
401  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
402  unsigned BitSize = N.getValueSizeInBits();
403  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
404  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
405 
406  Reg = N.getOperand(0);
407  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
408  return isWorthFolding(N);
409  }
410 
411  return false;
412 }
413 
414 /// getExtendTypeForNode - Translate an extend node to the corresponding
415 /// ExtendType value.
416 static AArch64_AM::ShiftExtendType
417 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
418  if (N.getOpcode() == ISD::SIGN_EXTEND ||
419  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
420  EVT SrcVT;
421  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
422  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
423  else
424  SrcVT = N.getOperand(0).getValueType();
425 
426  if (!IsLoadStore && SrcVT == MVT::i8)
427  return AArch64_AM::SXTB;
428  else if (!IsLoadStore && SrcVT == MVT::i16)
429  return AArch64_AM::SXTH;
430  else if (SrcVT == MVT::i32)
431  return AArch64_AM::SXTW;
432  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
433 
434  return AArch64_AM::InvalidShiftExtend;
435  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
436  N.getOpcode() == ISD::ANY_EXTEND) {
437  EVT SrcVT = N.getOperand(0).getValueType();
438  if (!IsLoadStore && SrcVT == MVT::i8)
439  return AArch64_AM::UXTB;
440  else if (!IsLoadStore && SrcVT == MVT::i16)
441  return AArch64_AM::UXTH;
442  else if (SrcVT == MVT::i32)
443  return AArch64_AM::UXTW;
444  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
445 
446  return AArch64_AM::InvalidShiftExtend;
447  } else if (N.getOpcode() == ISD::AND) {
448  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
449  if (!CSD)
450  return AArch64_AM::InvalidShiftExtend;
451  uint64_t AndMask = CSD->getZExtValue();
452 
453  switch (AndMask) {
454  default:
455  return AArch64_AM::InvalidShiftExtend;
456  case 0xFF:
457  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
458  case 0xFFFF:
459  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
460  case 0xFFFFFFFF:
461  return AArch64_AM::UXTW;
462  }
463  }
464 
465  return AArch64_AM::InvalidShiftExtend;
466 }
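
// Summary of the mapping above for a 64-bit value %x:
//   (sext_inreg %x, i8)  -> SXTB      (and %x, 0xFF)       -> UXTB
//   (sext_inreg %x, i16) -> SXTH      (and %x, 0xFFFF)     -> UXTH
//   (sext_inreg %x, i32) -> SXTW      (and %x, 0xFFFFFFFF) -> UXTW
// For loads/stores (IsLoadStore == true) only the 32-bit forms are usable, so
// the byte and halfword cases return InvalidShiftExtend instead.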
467 
468 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
469 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
470  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
471  DL->getOpcode() != AArch64ISD::DUPLANE32)
472  return false;
473 
474  SDValue SV = DL->getOperand(0);
475  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
476  return false;
477 
478  SDValue EV = SV.getOperand(1);
479  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
480  return false;
481 
482  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
483  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
484  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
485  LaneOp = EV.getOperand(0);
486 
487  return true;
488 }
489 
490 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
491 // high lane extract.
492 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
493  SDValue &LaneOp, int &LaneIdx) {
494 
495  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
496  std::swap(Op0, Op1);
497  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
498  return false;
499  }
500  StdOp = Op1;
501  return true;
502 }
503 
504 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
505 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
506 /// so that we don't emit unnecessary lane extracts.
507 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
508  SDLoc dl(N);
509  SDValue Op0 = N->getOperand(0);
510  SDValue Op1 = N->getOperand(1);
511  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
512  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
513  int LaneIdx = -1; // Will hold the lane index.
514 
515  if (Op1.getOpcode() != ISD::MUL ||
516  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
517  LaneIdx)) {
518  std::swap(Op0, Op1);
519  if (Op1.getOpcode() != ISD::MUL ||
520  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
521  LaneIdx))
522  return false;
523  }
524 
525  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
526 
527  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
528 
529  unsigned MLAOpc = ~0U;
530 
531  switch (N->getSimpleValueType(0).SimpleTy) {
532  default:
533  llvm_unreachable("Unrecognized MLA.");
534  case MVT::v4i16:
535  MLAOpc = AArch64::MLAv4i16_indexed;
536  break;
537  case MVT::v8i16:
538  MLAOpc = AArch64::MLAv8i16_indexed;
539  break;
540  case MVT::v2i32:
541  MLAOpc = AArch64::MLAv2i32_indexed;
542  break;
543  case MVT::v4i32:
544  MLAOpc = AArch64::MLAv4i32_indexed;
545  break;
546  }
547 
548  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
549  return true;
550 }
551 
552 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
553  SDLoc dl(N);
554  SDValue SMULLOp0;
555  SDValue SMULLOp1;
556  int LaneIdx;
557 
558  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
559  LaneIdx))
560  return false;
561 
562  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
563 
564  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
565 
566  unsigned SMULLOpc = ~0U;
567 
568  if (IntNo == Intrinsic::aarch64_neon_smull) {
569  switch (N->getSimpleValueType(0).SimpleTy) {
570  default:
571  llvm_unreachable("Unrecognized SMULL.");
572  case MVT::v4i32:
573  SMULLOpc = AArch64::SMULLv4i16_indexed;
574  break;
575  case MVT::v2i64:
576  SMULLOpc = AArch64::SMULLv2i32_indexed;
577  break;
578  }
579  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
580  switch (N->getSimpleValueType(0).SimpleTy) {
581  default:
582  llvm_unreachable("Unrecognized SMULL.");
583  case MVT::v4i32:
584  SMULLOpc = AArch64::UMULLv4i16_indexed;
585  break;
586  case MVT::v2i64:
587  SMULLOpc = AArch64::UMULLv2i32_indexed;
588  break;
589  }
590  } else
591  llvm_unreachable("Unrecognized intrinsic.");
592 
593  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
594  return true;
595 }
596 
597 /// Instructions that accept extend modifiers like UXTW expect the register
598 /// being extended to be a GPR32, but the incoming DAG might be acting on a
599 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
600 /// this is the case.
601 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
602  if (N.getValueType() == MVT::i32)
603  return N;
604 
605  SDLoc dl(N);
606  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
607  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
608  dl, MVT::i32, N, SubReg);
609  return SDValue(Node, 0);
610 }
611 
612 
613 /// SelectArithExtendedRegister - Select an "extended register" operand. This
614 /// operand folds in an extend followed by an optional left shift.
615 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
616  SDValue &Shift) {
617  unsigned ShiftVal = 0;
618  AArch64_AM::ShiftExtendType Ext;
619 
620  if (N.getOpcode() == ISD::SHL) {
621  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
622  if (!CSD)
623  return false;
624  ShiftVal = CSD->getZExtValue();
625  if (ShiftVal > 4)
626  return false;
627 
628  Ext = getExtendTypeForNode(N.getOperand(0));
629  if (Ext == AArch64_AM::InvalidShiftExtend)
630  return false;
631 
632  Reg = N.getOperand(0).getOperand(0);
633  } else {
634  Ext = getExtendTypeForNode(N);
635  if (Ext == AArch64_AM::InvalidShiftExtend)
636  return false;
637 
638  Reg = N.getOperand(0);
639 
640  // Don't match if free 32-bit -> 64-bit zext can be used instead.
641  if (Ext == AArch64_AM::UXTW &&
642  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
643  return false;
644  }
645 
646  // AArch64 mandates that the RHS of the operation must use the smallest
647  // register class that could contain the size being extended from. Thus,
648  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
649  // there might not be an actual 32-bit value in the program. We can
650  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
651  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
652  Reg = narrowIfNeeded(CurDAG, Reg);
653  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
654  MVT::i32);
655  return isWorthFolding(N);
656 }
657 
658 /// If there's a use of this ADDlow that's not itself a load/store then we'll
659 /// need to create a real ADD instruction from it anyway and there's no point in
660 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
661 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
662 /// leads to duplicated ADRP instructions.
663 static bool isWorthFoldingADDlow(SDValue N) {
664  for (auto Use : N->uses()) {
665  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
666  Use->getOpcode() != ISD::ATOMIC_LOAD &&
667  Use->getOpcode() != ISD::ATOMIC_STORE)
668  return false;
669 
670  // ldar and stlr have much more restrictive addressing modes (just a
671  // register).
672  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
673  return false;
674  }
675 
676  return true;
677 }
678 
679 /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
680 /// immediate" address. The "Size" argument is the size in bytes of the memory
681 /// reference, which determines the scale.
682 bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
683  SDValue &Base,
684  SDValue &OffImm) {
685  SDLoc dl(N);
686  const DataLayout &DL = CurDAG->getDataLayout();
687  const TargetLowering *TLI = getTargetLowering();
688  if (N.getOpcode() == ISD::FrameIndex) {
689  int FI = cast<FrameIndexSDNode>(N)->getIndex();
690  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
691  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
692  return true;
693  }
694 
695  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
696  // addressing mode selected here doesn't support labels/immediates, only base+offset.
697 
698  if (CurDAG->isBaseWithConstantOffset(N)) {
699  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
700  int64_t RHSC = RHS->getSExtValue();
701  unsigned Scale = Log2_32(Size);
702  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
703  RHSC < (0x40 << Scale)) {
704  Base = N.getOperand(0);
705  if (Base.getOpcode() == ISD::FrameIndex) {
706  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
707  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
708  }
709  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
710  return true;
711  }
712  }
713  }
714 
715  // Base only. The address will be materialized into a register before
716  // the memory is accessed.
717  // add x0, Xbase, #offset
718  // stp x1, x2, [x0]
719  Base = N;
720  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
721  return true;
722 }
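
// This addressing mode is the one used by the load/store pair instructions.
// For example, with Size == 8 the offset must be an 8-byte-aligned value in
// [-512, 504], i.e. a signed 7-bit field scaled by 8:
//   stp x1, x2, [x0, #-512]   ; RHSC = -512 -> OffImm = -64
//   stp x1, x2, [x0, #504]    ; RHSC =  504 -> OffImm =  63
// An offset of 512, or one that is not a multiple of 8, falls through to the
// base-only case above.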
723 
724 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
725 /// immediate" address. The "Size" argument is the size in bytes of the memory
726 /// reference, which determines the scale.
727 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
728  SDValue &Base, SDValue &OffImm) {
729  SDLoc dl(N);
730  const DataLayout &DL = CurDAG->getDataLayout();
731  const TargetLowering *TLI = getTargetLowering();
732  if (N.getOpcode() == ISD::FrameIndex) {
733  int FI = cast<FrameIndexSDNode>(N)->getIndex();
734  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
735  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
736  return true;
737  }
738 
739  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
740  GlobalAddressSDNode *GAN =
741  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
742  Base = N.getOperand(0);
743  OffImm = N.getOperand(1);
744  if (!GAN)
745  return true;
746 
747  if (GAN->getOffset() % Size == 0) {
748  const GlobalValue *GV = GAN->getGlobal();
749  unsigned Alignment = GV->getAlignment();
750  Type *Ty = GV->getValueType();
751  if (Alignment == 0 && Ty->isSized())
752  Alignment = DL.getABITypeAlignment(Ty);
753 
754  if (Alignment >= Size)
755  return true;
756  }
757  }
758 
759  if (CurDAG->isBaseWithConstantOffset(N)) {
760  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
761  int64_t RHSC = (int64_t)RHS->getZExtValue();
762  unsigned Scale = Log2_32(Size);
763  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
764  Base = N.getOperand(0);
765  if (Base.getOpcode() == ISD::FrameIndex) {
766  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
767  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
768  }
769  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
770  return true;
771  }
772  }
773  }
774 
775  // Before falling back to our general case, check if the unscaled
776  // instructions can handle this. If so, that's preferable.
777  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
778  return false;
779 
780  // Base only. The address will be materialized into a register before
781  // the memory is accessed.
782  // add x0, Xbase, #offset
783  // ldr x0, [x0]
784  Base = N;
785  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
786  return true;
787 }
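
// Example for Size == 4 (a 32-bit access): the unsigned scaled offset field
// covers multiples of 4 in [0, 16380]:
//   ldr w1, [x0, #16380]   ; RHSC = 16380 -> OffImm = 4095
// Anything negative, unaligned, or larger is either matched by the unscaled
// form below or materialized with a separate add.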
788 
789 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
790 /// immediate" address. This should only match when there is an offset that
791 /// is not valid for a scaled immediate addressing mode. The "Size" argument
792 /// is the size in bytes of the memory reference, which is needed here to know
793 /// what is valid for a scaled immediate.
794 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
795  SDValue &Base,
796  SDValue &OffImm) {
797  if (!CurDAG->isBaseWithConstantOffset(N))
798  return false;
799  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
800  int64_t RHSC = RHS->getSExtValue();
801  // If the offset is valid as a scaled immediate, don't match here.
802  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
803  RHSC < (0x1000 << Log2_32(Size)))
804  return false;
805  if (RHSC >= -256 && RHSC < 256) {
806  Base = N.getOperand(0);
807  if (Base.getOpcode() == ISD::FrameIndex) {
808  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
809  const TargetLowering *TLI = getTargetLowering();
810  Base = CurDAG->getTargetFrameIndex(
811  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
812  }
813  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
814  return true;
815  }
816  }
817  return false;
818 }
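
// This corresponds to the LDUR/STUR family: any byte offset in [-256, 255],
// e.g. "ldur w1, [x0, #-3]". Offsets that are also valid scaled immediates
// (non-negative, aligned, in range) are deliberately rejected so the scaled
// form above stays preferred.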
819 
820 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
821  SDLoc dl(N);
822  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
823  SDValue ImpDef = SDValue(
824  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
825  MachineSDNode *Node = CurDAG->getMachineNode(
826  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
827  return SDValue(Node, 0);
828 }
829 
830 /// Check if the given SHL node (\p N) can be used to form an
831 /// extended register for an addressing mode.
832 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
833  bool WantExtend, SDValue &Offset,
834  SDValue &SignExtend) {
835  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
836  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
837  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
838  return false;
839 
840  SDLoc dl(N);
841  if (WantExtend) {
842  AArch64_AM::ShiftExtendType Ext =
843  getExtendTypeForNode(N.getOperand(0), true);
844  if (Ext == AArch64_AM::InvalidShiftExtend)
845  return false;
846 
847  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
848  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
849  MVT::i32);
850  } else {
851  Offset = N.getOperand(0);
852  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
853  }
854 
855  unsigned LegalShiftVal = Log2_32(Size);
856  unsigned ShiftVal = CSD->getZExtValue();
857 
858  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
859  return false;
860 
861  return isWorthFolding(N);
862 }
863 
864 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
865  SDValue &Base, SDValue &Offset,
866  SDValue &SignExtend,
867  SDValue &DoShift) {
868  if (N.getOpcode() != ISD::ADD)
869  return false;
870  SDValue LHS = N.getOperand(0);
871  SDValue RHS = N.getOperand(1);
872  SDLoc dl(N);
873 
874  // We don't want to match immediate adds here, because they are better lowered
875  // to the register-immediate addressing modes.
876  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
877  return false;
878 
879  // Check if this particular node is reused in any non-memory related
880  // operation. If yes, do not try to fold this node into the address
881  // computation, since the computation will be kept.
882  const SDNode *Node = N.getNode();
883  for (SDNode *UI : Node->uses()) {
884  if (!isa<MemSDNode>(*UI))
885  return false;
886  }
887 
888  // Remember if it is worth folding N when it produces extended register.
889  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
890 
891  // Try to match a shifted extend on the RHS.
892  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
893  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
894  Base = LHS;
895  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
896  return true;
897  }
898 
899  // Try to match a shifted extend on the LHS.
900  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
901  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
902  Base = RHS;
903  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
904  return true;
905  }
906 
907  // There was no shift, whatever else we find.
908  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
909 
910  AArch64_AM::ShiftExtendType Ext;
911  // Try to match an unshifted extend on the LHS.
912  if (IsExtendedRegisterWorthFolding &&
913  (Ext = getExtendTypeForNode(LHS, true)) !=
914  AArch64_AM::InvalidShiftExtend) {
915  Base = RHS;
916  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
917  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
918  MVT::i32);
919  if (isWorthFolding(LHS))
920  return true;
921  }
922 
923  // Try to match an unshifted extend on the RHS.
924  if (IsExtendedRegisterWorthFolding &&
925  (Ext = getExtendTypeForNode(RHS, true)) !=
926  AArch64_AM::InvalidShiftExtend) {
927  Base = LHS;
928  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
929  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
930  MVT::i32);
931  if (isWorthFolding(RHS))
932  return true;
933  }
934 
935  return false;
936 }
937 
938 // Check if the given immediate is preferred by ADD. If an immediate can be
939 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
940 // encoded by one MOVZ, return true.
941 static bool isPreferredADD(int64_t ImmOff) {
942  // Constant in [0x0, 0xfff] can be encoded in ADD.
943  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
944  return true;
945  // Check if it can be encoded in an "ADD LSL #12".
946  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
947  // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
948  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
949  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
950  return false;
951 }
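
// Examples:
//   ImmOff = 0x00000abc -> true  (plain "add Xd, Xn, #0xabc")
//   ImmOff = 0x00abc000 -> true  ("add Xd, Xn, #0xabc, lsl #12"; it is not a
//                                 single MOVZ because two halfwords are set)
//   ImmOff = 0x00020000 -> false (a single "movz Xd, #0x2, lsl #16" is cheaper)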
952 
953 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
954  SDValue &Base, SDValue &Offset,
955  SDValue &SignExtend,
956  SDValue &DoShift) {
957  if (N.getOpcode() != ISD::ADD)
958  return false;
959  SDValue LHS = N.getOperand(0);
960  SDValue RHS = N.getOperand(1);
961  SDLoc DL(N);
962 
963  // Check if this particular node is reused in any non-memory related
964  // operation. If yes, do not try to fold this node into the address
965  // computation, since the computation will be kept.
966  const SDNode *Node = N.getNode();
967  for (SDNode *UI : Node->uses()) {
968  if (!isa<MemSDNode>(*UI))
969  return false;
970  }
971 
972  // Watch out if RHS is a wide immediate; it cannot be selected into the
973  // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
974  // either. Instead it will use the [BaseReg + 0] address mode and generate
975  // instructions like:
976  // MOV X0, WideImmediate
977  // ADD X1, BaseReg, X0
978  // LDR X2, [X1, 0]
979  // For such situation, using [BaseReg, XReg] addressing mode can save one
980  // ADD/SUB:
981  // MOV X0, WideImmediate
982  // LDR X2, [BaseReg, X0]
983  if (isa<ConstantSDNode>(RHS)) {
984  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
985  unsigned Scale = Log2_32(Size);
986  // Skip the immediate if it can be selected by the load/store addressing mode.
987  // Also skip it if it can be encoded by a single ADD (SUB is also checked
988  // by using -ImmOff).
989  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
990  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
991  return false;
992 
993  SDValue Ops[] = { RHS };
994  SDNode *MOVI =
995  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
996  SDValue MOVIV = SDValue(MOVI, 0);
997  // This ADD of two X registers will be selected into [Reg+Reg] mode.
998  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
999  }
1000 
1001  // Remember if it is worth folding N when it produces extended register.
1002  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1003 
1004  // Try to match a shifted extend on the RHS.
1005  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1006  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1007  Base = LHS;
1008  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1009  return true;
1010  }
1011 
1012  // Try to match a shifted extend on the LHS.
1013  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1014  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1015  Base = RHS;
1016  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1017  return true;
1018  }
1019 
1020  // Match any non-shifted, non-extend, non-immediate add expression.
1021  Base = LHS;
1022  Offset = RHS;
1023  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1024  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1025  // Reg1 + Reg2 is free: no check needed.
1026  return true;
1027 }
1028 
1029 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1030  static const unsigned RegClassIDs[] = {
1031  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1032  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1033  AArch64::dsub2, AArch64::dsub3};
1034 
1035  return createTuple(Regs, RegClassIDs, SubRegs);
1036 }
1037 
1038 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1039  static const unsigned RegClassIDs[] = {
1040  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1041  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1042  AArch64::qsub2, AArch64::qsub3};
1043 
1044  return createTuple(Regs, RegClassIDs, SubRegs);
1045 }
1046 
1047 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1048  const unsigned RegClassIDs[],
1049  const unsigned SubRegs[]) {
1050  // There's no special register-class for a vector-list of 1 element: it's just
1051  // a vector.
1052  if (Regs.size() == 1)
1053  return Regs[0];
1054 
1055  assert(Regs.size() >= 2 && Regs.size() <= 4);
1056 
1057  SDLoc DL(Regs[0]);
1058 
1059  SmallVector<SDValue, 4> Ops;
1060 
1061  // First operand of REG_SEQUENCE is the desired RegClass.
1062  Ops.push_back(
1063  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1064 
1065  // Then we get pairs of source & subregister-position for the components.
1066  for (unsigned i = 0; i < Regs.size(); ++i) {
1067  Ops.push_back(Regs[i]);
1068  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1069  }
1070 
1071  SDNode *N =
1072  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1073  return SDValue(N, 0);
1074 }
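
// For instance, createDTuple on two 64-bit vector values {v0, v1} produces
//   REG_SEQUENCE DDRegClassID, v0, dsub0, v1, dsub1
// an untyped node constrained to a consecutive D-register pair, which is the
// vector-list operand shape the ld2/st2 style instructions expect.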
1075 
1076 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1077  bool isExt) {
1078  SDLoc dl(N);
1079  EVT VT = N->getValueType(0);
1080 
1081  unsigned ExtOff = isExt;
1082 
1083  // Form a REG_SEQUENCE to force register allocation.
1084  unsigned Vec0Off = ExtOff + 1;
1085  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1086  N->op_begin() + Vec0Off + NumVecs);
1087  SDValue RegSeq = createQTuple(Regs);
1088 
1089  SmallVector<SDValue, 6> Ops;
1090  if (isExt)
1091  Ops.push_back(N->getOperand(1));
1092  Ops.push_back(RegSeq);
1093  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1094  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1095 }
1096 
1097 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1098  LoadSDNode *LD = cast<LoadSDNode>(N);
1099  if (LD->isUnindexed())
1100  return false;
1101  EVT VT = LD->getMemoryVT();
1102  EVT DstVT = N->getValueType(0);
1103  ISD::MemIndexedMode AM = LD->getAddressingMode();
1104  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1105 
1106  // We're not doing validity checking here. That was done when checking
1107  // if we should mark the load as indexed or not. We're just selecting
1108  // the right instruction.
1109  unsigned Opcode = 0;
1110 
1111  ISD::LoadExtType ExtType = LD->getExtensionType();
1112  bool InsertTo64 = false;
1113  if (VT == MVT::i64)
1114  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1115  else if (VT == MVT::i32) {
1116  if (ExtType == ISD::NON_EXTLOAD)
1117  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1118  else if (ExtType == ISD::SEXTLOAD)
1119  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1120  else {
1121  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1122  InsertTo64 = true;
1123  // The result of the load is only i32. It's the subreg_to_reg that makes
1124  // it into an i64.
1125  DstVT = MVT::i32;
1126  }
1127  } else if (VT == MVT::i16) {
1128  if (ExtType == ISD::SEXTLOAD) {
1129  if (DstVT == MVT::i64)
1130  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1131  else
1132  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1133  } else {
1134  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1135  InsertTo64 = DstVT == MVT::i64;
1136  // The result of the load is only i32. It's the subreg_to_reg that makes
1137  // it into an i64.
1138  DstVT = MVT::i32;
1139  }
1140  } else if (VT == MVT::i8) {
1141  if (ExtType == ISD::SEXTLOAD) {
1142  if (DstVT == MVT::i64)
1143  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1144  else
1145  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1146  } else {
1147  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1148  InsertTo64 = DstVT == MVT::i64;
1149  // The result of the load is only i32. It's the subreg_to_reg that makes
1150  // it into an i64.
1151  DstVT = MVT::i32;
1152  }
1153  } else if (VT == MVT::f16) {
1154  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1155  } else if (VT == MVT::f32) {
1156  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1157  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1158  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1159  } else if (VT.is128BitVector()) {
1160  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1161  } else
1162  return false;
1163  SDValue Chain = LD->getChain();
1164  SDValue Base = LD->getBasePtr();
1165  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1166  int OffsetVal = (int)OffsetOp->getZExtValue();
1167  SDLoc dl(N);
1168  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1169  SDValue Ops[] = { Base, Offset, Chain };
1170  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1171  MVT::Other, Ops);
1172  // Either way, we're replacing the node, so tell the caller that.
1173  SDValue LoadedVal = SDValue(Res, 1);
1174  if (InsertTo64) {
1175  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1176  LoadedVal =
1177  SDValue(CurDAG->getMachineNode(
1178  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1179  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1180  SubReg),
1181  0);
1182  }
1183 
1184  ReplaceUses(SDValue(N, 0), LoadedVal);
1185  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1186  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1187  CurDAG->RemoveDeadNode(N);
1188  return true;
1189 }
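
// Example: a post-incremented i64 load
//   (load x, [x0]), x0 += 8
// becomes "ldr x1, [x0], #8" (LDRXpost), producing both the loaded value and
// the updated base register. A zero-extending i8 load into an i64 result also
// needs the SUBREG_TO_REG above, since LDRBBpost only defines a W register.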
1190 
1191 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1192  unsigned SubRegIdx) {
1193  SDLoc dl(N);
1194  EVT VT = N->getValueType(0);
1195  SDValue Chain = N->getOperand(0);
1196 
1197  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1198  Chain};
1199 
1200  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1201 
1202  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1203  SDValue SuperReg = SDValue(Ld, 0);
1204  for (unsigned i = 0; i < NumVecs; ++i)
1205  ReplaceUses(SDValue(N, i),
1206  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1207 
1208  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1209 
1210  // Transfer memoperands.
1211  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1212  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1213  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
1214 
1215  CurDAG->RemoveDeadNode(N);
1216 }
1217 
1218 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1219  unsigned Opc, unsigned SubRegIdx) {
1220  SDLoc dl(N);
1221  EVT VT = N->getValueType(0);
1222  SDValue Chain = N->getOperand(0);
1223 
1224  SDValue Ops[] = {N->getOperand(1), // Mem operand
1225  N->getOperand(2), // Incremental
1226  Chain};
1227 
1228  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1229  MVT::Untyped, MVT::Other};
1230 
1231  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1232 
1233  // Update uses of write back register
1234  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1235 
1236  // Update uses of vector list
1237  SDValue SuperReg = SDValue(Ld, 1);
1238  if (NumVecs == 1)
1239  ReplaceUses(SDValue(N, 0), SuperReg);
1240  else
1241  for (unsigned i = 0; i < NumVecs; ++i)
1242  ReplaceUses(SDValue(N, i),
1243  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1244 
1245  // Update the chain
1246  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1247  CurDAG->RemoveDeadNode(N);
1248 }
1249 
1250 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1251  unsigned Opc) {
1252  SDLoc dl(N);
1253  EVT VT = N->getOperand(2)->getValueType(0);
1254 
1255  // Form a REG_SEQUENCE to force register allocation.
1256  bool Is128Bit = VT.getSizeInBits() == 128;
1257  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1258  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1259 
1260  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1261  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1262 
1263  // Transfer memoperands.
1264  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1265  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1266  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1267 
1268  ReplaceNode(N, St);
1269 }
1270 
1271 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1272  unsigned Opc) {
1273  SDLoc dl(N);
1274  EVT VT = N->getOperand(2)->getValueType(0);
1275  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1276  MVT::Other}; // Type for the Chain
1277 
1278  // Form a REG_SEQUENCE to force register allocation.
1279  bool Is128Bit = VT.getSizeInBits() == 128;
1280  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1281  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1282 
1283  SDValue Ops[] = {RegSeq,
1284  N->getOperand(NumVecs + 1), // base register
1285  N->getOperand(NumVecs + 2), // Incremental
1286  N->getOperand(0)}; // Chain
1287  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1288 
1289  ReplaceNode(N, St);
1290 }
1291 
1292 namespace {
1293 /// WidenVector - Given a value in the V64 register class, produce the
1294 /// equivalent value in the V128 register class.
1295 class WidenVector {
1296  SelectionDAG &DAG;
1297 
1298 public:
1299  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1300 
1301  SDValue operator()(SDValue V64Reg) {
1302  EVT VT = V64Reg.getValueType();
1303  unsigned NarrowSize = VT.getVectorNumElements();
1304  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1305  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1306  SDLoc DL(V64Reg);
1307 
1308  SDValue Undef =
1309  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1310  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1311  }
1312 };
1313 } // namespace
1314 
1315 /// NarrowVector - Given a value in the V128 register class, produce the
1316 /// equivalent value in the V64 register class.
1317 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1318  EVT VT = V128Reg.getValueType();
1319  unsigned WideSize = VT.getVectorNumElements();
1320  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1321  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1322 
1323  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1324  V128Reg);
1325 }
1326 
1327 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1328  unsigned Opc) {
1329  SDLoc dl(N);
1330  EVT VT = N->getValueType(0);
1331  bool Narrow = VT.getSizeInBits() == 64;
1332 
1333  // Form a REG_SEQUENCE to force register allocation.
1334  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1335 
1336  if (Narrow)
1337  transform(Regs, Regs.begin(),
1338  WidenVector(*CurDAG));
1339 
1340  SDValue RegSeq = createQTuple(Regs);
1341 
1342  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1343 
1344  unsigned LaneNo =
1345  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1346 
1347  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1348  N->getOperand(NumVecs + 3), N->getOperand(0)};
1349  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1350  SDValue SuperReg = SDValue(Ld, 0);
1351 
1352  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1353  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1354  AArch64::qsub2, AArch64::qsub3 };
1355  for (unsigned i = 0; i < NumVecs; ++i) {
1356  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1357  if (Narrow)
1358  NV = NarrowVector(NV, *CurDAG);
1359  ReplaceUses(SDValue(N, i), NV);
1360  }
1361 
1362  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1363  CurDAG->RemoveDeadNode(N);
1364 }
1365 
1366 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1367  unsigned Opc) {
1368  SDLoc dl(N);
1369  EVT VT = N->getValueType(0);
1370  bool Narrow = VT.getSizeInBits() == 64;
1371 
1372  // Form a REG_SEQUENCE to force register allocation.
1373  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1374 
1375  if (Narrow)
1376  transform(Regs, Regs.begin(),
1377  WidenVector(*CurDAG));
1378 
1379  SDValue RegSeq = createQTuple(Regs);
1380 
1381  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1382  RegSeq->getValueType(0), MVT::Other};
1383 
1384  unsigned LaneNo =
1385  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1386 
1387  SDValue Ops[] = {RegSeq,
1388  CurDAG->getTargetConstant(LaneNo, dl,
1389  MVT::i64), // Lane Number
1390  N->getOperand(NumVecs + 2), // Base register
1391  N->getOperand(NumVecs + 3), // Incremental
1392  N->getOperand(0)};
1393  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1394 
1395  // Update uses of the write back register
1396  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1397 
1398  // Update uses of the vector list
1399  SDValue SuperReg = SDValue(Ld, 1);
1400  if (NumVecs == 1) {
1401  ReplaceUses(SDValue(N, 0),
1402  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1403  } else {
1404  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1405  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1406  AArch64::qsub2, AArch64::qsub3 };
1407  for (unsigned i = 0; i < NumVecs; ++i) {
1408  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1409  SuperReg);
1410  if (Narrow)
1411  NV = NarrowVector(NV, *CurDAG);
1412  ReplaceUses(SDValue(N, i), NV);
1413  }
1414  }
1415 
1416  // Update the Chain
1417  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1418  CurDAG->RemoveDeadNode(N);
1419 }
1420 
1421 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1422  unsigned Opc) {
1423  SDLoc dl(N);
1424  EVT VT = N->getOperand(2)->getValueType(0);
1425  bool Narrow = VT.getSizeInBits() == 64;
1426 
1427  // Form a REG_SEQUENCE to force register allocation.
1428  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1429 
1430  if (Narrow)
1431  transform(Regs, Regs.begin(),
1432  WidenVector(*CurDAG));
1433 
1434  SDValue RegSeq = createQTuple(Regs);
1435 
1436  unsigned LaneNo =
1437  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1438 
1439  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1440  N->getOperand(NumVecs + 3), N->getOperand(0)};
1441  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1442 
1443  // Transfer memoperands.
1444  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1445  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1446  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1447 
1448  ReplaceNode(N, St);
1449 }
1450 
1451 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1452  unsigned Opc) {
1453  SDLoc dl(N);
1454  EVT VT = N->getOperand(2)->getValueType(0);
1455  bool Narrow = VT.getSizeInBits() == 64;
1456 
1457  // Form a REG_SEQUENCE to force register allocation.
1458  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1459 
1460  if (Narrow)
1461  transform(Regs, Regs.begin(),
1462  WidenVector(*CurDAG));
1463 
1464  SDValue RegSeq = createQTuple(Regs);
1465 
1466  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1467  MVT::Other};
1468 
1469  unsigned LaneNo =
1470  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1471 
1472  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1473  N->getOperand(NumVecs + 2), // Base Register
1474  N->getOperand(NumVecs + 3), // Incremental
1475  N->getOperand(0)};
1476  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1477 
1478  // Transfer memoperands.
1479  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1480  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1481  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1482 
1483  ReplaceNode(N, St);
1484 }
1485 
1486 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1487  unsigned &Opc, SDValue &Opd0,
1488  unsigned &LSB, unsigned &MSB,
1489  unsigned NumberOfIgnoredLowBits,
1490  bool BiggerPattern) {
1491  assert(N->getOpcode() == ISD::AND &&
1492  "N must be a AND operation to call this function");
1493 
1494  EVT VT = N->getValueType(0);
1495 
1496  // Here we can test the type of VT and return false when the type does not
1497  // match, but since it is done prior to that call in the current context
1498  // we turned that into an assert to avoid redundant code.
1499  assert((VT == MVT::i32 || VT == MVT::i64) &&
1500  "Type checking must have been done before calling this function");
1501 
1502  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1503  // changed the AND node to a 32-bit mask operation. We'll have to
1504  // undo that as part of the transform here if we want to catch all
1505  // the opportunities.
1506  // Currently the NumberOfIgnoredLowBits argument helps to recover
1507  // from these situations when matching a bigger pattern (bitfield insert).
1508 
1509  // For unsigned extracts, check for a shift right and mask
1510  uint64_t AndImm = 0;
1511  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1512  return false;
1513 
1514  const SDNode *Op0 = N->getOperand(0).getNode();
1515 
1516  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1517  // simplified. Try to undo that
1518  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1519 
1520  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1521  if (AndImm & (AndImm + 1))
1522  return false;
1523 
1524  bool ClampMSB = false;
1525  uint64_t SrlImm = 0;
1526  // Handle the SRL + ANY_EXTEND case.
1527  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1528  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1529  // Extend the incoming operand of the SRL to 64-bit.
1530  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1531  // Make sure to clamp the MSB so that we preserve the semantics of the
1532  // original operations.
1533  ClampMSB = true;
1534  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1535  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1536  SrlImm)) {
1537  // If the shift result was truncated, we can still combine them.
1538  Opd0 = Op0->getOperand(0).getOperand(0);
1539 
1540  // Use the type of SRL node.
1541  VT = Opd0->getValueType(0);
1542  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1543  Opd0 = Op0->getOperand(0);
1544  } else if (BiggerPattern) {
1545  // Let's pretend a 0 shift right has been performed.
1546  // The resulting code will be at least as good as the original one
1547  // plus it may expose more opportunities for bitfield insert pattern.
1548  // FIXME: Currently we limit this to the bigger pattern, because
1549  // some optimizations expect AND and not UBFM.
1550  Opd0 = N->getOperand(0);
1551  } else
1552  return false;
1553 
1554  // Bail out on large immediates. This happens when no proper
1555  // combining/constant folding was performed.
1556  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1557  LLVM_DEBUG(
1558  (dbgs() << N
1559  << ": Found large shift immediate, this should not happen\n"));
1560  return false;
1561  }
1562 
1563  LSB = SrlImm;
1564  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1565  : countTrailingOnes<uint64_t>(AndImm)) -
1566  1;
1567  if (ClampMSB)
1568  // Since we're moving the extend before the right shift operation, we need
1569  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1570  // the zeros which would get shifted in with the original right shift
1571  // operation.
1572  MSB = MSB > 31 ? 31 : MSB;
1573 
1574  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1575  return true;
1576 }
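// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the LSB/MSB arithmetic
// above, assuming plain <cstdint> integer types; the helper name is
// hypothetical. For (and (srl X, SrlImm), AndImm) with AndImm a mask of the
// low bits, the routine reports LSB = SrlImm and
// MSB = SrlImm + countTrailingOnes(AndImm) - 1, which is the UBFM field range.
static bool sketchSrlAndToUbfm(uint64_t SrlImm, uint64_t AndImm,
                               unsigned &LSB, unsigned &MSB) {
  if (AndImm == 0 || (AndImm & (AndImm + 1)) != 0)
    return false;                       // AndImm must be a mask of low bits.
  unsigned Ones = 0;
  for (uint64_t M = AndImm; M & 1; M >>= 1)
    ++Ones;                             // countTrailingOnes(AndImm)
  LSB = unsigned(SrlImm);
  MSB = unsigned(SrlImm) + Ones - 1;
  return true;
}
// E.g. (and (srl X, 3), 0x1f) yields LSB = 3 and MSB = 7, i.e. the
// UBFX X, #3, #5 form of UBFM.
// -----------------------------------------------------------------------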
1577 
1578 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1579  SDValue &Opd0, unsigned &Immr,
1580  unsigned &Imms) {
1581  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1582 
1583  EVT VT = N->getValueType(0);
1584  unsigned BitWidth = VT.getSizeInBits();
1585  assert((VT == MVT::i32 || VT == MVT::i64) &&
1586  "Type checking must have been done before calling this function");
1587 
1588  SDValue Op = N->getOperand(0);
1589  if (Op->getOpcode() == ISD::TRUNCATE) {
1590  Op = Op->getOperand(0);
1591  VT = Op->getValueType(0);
1592  BitWidth = VT.getSizeInBits();
1593  }
1594 
1595  uint64_t ShiftImm;
1596  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1597  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1598  return false;
1599 
1600  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1601  if (ShiftImm + Width > BitWidth)
1602  return false;
1603 
1604  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1605  Opd0 = Op.getOperand(0);
1606  Immr = ShiftImm;
1607  Imms = ShiftImm + Width - 1;
1608  return true;
1609 }
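// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the SBFM immediates
// computed above, on plain integers; the helper name is hypothetical. For
// (sign_extend_inreg (srl/sra X, ShiftImm), iWidth) the selection uses
// Immr = ShiftImm and Imms = ShiftImm + Width - 1, provided the field fits
// in the register.
static bool sketchSExtInRegToSbfm(unsigned ShiftImm, unsigned Width,
                                  unsigned BitWidth, unsigned &Immr,
                                  unsigned &Imms) {
  if (ShiftImm + Width > BitWidth)
    return false;                       // field would run past the register
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}
// E.g. sign_extend_inreg(srl(X, 8), i8) on an i32 value gives Immr = 8 and
// Imms = 15, i.e. SBFX w0, w1, #8, #8.
// -----------------------------------------------------------------------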
1610 
1611 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1612  SDValue &Opd0, unsigned &LSB,
1613  unsigned &MSB) {
1614  // We are looking for the following pattern, which basically extracts several
1615  // contiguous bits from the source value and places them at the LSB of the
1616  // destination value; all other bits of the destination value are set to zero:
1617  //
1618  // Value2 = AND Value, MaskImm
1619  // SRL Value2, ShiftImm
1620  //
1621  // where MaskImm >> ShiftImm determines the bit width.
1622  //
1623  // This gets selected into a single UBFM:
1624  //
1625  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1626  //
1627 
1628  if (N->getOpcode() != ISD::SRL)
1629  return false;
1630 
1631  uint64_t AndMask = 0;
1632  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1633  return false;
1634 
1635  Opd0 = N->getOperand(0).getOperand(0);
1636 
1637  uint64_t SrlImm = 0;
1638  if (!isIntImmediate(N->getOperand(1), SrlImm))
1639  return false;
1640 
1641  // Check whether we really have several bits extract here.
1642  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1643  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1644  if (N->getValueType(0) == MVT::i32)
1645  Opc = AArch64::UBFMWri;
1646  else
1647  Opc = AArch64::UBFMXri;
1648 
1649  LSB = SrlImm;
1650  MSB = BitWide + SrlImm - 1;
1651  return true;
1652  }
1653 
1654  return false;
1655 }
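// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the several-bits-extract
// arithmetic above, on plain integers; the helper name is hypothetical. For
// (srl (and X, AndMask), SrlImm), if AndMask >> SrlImm is a mask of BitWide
// low bits, the UBFM operands are LSB = SrlImm and MSB = BitWide + SrlImm - 1.
static bool sketchShrOfAndToUbfm(uint64_t AndMask, uint64_t SrlImm,
                                 unsigned &LSB, unsigned &MSB) {
  uint64_t Shifted = AndMask >> SrlImm;
  if (Shifted == 0 || (Shifted & (Shifted + 1)) != 0)
    return false;                       // need a mask of the low bits
  unsigned BitWide = 0;
  for (uint64_t M = Shifted; M & 1; M >>= 1)
    ++BitWide;
  LSB = unsigned(SrlImm);
  MSB = BitWide + unsigned(SrlImm) - 1;
  return true;
}
// E.g. (srl (and X, 0xff0), 4): AndMask >> 4 == 0xff, BitWide == 8, so the
// node becomes UBFM X, #4, #11, i.e. UBFX X, #4, #8.
// -----------------------------------------------------------------------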
1656 
1657 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1658  unsigned &Immr, unsigned &Imms,
1659  bool BiggerPattern) {
1660  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1661  "N must be a SHR/SRA operation to call this function");
1662 
1663  EVT VT = N->getValueType(0);
1664 
1665  // We could test the type of VT here and return false when it does not
1666  // match, but since that check is done prior to this call in the current
1667  // context, we turned it into an assert to avoid redundant code.
1668  assert((VT == MVT::i32 || VT == MVT::i64) &&
1669  "Type checking must have been done before calling this function");
1670 
1671  // Check for AND + SRL doing several bits extract.
1672  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1673  return true;
1674 
1675  // We're looking for a shift of a shift.
1676  uint64_t ShlImm = 0;
1677  uint64_t TruncBits = 0;
1678  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1679  Opd0 = N->getOperand(0).getOperand(0);
1680  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1681  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1682  // We are looking for a shift of a truncate. A truncate from i64 to i32 can
1683  // be considered as setting the high 32 bits to zero. Our strategy here is to
1684  // always generate a 64-bit UBFM. This consistency will help the CSE pass
1685  // later find more redundancy.
1686  Opd0 = N->getOperand(0).getOperand(0);
1687  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1688  VT = Opd0.getValueType();
1689  assert(VT == MVT::i64 && "the promoted type should be i64");
1690  } else if (BiggerPattern) {
1691  // Let's pretend a 0 shift left has been performed.
1692  // FIXME: Currently we limit this to the bigger pattern case,
1693  // because some optimizations expect AND and not UBFM
1694  Opd0 = N->getOperand(0);
1695  } else
1696  return false;
1697 
1698  // Missing combines/constant folding may have left us with strange
1699  // constants.
1700  if (ShlImm >= VT.getSizeInBits()) {
1701  LLVM_DEBUG(
1702  (dbgs() << N
1703  << ": Found large shift immediate, this should not happen\n"));
1704  return false;
1705  }
1706 
1707  uint64_t SrlImm = 0;
1708  if (!isIntImmediate(N->getOperand(1), SrlImm))
1709  return false;
1710 
1711  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1712  "bad amount in shift node!");
1713  int immr = SrlImm - ShlImm;
1714  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1715  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1716  // SRA requires a signed extraction
1717  if (VT == MVT::i32)
1718  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1719  else
1720  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1721  return true;
1722 }
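// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the (shl, shift-right)
// pair to BFM-immediate translation above, on plain integers; the helper name
// is hypothetical and TruncBits is assumed zero. Immr wraps modulo the
// register width when the left-shift amount exceeds the right-shift amount.
static void sketchShiftPairToBfm(unsigned ShlImm, unsigned ShrImm,
                                 unsigned BitWidth, unsigned &Immr,
                                 unsigned &Imms) {
  int R = int(ShrImm) - int(ShlImm);
  Immr = R < 0 ? unsigned(R + int(BitWidth)) : unsigned(R);
  Imms = BitWidth - ShlImm - 1;
}
// E.g. (sra (shl X, 24), 24) on i32 gives Immr = 0 and Imms = 7, i.e.
// SBFM w0, w1, #0, #7, which is the canonical SXTB encoding.
// -----------------------------------------------------------------------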
1723 
1724 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1725  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1726 
1727  EVT VT = N->getValueType(0);
1728  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1729  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1730  return false;
1731 
1732  uint64_t ShiftImm;
1733  SDValue Op = N->getOperand(0);
1734  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1735  return false;
1736 
1737  SDLoc dl(N);
1738  // Extend the incoming operand of the shift to 64-bits.
1739  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1740  unsigned Immr = ShiftImm;
1741  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1742  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1743  CurDAG->getTargetConstant(Imms, dl, VT)};
1744  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1745  return true;
1746 }
1747 
1748 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1749  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1750  unsigned NumberOfIgnoredLowBits = 0,
1751  bool BiggerPattern = false) {
1752  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1753  return false;
1754 
1755  switch (N->getOpcode()) {
1756  default:
1757  if (!N->isMachineOpcode())
1758  return false;
1759  break;
1760  case ISD::AND:
1761  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1762  NumberOfIgnoredLowBits, BiggerPattern);
1763  case ISD::SRL:
1764  case ISD::SRA:
1765  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1766 
1767  case ISD::SIGN_EXTEND_INREG:
1768  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1769  }
1770 
1771  unsigned NOpc = N->getMachineOpcode();
1772  switch (NOpc) {
1773  default:
1774  return false;
1775  case AArch64::SBFMWri:
1776  case AArch64::UBFMWri:
1777  case AArch64::SBFMXri:
1778  case AArch64::UBFMXri:
1779  Opc = NOpc;
1780  Opd0 = N->getOperand(0);
1781  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1782  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1783  return true;
1784  }
1785  // Unreachable
1786  return false;
1787 }
1788 
1789 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1790  unsigned Opc, Immr, Imms;
1791  SDValue Opd0;
1792  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1793  return false;
1794 
1795  EVT VT = N->getValueType(0);
1796  SDLoc dl(N);
1797 
1798  // If the bit extract operation is 64bit but the original type is 32bit, we
1799  // need to add one EXTRACT_SUBREG.
1800  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1801  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1802  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1803 
1804  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1805  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1806  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1807  MVT::i32, SDValue(BFM, 0), SubReg));
1808  return true;
1809  }
1810 
1811  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1812  CurDAG->getTargetConstant(Imms, dl, VT)};
1813  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1814  return true;
1815 }
1816 
1817 /// Does DstMask form a complementary pair with the mask provided by
1818 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1819 /// this asks whether DstMask zeroes precisely those bits that will be set by
1820 /// the other half.
1821 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1822  unsigned NumberOfIgnoredHighBits, EVT VT) {
1823  assert((VT == MVT::i32 || VT == MVT::i64) &&
1824  "i32 or i64 mask type expected!");
1825  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1826 
1827  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1828  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1829 
1830  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1831  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1832 }
1833 
1834 // Look for bits that will be useful for later uses.
1835 // A bit is considered useless as soon as it is dropped and never used
1836 // before it has been dropped.
1837 // E.g., looking for the useful bits of x:
1838 // 1. y = x & 0x7
1839 // 2. z = y >> 2
1840 // After #1, the useful bits of x are 0x7; these useful bits of x live through
1841 // y.
1842 // After #2, the useful bits of x are 0x4.
1843 // However, if x is used by an unpredictable instruction, then all its bits
1844 // are useful.
1845 // E.g.
1846 // 1. y = x & 0x7
1847 // 2. z = y >> 2
1848 // 3. str x, [@x]
1849 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1850 
1851 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1852  unsigned Depth) {
1853  uint64_t Imm =
1854  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1855  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1856  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1857  getUsefulBits(Op, UsefulBits, Depth + 1);
1858 }
1859 
1860 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1861  uint64_t Imm, uint64_t MSB,
1862  unsigned Depth) {
1863  // inherit the bitwidth value
1864  APInt OpUsefulBits(UsefulBits);
1865  OpUsefulBits = 1;
1866 
1867  if (MSB >= Imm) {
1868  OpUsefulBits <<= MSB - Imm + 1;
1869  --OpUsefulBits;
1870  // The interesting part will be in the lower part of the result
1871  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1872  // The interesting part was starting at Imm in the argument
1873  OpUsefulBits <<= Imm;
1874  } else {
1875  OpUsefulBits <<= MSB + 1;
1876  --OpUsefulBits;
1877  // The interesting part will be shifted in the result
1878  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1879  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1880  // The interesting part was at zero in the argument
1881  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1882  }
1883 
1884  UsefulBits &= OpUsefulBits;
1885 }
1886 
1887 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1888  unsigned Depth) {
1889  uint64_t Imm =
1890  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1891  uint64_t MSB =
1892  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1893 
1894  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1895 }
1896 
1897 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1898  unsigned Depth) {
1899  uint64_t ShiftTypeAndValue =
1900  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1901  APInt Mask(UsefulBits);
1902  Mask.clearAllBits();
1903  Mask.flipAllBits();
1904 
1905  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1906  // Shift Left
1907  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1908  Mask <<= ShiftAmt;
1909  getUsefulBits(Op, Mask, Depth + 1);
1910  Mask.lshrInPlace(ShiftAmt);
1911  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1912  // Shift Right
1913  // We do not handle AArch64_AM::ASR, because the sign will change the
1914  // number of useful bits
1915  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1916  Mask.lshrInPlace(ShiftAmt);
1917  getUsefulBits(Op, Mask, Depth + 1);
1918  Mask <<= ShiftAmt;
1919  } else
1920  return;
1921 
1922  UsefulBits &= Mask;
1923 }
1924 
1925 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1926  unsigned Depth) {
1927  uint64_t Imm =
1928  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1929  uint64_t MSB =
1930  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1931 
1932  APInt OpUsefulBits(UsefulBits);
1933  OpUsefulBits = 1;
1934 
1935  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1936  ResultUsefulBits.flipAllBits();
1937  APInt Mask(UsefulBits.getBitWidth(), 0);
1938 
1939  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1940 
1941  if (MSB >= Imm) {
1942  // The instruction is a BFXIL.
1943  uint64_t Width = MSB - Imm + 1;
1944  uint64_t LSB = Imm;
1945 
1946  OpUsefulBits <<= Width;
1947  --OpUsefulBits;
1948 
1949  if (Op.getOperand(1) == Orig) {
1950  // Copy the low bits from the result to bits starting from LSB.
1951  Mask = ResultUsefulBits & OpUsefulBits;
1952  Mask <<= LSB;
1953  }
1954 
1955  if (Op.getOperand(0) == Orig)
1956  // Bits starting from LSB in the input contribute to the result.
1957  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1958  } else {
1959  // The instruction is a BFI.
1960  uint64_t Width = MSB + 1;
1961  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1962 
1963  OpUsefulBits <<= Width;
1964  --OpUsefulBits;
1965  OpUsefulBits <<= LSB;
1966 
1967  if (Op.getOperand(1) == Orig) {
1968  // Copy the bits from the result to the zero bits.
1969  Mask = ResultUsefulBits & OpUsefulBits;
1970  Mask.lshrInPlace(LSB);
1971  }
1972 
1973  if (Op.getOperand(0) == Orig)
1974  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1975  }
1976 
1977  UsefulBits &= Mask;
1978 }
1979 
1980 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1981  SDValue Orig, unsigned Depth) {
1982 
1983  // Users of this node should have already been instruction selected
1984  // FIXME: Can we turn that into an assert?
1985  if (!UserNode->isMachineOpcode())
1986  return;
1987 
1988  switch (UserNode->getMachineOpcode()) {
1989  default:
1990  return;
1991  case AArch64::ANDSWri:
1992  case AArch64::ANDSXri:
1993  case AArch64::ANDWri:
1994  case AArch64::ANDXri:
1995  // We increment Depth only when we call the getUsefulBits
1996  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1997  Depth);
1998  case AArch64::UBFMWri:
1999  case AArch64::UBFMXri:
2000  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2001 
2002  case AArch64::ORRWrs:
2003  case AArch64::ORRXrs:
2004  if (UserNode->getOperand(1) != Orig)
2005  return;
2006  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2007  Depth);
2008  case AArch64::BFMWri:
2009  case AArch64::BFMXri:
2010  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2011 
2012  case AArch64::STRBBui:
2013  case AArch64::STURBBi:
2014  if (UserNode->getOperand(0) != Orig)
2015  return;
2016  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2017  return;
2018 
2019  case AArch64::STRHHui:
2020  case AArch64::STURHHi:
2021  if (UserNode->getOperand(0) != Orig)
2022  return;
2023  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2024  return;
2025  }
2026 }
2027 
2028 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2029  if (Depth >= 6)
2030  return;
2031  // Initialize UsefulBits
2032  if (!Depth) {
2033  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2034  // At the beginning, assume every produced bit is useful
2035  UsefulBits = APInt(Bitwidth, 0);
2036  UsefulBits.flipAllBits();
2037  }
2038  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2039 
2040  for (SDNode *Node : Op.getNode()->uses()) {
2041  // A use cannot produce useful bits
2042  APInt UsefulBitsForUse = APInt(UsefulBits);
2043  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2044  UsersUsefulBits |= UsefulBitsForUse;
2045  }
2046  // UsefulBits contains the produced bits that are meaningful for the
2047  // current definition, thus a user cannot make a bit meaningful at
2048  // this point
2049  UsefulBits &= UsersUsefulBits;
2050 }
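// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the useful-bits
// propagation, replayed by hand on the example from the comment above the
// getUsefulBits declaration (y = x & 0x7; z = y >> 2), using plain 32-bit
// masks and assuming every bit of z is useful. The helper name is hypothetical.
static uint32_t sketchUsefulBitsOfX() {
  uint32_t UsefulZ = 0xFFFFFFFFu;   // all produced bits of z are useful
  uint32_t UsefulY = UsefulZ << 2;  // z = y >> 2: only y[31:2] can matter
  uint32_t UsefulX = UsefulY & 0x7; // y = x & 7: only x[2:0] can matter
  return UsefulX;                   // == 0x4, matching the earlier comment
}
// -----------------------------------------------------------------------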
2051 
2052 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2053 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2054 /// 0, return Op unchanged.
2055 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2056  if (ShlAmount == 0)
2057  return Op;
2058 
2059  EVT VT = Op.getValueType();
2060  SDLoc dl(Op);
2061  unsigned BitWidth = VT.getSizeInBits();
2062  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2063 
2064  SDNode *ShiftNode;
2065  if (ShlAmount > 0) {
2066  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2067  ShiftNode = CurDAG->getMachineNode(
2068  UBFMOpc, dl, VT, Op,
2069  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2070  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2071  } else {
2072  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2073  assert(ShlAmount < 0 && "expected right shift");
2074  int ShrAmount = -ShlAmount;
2075  ShiftNode = CurDAG->getMachineNode(
2076  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2077  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2078  }
2079 
2080  return SDValue(ShiftNode, 0);
2081 }
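// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the UBFM encodings of
// the LSL/LSR aliases used by getLeftShift, on plain integers; the helper
// name is hypothetical.
static void sketchShiftAsUbfm(int ShlAmount, unsigned BitWidth,
                              unsigned &ImmR, unsigned &ImmS) {
  if (ShlAmount > 0) {
    // LSL #Amt == UBFM #(BitWidth - Amt), #(BitWidth - 1 - Amt)
    ImmR = BitWidth - unsigned(ShlAmount);
    ImmS = BitWidth - 1 - unsigned(ShlAmount);
  } else {
    // LSR #Amt == UBFM #Amt, #(BitWidth - 1)
    ImmR = unsigned(-ShlAmount);
    ImmS = BitWidth - 1;
  }
}
// E.g. a notional left shift by 4 of a 32-bit value encodes as UBFM #28, #27
// (the LSL w, w, #4 alias); -4 encodes as UBFM #4, #31 (LSR w, w, #4).
// -----------------------------------------------------------------------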
2082 
2083 /// Does this tree qualify as an attempt to move a bitfield into position,
2084 /// essentially "(and (shl VAL, N), Mask)".
2085 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2086  bool BiggerPattern,
2087  SDValue &Src, int &ShiftAmount,
2088  int &MaskWidth) {
2089  EVT VT = Op.getValueType();
2090  unsigned BitWidth = VT.getSizeInBits();
2091  (void)BitWidth;
2092  assert(BitWidth == 32 || BitWidth == 64);
2093 
2094  KnownBits Known;
2095  CurDAG->computeKnownBits(Op, Known);
2096 
2097  // Non-zero in the sense that they're not provably zero, which is the key
2098  // point if we want to use this value
2099  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2100 
2101  // Discard a constant AND mask if present. It's safe because the node will
2102  // already have been factored into the computeKnownBits calculation above.
2103  uint64_t AndImm;
2104  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2105  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2106  Op = Op.getOperand(0);
2107  }
2108 
2109  // Don't match if the SHL has more than one use, since then we'll end up
2110  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2111  if (!BiggerPattern && !Op.hasOneUse())
2112  return false;
2113 
2114  uint64_t ShlImm;
2115  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2116  return false;
2117  Op = Op.getOperand(0);
2118 
2119  if (!isShiftedMask_64(NonZeroBits))
2120  return false;
2121 
2122  ShiftAmount = countTrailingZeros(NonZeroBits);
2123  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2124 
2125  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2126  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2127  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2128  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2129  // which case it is not profitable to insert an extra shift.
2130  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2131  return false;
2132  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2133 
2134  return true;
2135 }
2136 
2137 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2138  assert(VT == MVT::i32 || VT == MVT::i64);
2139  if (VT == MVT::i32)
2140  return isShiftedMask_32(Mask);
2141  return isShiftedMask_64(Mask);
2142 }
2143 
2144 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2145 // inserted only sets known zero bits.
2146 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2147  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2148 
2149  EVT VT = N->getValueType(0);
2150  if (VT != MVT::i32 && VT != MVT::i64)
2151  return false;
2152 
2153  unsigned BitWidth = VT.getSizeInBits();
2154 
2155  uint64_t OrImm;
2156  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2157  return false;
2158 
2159  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2160  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2161  // performance neutral.
2162  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2163  return false;
2164 
2165  uint64_t MaskImm;
2166  SDValue And = N->getOperand(0);
2167  // Must be a single use AND with an immediate operand.
2168  if (!And.hasOneUse() ||
2169  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2170  return false;
2171 
2172  // Compute the Known Zero for the AND as this allows us to catch more general
2173  // cases than just looking for AND with imm.
2174  KnownBits Known;
2175  CurDAG->computeKnownBits(And, Known);
2176 
2177  // Non-zero in the sense that they're not provably zero, which is the key
2178  // point if we want to use this value.
2179  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2180 
2181  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2182  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2183  return false;
2184 
2185  // The bits being inserted must only set those bits that are known to be zero.
2186  if ((OrImm & NotKnownZero) != 0) {
2187  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2188  // currently handle this case.
2189  return false;
2190  }
2191 
2192  // BFI/BFXIL dst, src, #lsb, #width.
2193  int LSB = countTrailingOnes(NotKnownZero);
2194  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2195 
2196  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2197  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2198  unsigned ImmS = Width - 1;
2199 
2200  // If we're creating a BFI instruction, avoid cases where we need more
2201  // instructions to materialize the BFI constant as compared to the original
2202  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2203  // should be no worse in this case.
2204  bool IsBFI = LSB != 0;
2205  uint64_t BFIImm = OrImm >> LSB;
2206  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2207  // We have a BFI instruction and we know the constant can't be materialized
2208  // with a ORR-immediate with the zero register.
2209  unsigned OrChunks = 0, BFIChunks = 0;
2210  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2211  if (((OrImm >> Shift) & 0xFFFF) != 0)
2212  ++OrChunks;
2213  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2214  ++BFIChunks;
2215  }
2216  if (BFIChunks > OrChunks)
2217  return false;
2218  }
2219 
2220  // Materialize the constant to be inserted.
2221  SDLoc DL(N);
2222  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2223  SDNode *MOVI = CurDAG->getMachineNode(
2224  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2225 
2226  // Create the BFI/BFXIL instruction.
2227  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2228  CurDAG->getTargetConstant(ImmR, DL, VT),
2229  CurDAG->getTargetConstant(ImmS, DL, VT)};
2230  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2231  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2232  return true;
2233 }
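// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the BFI/BFXIL to BFM
// operand translation used above, on plain integers; the helper name is
// hypothetical. BFI dst, src, #lsb, #width is an alias of BFM with
// ImmR = (BitWidth - lsb) % BitWidth and ImmS = width - 1; lsb == 0
// degenerates to the BFXIL form.
static void sketchBfiToBfm(unsigned LSB, unsigned Width, unsigned BitWidth,
                           unsigned &ImmR, unsigned &ImmS) {
  ImmR = (BitWidth - LSB) % BitWidth;
  ImmS = Width - 1;
}
// E.g. inserting an 8-bit field at bit 16 of a w-register (BFI w0, w1, #16,
// #8) encodes as BFM w0, w1, #16, #7: ImmR = (32 - 16) % 32 = 16, ImmS = 7.
// -----------------------------------------------------------------------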
2234 
2235 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2236  SelectionDAG *CurDAG) {
2237  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2238 
2239  EVT VT = N->getValueType(0);
2240  if (VT != MVT::i32 && VT != MVT::i64)
2241  return false;
2242 
2243  unsigned BitWidth = VT.getSizeInBits();
2244 
2245  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2246  // have the expected shape. Try to undo that.
2247 
2248  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2249  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2250 
2251  // Given an OR operation, check if we have the following pattern
2252  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
2253  // isBitfieldExtractOp)
2254  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2255  // countTrailingZeros(mask2) == imm2 - imm + 1
2256  // f = d | c
2257  // if yes, replace the OR instruction with:
2258  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2259 
2260  // OR is commutative, check all combinations of operand order and values of
2261  // BiggerPattern, i.e.
2262  // Opd0, Opd1, BiggerPattern=false
2263  // Opd1, Opd0, BiggerPattern=false
2264  // Opd0, Opd1, BiggerPattern=true
2265  // Opd1, Opd0, BiggerPattern=true
2266  // Several of these combinations may match, so check with BiggerPattern=false
2267  // first since that will produce better results by matching more instructions
2268  // and/or inserting fewer extra instructions.
2269  for (int I = 0; I < 4; ++I) {
2270 
2271  SDValue Dst, Src;
2272  unsigned ImmR, ImmS;
2273  bool BiggerPattern = I / 2;
2274  SDValue OrOpd0Val = N->getOperand(I % 2);
2275  SDNode *OrOpd0 = OrOpd0Val.getNode();
2276  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2277  SDNode *OrOpd1 = OrOpd1Val.getNode();
2278 
2279  unsigned BFXOpc;
2280  int DstLSB, Width;
2281  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2282  NumberOfIgnoredLowBits, BiggerPattern)) {
2283  // Check that the returned opcode is compatible with the pattern,
2284  // i.e., same type and zero extended (U and not S)
2285  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2286  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2287  continue;
2288 
2289  // Compute the width of the bitfield insertion
2290  DstLSB = 0;
2291  Width = ImmS - ImmR + 1;
2292  // FIXME: This constraint is to catch bitfield insertion; we may
2293  // want to widen the pattern if we want to grab the general bitfield
2294  // move case.
2295  if (Width <= 0)
2296  continue;
2297 
2298  // If the mask on the insertee is correct, we have a BFXIL operation. We
2299  // can share the ImmR and ImmS values from the already-computed UBFM.
2300  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2301  BiggerPattern,
2302  Src, DstLSB, Width)) {
2303  ImmR = (BitWidth - DstLSB) % BitWidth;
2304  ImmS = Width - 1;
2305  } else
2306  continue;
2307 
2308  // Check the second part of the pattern
2309  EVT VT = OrOpd1Val.getValueType();
2310  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2311 
2312  // Compute the Known Zero for the candidate of the first operand.
2313  // This allows us to catch more general cases than just looking for
2314  // AND with imm. Indeed, simplify-demanded-bits may have removed
2315  // the AND instruction because it proves it was useless.
2316  KnownBits Known;
2317  CurDAG->computeKnownBits(OrOpd1Val, Known);
2318 
2319  // Check if there is enough room for the second operand to appear
2320  // in the first one
2321  APInt BitsToBeInserted =
2322  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2323 
2324  if ((BitsToBeInserted & ~Known.Zero) != 0)
2325  continue;
2326 
2327  // Set the first operand
2328  uint64_t Imm;
2329  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2330  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2331  // In that case, we can eliminate the AND
2332  Dst = OrOpd1->getOperand(0);
2333  else
2334  // Maybe the AND has been removed by simplify-demanded-bits
2335  // or is useful because it discards more bits
2336  Dst = OrOpd1Val;
2337 
2338  // both parts match
2339  SDLoc DL(N);
2340  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2341  CurDAG->getTargetConstant(ImmS, DL, VT)};
2342  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2343  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2344  return true;
2345  }
2346 
2347  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2348  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2349  // mask (e.g., 0x000ffff0).
2350  uint64_t Mask0Imm, Mask1Imm;
2351  SDValue And0 = N->getOperand(0);
2352  SDValue And1 = N->getOperand(1);
2353  if (And0.hasOneUse() && And1.hasOneUse() &&
2354  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2355  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2356  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2357  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2358 
2359  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2360  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2361  // bits to be inserted.
2362  if (isShiftedMask(Mask0Imm, VT)) {
2363  std::swap(And0, And1);
2364  std::swap(Mask0Imm, Mask1Imm);
2365  }
2366 
2367  SDValue Src = And1->getOperand(0);
2368  SDValue Dst = And0->getOperand(0);
2369  unsigned LSB = countTrailingZeros(Mask1Imm);
2370  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2371 
2372  // The BFXIL inserts the low-order bits from a source register, so right
2373  // shift the needed bits into place.
2374  SDLoc DL(N);
2375  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2376  SDNode *LSR = CurDAG->getMachineNode(
2377  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2378  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2379 
2380  // BFXIL is an alias of BFM, so translate to BFM operands.
2381  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2382  unsigned ImmS = Width - 1;
2383 
2384  // Create the BFXIL instruction.
2385  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2386  CurDAG->getTargetConstant(ImmR, DL, VT),
2387  CurDAG->getTargetConstant(ImmS, DL, VT)};
2388  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2389  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2390  return true;
2391  }
2392 
2393  return false;
2394 }
2395 
2396 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2397  if (N->getOpcode() != ISD::OR)
2398  return false;
2399 
2400  APInt NUsefulBits;
2401  getUsefulBits(SDValue(N, 0), NUsefulBits);
2402 
2403  // If none of the bits are useful, just return UNDEF.
2404  if (!NUsefulBits) {
2405  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2406  return true;
2407  }
2408 
2409  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2410  return true;
2411 
2412  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2413 }
2414 
2415 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2416 /// equivalent of a left shift by a constant amount followed by an and masking
2417 /// out a contiguous set of bits.
2418 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2419  if (N->getOpcode() != ISD::AND)
2420  return false;
2421 
2422  EVT VT = N->getValueType(0);
2423  if (VT != MVT::i32 && VT != MVT::i64)
2424  return false;
2425 
2426  SDValue Op0;
2427  int DstLSB, Width;
2428  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2429  Op0, DstLSB, Width))
2430  return false;
2431 
2432  // ImmR is the rotate right amount.
2433  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2434  // ImmS is the most significant bit of the source to be moved.
2435  unsigned ImmS = Width - 1;
2436 
2437  SDLoc DL(N);
2438  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2439  CurDAG->getTargetConstant(ImmS, DL, VT)};
2440  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2441  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2442  return true;
2443 }
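// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the UBFIZ immediates for
// the shift-then-mask pattern above, on plain integers; the helper name is
// hypothetical and Mask is assumed to be a non-zero shifted mask.
static void sketchUbfizImms(uint64_t Mask, unsigned BitWidth,
                            unsigned &ImmR, unsigned &ImmS) {
  unsigned DstLSB = 0;
  while (((Mask >> DstLSB) & 1) == 0)
    ++DstLSB;                           // countTrailingZeros(Mask)
  unsigned Width = 0;
  for (uint64_t M = Mask >> DstLSB; M & 1; M >>= 1)
    ++Width;                            // countTrailingOnes(Mask >> DstLSB)
  ImmR = (BitWidth - DstLSB) % BitWidth;
  ImmS = Width - 1;
}
// E.g. (and (shl X, 4), 0xff0) on i32 gives ImmR = 28 and ImmS = 7, the
// UBFM encoding of UBFIZ w0, w1, #4, #8.
// -----------------------------------------------------------------------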
2444 
2445 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2446 /// variable shift/rotate instructions.
2447 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2448  EVT VT = N->getValueType(0);
2449 
2450  unsigned Opc;
2451  switch (N->getOpcode()) {
2452  case ISD::ROTR:
2453  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2454  break;
2455  case ISD::SHL:
2456  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2457  break;
2458  case ISD::SRL:
2459  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2460  break;
2461  case ISD::SRA:
2462  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2463  break;
2464  default:
2465  return false;
2466  }
2467 
2468  uint64_t Size;
2469  uint64_t Bits;
2470  if (VT == MVT::i32) {
2471  Bits = 5;
2472  Size = 32;
2473  } else if (VT == MVT::i64) {
2474  Bits = 6;
2475  Size = 64;
2476  } else
2477  return false;
2478 
2479  SDValue ShiftAmt = N->getOperand(1);
2480  SDLoc DL(N);
2481  SDValue NewShiftAmt;
2482 
2483  // Skip over an extend of the shift amount.
2484  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2485  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2486  ShiftAmt = ShiftAmt->getOperand(0);
2487 
2488  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2489  SDValue Add0 = ShiftAmt->getOperand(0);
2490  SDValue Add1 = ShiftAmt->getOperand(1);
2491  uint64_t Add0Imm;
2492  uint64_t Add1Imm;
2493  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2494  // to avoid the ADD/SUB.
2495  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2496  NewShiftAmt = Add0;
2497  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2498  // generate a NEG instead of a SUB of a constant.
2499  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2500  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2501  (Add0Imm % Size == 0)) {
2502  unsigned NegOpc;
2503  unsigned ZeroReg;
2504  EVT SubVT = ShiftAmt->getValueType(0);
2505  if (SubVT == MVT::i32) {
2506  NegOpc = AArch64::SUBWrr;
2507  ZeroReg = AArch64::WZR;
2508  } else {
2509  assert(SubVT == MVT::i64);
2510  NegOpc = AArch64::SUBXrr;
2511  ZeroReg = AArch64::XZR;
2512  }
2513  SDValue Zero =
2514  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2515  MachineSDNode *Neg =
2516  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2517  NewShiftAmt = SDValue(Neg, 0);
2518  } else
2519  return false;
2520  } else {
2521  // If the shift amount is masked with an AND, check that the mask covers the
2522  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2523  // the AND.
2524  uint64_t MaskImm;
2525  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2526  return false;
2527 
2528  if (countTrailingOnes(MaskImm) < Bits)
2529  return false;
2530 
2531  NewShiftAmt = ShiftAmt->getOperand(0);
2532  }
2533 
2534  // Narrow/widen the shift amount to match the size of the shift operation.
2535  if (VT == MVT::i32)
2536  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2537  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2538  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2539  MachineSDNode *Ext = CurDAG->getMachineNode(
2540  AArch64::SUBREG_TO_REG, DL, VT,
2541  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2542  NewShiftAmt = SDValue(Ext, 0);
2543  }
2544 
2545  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2546  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2547  return true;
2548 }
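// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of why the explicit masking
// and the SUB from a multiple of the size can be dropped. The variable shift
// instructions only read the low log2(Size) bits of the amount, so for 64-bit
// shifts (64 - Amt) and (0 - Amt) are indistinguishable. The helper name is
// hypothetical.
static bool sketchShiftAmountModIdentity(unsigned Amt) {
  unsigned FromSub = (64u - Amt) & 63u; // shifting by (64 - Amt) ...
  unsigned FromNeg = (0u - Amt) & 63u;  // ... reads the same bits as by -Amt
  return FromSub == FromNeg;            // always true: 64 == 0 (mod 64)
}
// -----------------------------------------------------------------------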
2549 
2550 bool
2551 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2552  unsigned RegWidth) {
2553  APFloat FVal(0.0);
2554  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2555  FVal = CN->getValueAPF();
2556  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2557  // Some otherwise illegal constants are allowed in this case.
2558  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2559  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2560  return false;
2561 
2562  ConstantPoolSDNode *CN =
2563  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2564  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2565  } else
2566  return false;
2567 
2568  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2569  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2570  // x-register.
2571  //
2572  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2573  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2574  // integers.
2575  bool IsExact;
2576 
2577  // fbits is between 1 and 64 in the worst-case, which means the fmul
2578  // could have 2^64 as an actual operand. Need 65 bits of precision.
2579  APSInt IntVal(65, true);
2580  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2581 
2582  // N.b. isPowerOf2 also checks for > 0.
2583  if (!IsExact || !IntVal.isPowerOf2()) return false;
2584  unsigned FBits = IntVal.logBase2();
2585 
2586  // Checks above should have guaranteed that we haven't lost information in
2587  // finding FBits, but it must still be in range.
2588  if (FBits == 0 || FBits > RegWidth) return false;
2589 
2590  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2591  return true;
2592 }
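// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the fixed-point check
// above on a plain double; the helper name is hypothetical.
// (fp_to_[su]int (fmul X, C)) can use FCVTZ[SU] with #fbits exactly when C is
// 2^fbits for some fbits in [1, RegWidth].
static bool sketchFcvtFixedBits(double C, unsigned RegWidth, unsigned &FBits) {
  double P = 2.0;                       // candidate 2^F, starting at F == 1
  for (unsigned F = 1; F <= RegWidth; ++F, P *= 2.0)
    if (C == P) {
      FBits = F;
      return true;
    }
  return false;                         // not an exact power of two in range
}
// E.g. C == 65536.0 gives FBits == 16, so the pair selects to something like
// fcvtzs w0, s0, #16.
// -----------------------------------------------------------------------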
2593 
2594 // Inspects a register string of the form o0:op1:CRn:CRm:op2, splits it into
2595 // its fields, obtains the integer values from them, and combines these
2596 // into a single value to be used in the MRS/MSR instruction.
2597 static int getIntOperandFromRegisterString(StringRef RegString) {
2598  SmallVector<StringRef, 5> Fields;
2599  RegString.split(Fields, ':');
2600 
2601  if (Fields.size() == 1)
2602  return -1;
2603 
2604  assert(Fields.size() == 5
2605  && "Invalid number of fields in read register string");
2606 
2607  SmallVector<int, 5> Ops;
2608  bool AllIntFields = true;
2609 
2610  for (StringRef Field : Fields) {
2611  unsigned IntField;
2612  AllIntFields &= !Field.getAsInteger(10, IntField);
2613  Ops.push_back(IntField);
2614  }
2615 
2616  assert(AllIntFields &&
2617  "Unexpected non-integer value in special register string.");
2618 
2619  // Need to combine the integer fields of the string into a single value
2620  // based on the bit encoding of the MRS/MSR instruction.
2621  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2622  (Ops[3] << 3) | (Ops[4]);
2623 }
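// -----------------------------------------------------------------------
// A minimal standalone sketch (illustrative only) of the field packing above
// on plain integers; the helper name is hypothetical. The five colon-separated
// fields are packed as o0:op1:CRn:CRm:op2 into the 16-bit system-register
// operand of MRS/MSR.
static int sketchPackSysRegFields(const int Fields[5]) {
  return (Fields[0] << 14) | (Fields[1] << 11) | (Fields[2] << 7) |
         (Fields[3] << 3) | Fields[4];
}
// E.g. the generic string "3:3:13:0:2" (S3_3_C13_C0_2, the generic spelling
// of TPIDR_EL0) packs to 0xDE82.
// -----------------------------------------------------------------------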
2624 
2625 // Lower the read_register intrinsic to an MRS instruction node if the special
2626 // register string argument is either of the form detailed in the ALCE (the
2627 // form described in getIntOperandFromRegisterString) or is a named register
2628 // known by the MRS SysReg mapper.
2629 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2630  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2631  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2632  SDLoc DL(N);
2633 
2634  int Reg = getIntOperandFromRegisterString(RegString->getString());
2635  if (Reg != -1) {
2636  ReplaceNode(N, CurDAG->getMachineNode(
2637  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2638  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2639  N->getOperand(0)));
2640  return true;
2641  }
2642 
2643  // Use the sysreg mapper to map the remaining possible strings to the
2644  // value for the register to be used for the instruction operand.
2645  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2646  if (TheReg && TheReg->Readable &&
2647  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2648  Reg = TheReg->Encoding;
2649  else
2650  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2651 
2652  if (Reg != -1) {
2653  ReplaceNode(N, CurDAG->getMachineNode(
2654  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2655  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2656  N->getOperand(0)));
2657  return true;
2658  }
2659 
2660  return false;
2661 }
2662 
2663 // Lower the write_register intrinsic to an MSR instruction node if the special
2664 // register string argument is either of the form detailed in the ALCE (the
2665 // form described in getIntOperandFromRegisterString) or is a named register
2666 // known by the MSR SysReg mapper.
2667 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2668  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2669  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2670  SDLoc DL(N);
2671 
2672  int Reg = getIntOperandFromRegisterString(RegString->getString());
2673  if (Reg != -1) {
2674  ReplaceNode(
2675  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2676  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2677  N->getOperand(2), N->getOperand(0)));
2678  return true;
2679  }
2680 
2681  // Check if the register was one of those allowed as the pstatefield value in
2682  // the MSR (immediate) instruction. To accept the values allowed in the
2683  // pstatefield for the MSR (immediate) instruction, we also require that an
2684 // immediate value has been provided as an argument; we know that this is
2685 // the case, as it has been ensured by semantic checking.
2686  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2687  if (PMapper) {
2688  assert (isa<ConstantSDNode>(N->getOperand(2))
2689  && "Expected a constant integer expression.");
2690  unsigned Reg = PMapper->Encoding;
2691  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2692  unsigned State;
2693  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
2694  assert(Immed < 2 && "Bad imm");
2695  State = AArch64::MSRpstateImm1;
2696  } else {
2697  assert(Immed < 16 && "Bad imm");
2698  State = AArch64::MSRpstateImm4;
2699  }
2700  ReplaceNode(N, CurDAG->getMachineNode(
2701  State, DL, MVT::Other,
2702  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2703  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2704  N->getOperand(0)));
2705  return true;
2706  }
2707 
2708  // Use the sysreg mapper to attempt to map the remaining possible strings
2709  // to the value for the register to be used for the MSR (register)
2710  // instruction operand.
2711  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2712  if (TheReg && TheReg->Writeable &&
2713  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2714  Reg = TheReg->Encoding;
2715  else
2716  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2717  if (Reg != -1) {
2718  ReplaceNode(N, CurDAG->getMachineNode(
2719  AArch64::MSR, DL, MVT::Other,
2720  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2721  N->getOperand(2), N->getOperand(0)));
2722  return true;
2723  }
2724 
2725  return false;
2726 }
2727 
2728 /// We've got special pseudo-instructions for these
2729 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2730  unsigned Opcode;
2731  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2732 
2733  // Leave IR for LSE if subtarget supports it.
2734  if (Subtarget->hasLSE()) return false;
2735 
2736  if (MemTy == MVT::i8)
2737  Opcode = AArch64::CMP_SWAP_8;
2738  else if (MemTy == MVT::i16)
2739  Opcode = AArch64::CMP_SWAP_16;
2740  else if (MemTy == MVT::i32)
2741  Opcode = AArch64::CMP_SWAP_32;
2742  else if (MemTy == MVT::i64)
2743  Opcode = AArch64::CMP_SWAP_64;
2744  else
2745  llvm_unreachable("Unknown AtomicCmpSwap type");
2746 
2747  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2748  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2749  N->getOperand(0)};
2750  SDNode *CmpSwap = CurDAG->getMachineNode(
2751  Opcode, SDLoc(N),
2752  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2753 
2754  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2755  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2756  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2757 
2758  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2759  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2760  CurDAG->RemoveDeadNode(N);
2761 
2762  return true;
2763 }
2764 
2765 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2766  // If we have a custom node, we already have selected!
2767  if (Node->isMachineOpcode()) {
2768  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2769  Node->setNodeId(-1);
2770  return;
2771  }
2772 
2773  // A few custom selection cases.
2774  EVT VT = Node->getValueType(0);
2775 
2776  switch (Node->getOpcode()) {
2777  default:
2778  break;
2779 
2780  case ISD::ATOMIC_CMP_SWAP:
2781  if (SelectCMP_SWAP(Node))
2782  return;
2783  break;
2784 
2785  case ISD::READ_REGISTER:
2786  if (tryReadRegister(Node))
2787  return;
2788  break;
2789 
2790  case ISD::WRITE_REGISTER:
2791  if (tryWriteRegister(Node))
2792  return;
2793  break;
2794 
2795  case ISD::ADD:
2796  if (tryMLAV64LaneV128(Node))
2797  return;
2798  break;
2799 
2800  case ISD::LOAD: {
2801  // Try to select as an indexed load. Fall through to normal processing
2802  // if we can't.
2803  if (tryIndexedLoad(Node))
2804  return;
2805  break;
2806  }
2807 
2808  case ISD::SRL:
2809  case ISD::AND:
2810  case ISD::SRA:
2811  case ISD::SIGN_EXTEND_INREG:
2812  if (tryBitfieldExtractOp(Node))
2813  return;
2814  if (tryBitfieldInsertInZeroOp(Node))
2815  return;
2816  LLVM_FALLTHROUGH;
2817  case ISD::ROTR:
2818  case ISD::SHL:
2819  if (tryShiftAmountMod(Node))
2820  return;
2821  break;
2822 
2823  case ISD::SIGN_EXTEND:
2824  if (tryBitfieldExtractOpFromSExt(Node))
2825  return;
2826  break;
2827 
2828  case ISD::OR:
2829  if (tryBitfieldInsertOp(Node))
2830  return;
2831  break;
2832 
2833  case ISD::EXTRACT_VECTOR_ELT: {
2834  // Extracting lane zero is a special case where we can just use a plain
2835  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2836  // the rest of the compiler, especially the register allocator and copy
2837  // propagation, to reason about, so is preferred when it's possible to
2838  // use it.
2839  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2840  // Bail and use the default Select() for non-zero lanes.
2841  if (LaneNode->getZExtValue() != 0)
2842  break;
2843  // If the element type is not the same as the result type, likewise
2844  // bail and use the default Select(), as there's more to do than just
2845  // a cross-class COPY. This catches extracts of i8 and i16 elements
2846  // since they will need an explicit zext.
2847  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2848  break;
2849  unsigned SubReg;
2850  switch (Node->getOperand(0)
2851  .getValueType()
2852  .getVectorElementType()
2853  .getSizeInBits()) {
2854  default:
2855  llvm_unreachable("Unexpected vector element type!");
2856  case 64:
2857  SubReg = AArch64::dsub;
2858  break;
2859  case 32:
2860  SubReg = AArch64::ssub;
2861  break;
2862  case 16:
2863  SubReg = AArch64::hsub;
2864  break;
2865  case 8:
2866  llvm_unreachable("unexpected zext-requiring extract element!");
2867  }
2868  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2869  Node->getOperand(0));
2870  LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2871  LLVM_DEBUG(Extract->dumpr(CurDAG));
2872  LLVM_DEBUG(dbgs() << "\n");
2873  ReplaceNode(Node, Extract.getNode());
2874  return;
2875  }
2876  case ISD::Constant: {
2877  // Materialize zero constants as copies from WZR/XZR. This allows
2878  // the coalescer to propagate these into other instructions.
2879  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2880  if (ConstNode->isNullValue()) {
2881  if (VT == MVT::i32) {
2882  SDValue New = CurDAG->getCopyFromReg(
2883  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2884  ReplaceNode(Node, New.getNode());
2885  return;
2886  } else if (VT == MVT::i64) {
2887  SDValue New = CurDAG->getCopyFromReg(
2888  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2889  ReplaceNode(Node, New.getNode());
2890  return;
2891  }
2892  }
2893  break;
2894  }
2895 
2896  case ISD::FrameIndex: {
2897  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2898  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2899  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2900  const TargetLowering *TLI = getTargetLowering();
2901  SDValue TFI = CurDAG->getTargetFrameIndex(
2902  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2903  SDLoc DL(Node);
2904  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2905  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2906  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2907  return;
2908  }
2909  case ISD::INTRINSIC_W_CHAIN: {
2910  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2911  switch (IntNo) {
2912  default:
2913  break;
2914  case Intrinsic::aarch64_ldaxp:
2915  case Intrinsic::aarch64_ldxp: {
2916  unsigned Op =
2917  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2918  SDValue MemAddr = Node->getOperand(2);
2919  SDLoc DL(Node);
2920  SDValue Chain = Node->getOperand(0);
2921 
2922  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2923  MVT::Other, MemAddr, Chain);
2924 
2925  // Transfer memoperands.
2926  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2927  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2928  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2929  ReplaceNode(Node, Ld);
2930  return;
2931  }
2932  case Intrinsic::aarch64_stlxp:
2933  case Intrinsic::aarch64_stxp: {
2934  unsigned Op =
2935  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2936  SDLoc DL(Node);
2937  SDValue Chain = Node->getOperand(0);
2938  SDValue ValLo = Node->getOperand(2);
2939  SDValue ValHi = Node->getOperand(3);
2940  SDValue MemAddr = Node->getOperand(4);
2941 
2942  // Place arguments in the right order.
2943  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2944 
2945  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2946  // Transfer memoperands.
2947  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2948  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2949  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2950 
2951  ReplaceNode(Node, St);
2952  return;
2953  }
2954  case Intrinsic::aarch64_neon_ld1x2:
2955  if (VT == MVT::v8i8) {
2956  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2957  return;
2958  } else if (VT == MVT::v16i8) {
2959  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2960  return;
2961  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2962  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2963  return;
2964  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2965  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2966  return;
2967  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2968  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2969  return;
2970  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2971  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2972  return;
2973  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2974  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2975  return;
2976  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2977  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2978  return;
2979  }
2980  break;
2981  case Intrinsic::aarch64_neon_ld1x3:
2982  if (VT == MVT::v8i8) {
2983  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2984  return;
2985  } else if (VT == MVT::v16i8) {
2986  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2987  return;
2988  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2989  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2990  return;
2991  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2992  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2993  return;
2994  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2995  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2996  return;
2997  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2998  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2999  return;
3000  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3001  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3002  return;
3003  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3004  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3005  return;
3006  }
3007  break;
3008  case Intrinsic::aarch64_neon_ld1x4:
3009  if (VT == MVT::v8i8) {
3010  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3011  return;
3012  } else if (VT == MVT::v16i8) {
3013  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3014  return;
3015  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3016  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3017  return;
3018  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3019  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3020  return;
3021  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3022  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3023  return;
3024  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3025  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3026  return;
3027  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3028  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3029  return;
3030  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3031  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3032  return;
3033  }
3034  break;
3035  case Intrinsic::aarch64_neon_ld2:
3036  if (VT == MVT::v8i8) {
3037  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3038  return;
3039  } else if (VT == MVT::v16i8) {
3040  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3041  return;
3042  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3043  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3044  return;
3045  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3046  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3047  return;
3048  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3049  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3050  return;
3051  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3052  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3053  return;
3054  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3055  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3056  return;
3057  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3058  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3059  return;
3060  }
3061  break;
3062  case Intrinsic::aarch64_neon_ld3:
3063  if (VT == MVT::v8i8) {
3064  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3065  return;
3066  } else if (VT == MVT::v16i8) {
3067  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3068  return;
3069  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3070  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3071  return;
3072  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3073  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3074  return;
3075  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3076  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3077  return;
3078  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3079  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3080  return;
3081  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3082  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3083  return;
3084  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3085  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3086  return;
3087  }
3088  break;
3089  case Intrinsic::aarch64_neon_ld4:
3090  if (VT == MVT::v8i8) {
3091  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3092  return;
3093  } else if (VT == MVT::v16i8) {
3094  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3095  return;
3096  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3097  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3098  return;
3099  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3100  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3101  return;
3102  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3103  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3104  return;
3105  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3106  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3107  return;
3108  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3109  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3110  return;
3111  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3112  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3113  return;
3114  }
3115  break;
3116  case Intrinsic::aarch64_neon_ld2r:
3117  if (VT == MVT::v8i8) {
3118  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3119  return;
3120  } else if (VT == MVT::v16i8) {
3121  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3122  return;
3123  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3124  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3125  return;
3126  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3127  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3128  return;
3129  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3130  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3131  return;
3132  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3133  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3134  return;
3135  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3136  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3137  return;
3138  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3139  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3140  return;
3141  }
3142  break;
3143  case Intrinsic::aarch64_neon_ld3r:
3144  if (VT == MVT::v8i8) {
3145  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3146  return;
3147  } else if (VT == MVT::v16i8) {
3148  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3149  return;
3150  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3151  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3152  return;
3153  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3154  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3155  return;
3156  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3157  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3158  return;
3159  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3160  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3161  return;
3162  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3163  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3164  return;
3165  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3166  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3167  return;
3168  }
3169  break;
3170  case Intrinsic::aarch64_neon_ld4r:
3171  if (VT == MVT::v8i8) {
3172  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3173  return;
3174  } else if (VT == MVT::v16i8) {
3175  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3176  return;
3177  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3178  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3179  return;
3180  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3181  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3182  return;
3183  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3184  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3185  return;
3186  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3187  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3188  return;
3189  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3190  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3191  return;
3192  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3193  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3194  return;
3195  }
3196  break;
3197  case Intrinsic::aarch64_neon_ld2lane:
3198  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3199  SelectLoadLane(Node, 2, AArch64::LD2i8);
3200  return;
3201  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3202  VT == MVT::v8f16) {
3203  SelectLoadLane(Node, 2, AArch64::LD2i16);
3204  return;
3205  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3206  VT == MVT::v2f32) {
3207  SelectLoadLane(Node, 2, AArch64::LD2i32);
3208  return;
3209  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3210  VT == MVT::v1f64) {
3211  SelectLoadLane(Node, 2, AArch64::LD2i64);
3212  return;
3213  }
3214  break;
3215  case Intrinsic::aarch64_neon_ld3lane:
3216  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3217  SelectLoadLane(Node, 3, AArch64::LD3i8);
3218  return;
3219  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3220  VT == MVT::v8f16) {
3221  SelectLoadLane(Node, 3, AArch64::LD3i16);
3222  return;
3223  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3224  VT == MVT::v2f32) {
3225  SelectLoadLane(Node, 3, AArch64::LD3i32);
3226  return;
3227  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3228  VT == MVT::v1f64) {
3229  SelectLoadLane(Node, 3, AArch64::LD3i64);
3230  return;
3231  }
3232  break;
3233  case Intrinsic::aarch64_neon_ld4lane:
3234  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3235  SelectLoadLane(Node, 4, AArch64::LD4i8);
3236  return;
3237  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3238  VT == MVT::v8f16) {
3239  SelectLoadLane(Node, 4, AArch64::LD4i16);
3240  return;
3241  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3242  VT == MVT::v2f32) {
3243  SelectLoadLane(Node, 4, AArch64::LD4i32);
3244  return;
3245  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3246  VT == MVT::v1f64) {
3247  SelectLoadLane(Node, 4, AArch64::LD4i64);
3248  return;
3249  }
3250  break;
3251  }
3252  } break;
3253  case ISD::INTRINSIC_WO_CHAIN: {
3254  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3255  switch (IntNo) {
3256  default:
3257  break;
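    // TBL writes zero to destination elements whose table index is out of
    // range, whereas TBX leaves those elements unchanged; the trailing bool
    // passed to SelectTable below selects between the two forms (false for
    // TBL, true for TBX).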
3258  case Intrinsic::aarch64_neon_tbl2:
3259  SelectTable(Node, 2,
3260  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3261  false);
3262  return;
3263  case Intrinsic::aarch64_neon_tbl3:
3264  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3265  : AArch64::TBLv16i8Three,
3266  false);
3267  return;
3268  case Intrinsic::aarch64_neon_tbl4:
3269  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3270  : AArch64::TBLv16i8Four,
3271  false);
3272  return;
3273  case Intrinsic::aarch64_neon_tbx2:
3274  SelectTable(Node, 2,
3275  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3276  true);
3277  return;
3278  case Intrinsic::aarch64_neon_tbx3:
3279  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3280  : AArch64::TBXv16i8Three,
3281  true);
3282  return;
3283  case Intrinsic::aarch64_neon_tbx4:
3284  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3285  : AArch64::TBXv16i8Four,
3286  true);
3287  return;
3288  case Intrinsic::aarch64_neon_smull:
3289  case Intrinsic::aarch64_neon_umull:
3290  if (tryMULLV64LaneV128(IntNo, Node))
3291  return;
3292  break;
3293  }
3294  break;
3295  }
3296  case ISD::INTRINSIC_VOID: {
3297  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3298  if (Node->getNumOperands() >= 3)
3299  VT = Node->getOperand(2)->getValueType(0);
3300  switch (IntNo) {
3301  default:
3302  break;
3303  case Intrinsic::aarch64_neon_st1x2: {
3304  if (VT == MVT::v8i8) {
3305  SelectStore(Node, 2, AArch64::ST1Twov8b);
3306  return;
3307  } else if (VT == MVT::v16i8) {
3308  SelectStore(Node, 2, AArch64::ST1Twov16b);
3309  return;
3310  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3311  SelectStore(Node, 2, AArch64::ST1Twov4h);
3312  return;
3313  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3314  SelectStore(Node, 2, AArch64::ST1Twov8h);
3315  return;
3316  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3317  SelectStore(Node, 2, AArch64::ST1Twov2s);
3318  return;
3319  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3320  SelectStore(Node, 2, AArch64::ST1Twov4s);
3321  return;
3322  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3323  SelectStore(Node, 2, AArch64::ST1Twov2d);
3324  return;
3325  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3326  SelectStore(Node, 2, AArch64::ST1Twov1d);
3327  return;
3328  }
3329  break;
3330  }
3331  case Intrinsic::aarch64_neon_st1x3: {
3332  if (VT == MVT::v8i8) {
3333  SelectStore(Node, 3, AArch64::ST1Threev8b);
3334  return;
3335  } else if (VT == MVT::v16i8) {
3336  SelectStore(Node, 3, AArch64::ST1Threev16b);
3337  return;
3338  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3339  SelectStore(Node, 3, AArch64::ST1Threev4h);
3340  return;
3341  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3342  SelectStore(Node, 3, AArch64::ST1Threev8h);
3343  return;
3344  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3345  SelectStore(Node, 3, AArch64::ST1Threev2s);
3346  return;
3347  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3348  SelectStore(Node, 3, AArch64::ST1Threev4s);
3349  return;
3350  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3351  SelectStore(Node, 3, AArch64::ST1Threev2d);
3352  return;
3353  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3354  SelectStore(Node, 3, AArch64::ST1Threev1d);
3355  return;
3356  }
3357  break;
3358  }
3359  case Intrinsic::aarch64_neon_st1x4: {
3360  if (VT == MVT::v8i8) {
3361  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3362  return;
3363  } else if (VT == MVT::v16i8) {
3364  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3365  return;
3366  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3367  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3368  return;
3369  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3370  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3371  return;
3372  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3373  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3374  return;
3375  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3376  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3377  return;
3378  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3379  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3380  return;
3381  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3382  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3383  return;
3384  }
3385  break;
3386  }
3387  case Intrinsic::aarch64_neon_st2: {
3388  if (VT == MVT::v8i8) {
3389  SelectStore(Node, 2, AArch64::ST2Twov8b);
3390  return;
3391  } else if (VT == MVT::v16i8) {
3392  SelectStore(Node, 2, AArch64::ST2Twov16b);
3393  return;
3394  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3395  SelectStore(Node, 2, AArch64::ST2Twov4h);
3396  return;
3397  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3398  SelectStore(Node, 2, AArch64::ST2Twov8h);
3399  return;
3400  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3401  SelectStore(Node, 2, AArch64::ST2Twov2s);
3402  return;
3403  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3404  SelectStore(Node, 2, AArch64::ST2Twov4s);
3405  return;
3406  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3407  SelectStore(Node, 2, AArch64::ST2Twov2d);
3408  return;
3409  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3410  SelectStore(Node, 2, AArch64::ST1Twov1d);
3411  return;
3412  }
3413  break;
3414  }
3415  case Intrinsic::aarch64_neon_st3: {
3416  if (VT == MVT::v8i8) {
3417  SelectStore(Node, 3, AArch64::ST3Threev8b);
3418  return;
3419  } else if (VT == MVT::v16i8) {
3420  SelectStore(Node, 3, AArch64::ST3Threev16b);
3421  return;
3422  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3423  SelectStore(Node, 3, AArch64::ST3Threev4h);
3424  return;
3425  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3426  SelectStore(Node, 3, AArch64::ST3Threev8h);
3427  return;
3428  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3429  SelectStore(Node, 3, AArch64::ST3Threev2s);
3430  return;
3431  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3432  SelectStore(Node, 3, AArch64::ST3Threev4s);
3433  return;
3434  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3435  SelectStore(Node, 3, AArch64::ST3Threev2d);
3436  return;
3437  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3438  SelectStore(Node, 3, AArch64::ST1Threev1d);
3439  return;
3440  }
3441  break;
3442  }
3443  case Intrinsic::aarch64_neon_st4: {
3444  if (VT == MVT::v8i8) {
3445  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3446  return;
3447  } else if (VT == MVT::v16i8) {
3448  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3449  return;
3450  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3451  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3452  return;
3453  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3454  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3455  return;
3456  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3457  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3458  return;
3459  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3460  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3461  return;
3462  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3463  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3464  return;
3465  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3466  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3467  return;
3468  }
3469  break;
3470  }
3471  case Intrinsic::aarch64_neon_st2lane: {
3472  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3473  SelectStoreLane(Node, 2, AArch64::ST2i8);
3474  return;
3475  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3476  VT == MVT::v8f16) {
3477  SelectStoreLane(Node, 2, AArch64::ST2i16);
3478  return;
3479  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3480  VT == MVT::v2f32) {
3481  SelectStoreLane(Node, 2, AArch64::ST2i32);
3482  return;
3483  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3484  VT == MVT::v1f64) {
3485  SelectStoreLane(Node, 2, AArch64::ST2i64);
3486  return;
3487  }
3488  break;
3489  }
3490  case Intrinsic::aarch64_neon_st3lane: {
3491  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3492  SelectStoreLane(Node, 3, AArch64::ST3i8);
3493  return;
3494  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3495  VT == MVT::v8f16) {
3496  SelectStoreLane(Node, 3, AArch64::ST3i16);
3497  return;
3498  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3499  VT == MVT::v2f32) {
3500  SelectStoreLane(Node, 3, AArch64::ST3i32);
3501  return;
3502  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3503  VT == MVT::v1f64) {
3504  SelectStoreLane(Node, 3, AArch64::ST3i64);
3505  return;
3506  }
3507  break;
3508  }
3509  case Intrinsic::aarch64_neon_st4lane: {
3510  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3511  SelectStoreLane(Node, 4, AArch64::ST4i8);
3512  return;
3513  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3514  VT == MVT::v8f16) {
3515  SelectStoreLane(Node, 4, AArch64::ST4i16);
3516  return;
3517  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3518  VT == MVT::v2f32) {
3519  SelectStoreLane(Node, 4, AArch64::ST4i32);
3520  return;
3521  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3522  VT == MVT::v1f64) {
3523  SelectStoreLane(Node, 4, AArch64::ST4i64);
3524  return;
3525  }
3526  break;
3527  }
3528  }
3529  break;
3530  }
3531  case AArch64ISD::LD2post: {
3532  if (VT == MVT::v8i8) {
3533  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3534  return;
3535  } else if (VT == MVT::v16i8) {
3536  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3537  return;
3538  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3539  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3540  return;
3541  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3542  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3543  return;
3544  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3545  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3546  return;
3547  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3548  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3549  return;
3550  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3551  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3552  return;
3553  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3554  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3555  return;
3556  }
3557  break;
3558  }
3559  case AArch64ISD::LD3post: {
3560  if (VT == MVT::v8i8) {
3561  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3562  return;
3563  } else if (VT == MVT::v16i8) {
3564  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3565  return;
3566  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3567  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3568  return;
3569  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3570  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3571  return;
3572  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3573  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3574  return;
3575  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3576  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3577  return;
3578  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3579  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3580  return;
3581  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3582  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3583  return;
3584  }
3585  break;
3586  }
3587  case AArch64ISD::LD4post: {
3588  if (VT == MVT::v8i8) {
3589  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3590  return;
3591  } else if (VT == MVT::v16i8) {
3592  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3593  return;
3594  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3595  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3596  return;
3597  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3598  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3599  return;
3600  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3601  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3602  return;
3603  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3604  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3605  return;
3606  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3607  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3608  return;
3609  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3610  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3611  return;
3612  }
3613  break;
3614  }
3615  case AArch64ISD::LD1x2post: {
3616  if (VT == MVT::v8i8) {
3617  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3618  return;
3619  } else if (VT == MVT::v16i8) {
3620  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3621  return;
3622  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3623  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3624  return;
3625  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3626  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3627  return;
3628  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3629  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3630  return;
3631  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3632  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3633  return;
3634  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3635  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3636  return;
3637  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3638  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3639  return;
3640  }
3641  break;
3642  }
3643  case AArch64ISD::LD1x3post: {
3644  if (VT == MVT::v8i8) {
3645  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3646  return;
3647  } else if (VT == MVT::v16i8) {
3648  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3649  return;
3650  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3651  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3652  return;
3653  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3654  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3655  return;
3656  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3657  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3658  return;
3659  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3660  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3661  return;
3662  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3663  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3664  return;
3665  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3666  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3667  return;
3668  }
3669  break;
3670  }
3671  case AArch64ISD::LD1x4post: {
3672  if (VT == MVT::v8i8) {
3673  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3674  return;
3675  } else if (VT == MVT::v16i8) {
3676  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3677  return;
3678  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3679  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3680  return;
3681  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3682  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3683  return;
3684  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3685  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3686  return;
3687  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3688  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3689  return;
3690  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3691  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3692  return;
3693  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3694  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3695  return;
3696  }
3697  break;
3698  }
3699  case AArch64ISD::LD1DUPpost: {
3700  if (VT == MVT::v8i8) {
3701  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3702  return;
3703  } else if (VT == MVT::v16i8) {
3704  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3705  return;
3706  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3707  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3708  return;
3709  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3710  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3711  return;
3712  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3713  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3714  return;
3715  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3716  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3717  return;
3718  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3719  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3720  return;
3721  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3722  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3723  return;
3724  }
3725  break;
3726  }
3727  case AArch64ISD::LD2DUPpost: {
3728  if (VT == MVT::v8i8) {
3729  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3730  return;
3731  } else if (VT == MVT::v16i8) {
3732  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3733  return;
3734  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3735  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3736  return;
3737  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3738  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3739  return;
3740  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3741  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3742  return;
3743  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3744  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3745  return;
3746  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3747  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3748  return;
3749  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3750  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3751  return;
3752  }
3753  break;
3754  }
3755  case AArch64ISD::LD3DUPpost: {
3756  if (VT == MVT::v8i8) {
3757  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3758  return;
3759  } else if (VT == MVT::v16i8) {
3760  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3761  return;
3762  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3763  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3764  return;
3765  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3766  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3767  return;
3768  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3769  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3770  return;
3771  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3772  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3773  return;
3774  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3775  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3776  return;
3777  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3778  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3779  return;
3780  }
3781  break;
3782  }
3783  case AArch64ISD::LD4DUPpost: {
3784  if (VT == MVT::v8i8) {
3785  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3786  return;
3787  } else if (VT == MVT::v16i8) {
3788  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3789  return;
3790  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3791  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3792  return;
3793  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3794  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3795  return;
3796  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3797  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3798  return;
3799  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3800  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3801  return;
3802  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3803  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3804  return;
3805  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3806  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3807  return;
3808  }
3809  break;
3810  }
3811  case AArch64ISD::LD1LANEpost: {
3812  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3813  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3814  return;
3815  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3816  VT == MVT::v8f16) {
3817  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3818  return;
3819  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3820  VT == MVT::v2f32) {
3821  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3822  return;
3823  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3824  VT == MVT::v1f64) {
3825  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3826  return;
3827  }
3828  break;
3829  }
3830  case AArch64ISD::LD2LANEpost: {
3831  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3832  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3833  return;
3834  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3835  VT == MVT::v8f16) {
3836  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3837  return;
3838  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3839  VT == MVT::v2f32) {
3840  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3841  return;
3842  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3843  VT == MVT::v1f64) {
3844  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3845  return;
3846  }
3847  break;
3848  }
3849  case AArch64ISD::LD3LANEpost: {
3850  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3851  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3852  return;
3853  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3854  VT == MVT::v8f16) {
3855  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3856  return;
3857  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3858  VT == MVT::v2f32) {
3859  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3860  return;
3861  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3862  VT == MVT::v1f64) {
3863  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3864  return;
3865  }
3866  break;
3867  }
3868  case AArch64ISD::LD4LANEpost: {
3869  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3870  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3871  return;
3872  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3873  VT == MVT::v8f16) {
3874  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3875  return;
3876  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3877  VT == MVT::v2f32) {
3878  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3879  return;
3880  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3881  VT == MVT::v1f64) {
3882  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3883  return;
3884  }
3885  break;
3886  }
3887  case AArch64ISD::ST2post: {
3888  VT = Node->getOperand(1).getValueType();
3889  if (VT == MVT::v8i8) {
3890  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3891  return;
3892  } else if (VT == MVT::v16i8) {
3893  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3894  return;
3895  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3896  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3897  return;
3898  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3899  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3900  return;
3901  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3902  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3903  return;
3904  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3905  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3906  return;
3907  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3908  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3909  return;
3910  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3911  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3912  return;
3913  }
3914  break;
3915  }
3916  case AArch64ISD::ST3post: {
3917  VT = Node->getOperand(1).getValueType();
3918  if (VT == MVT::v8i8) {
3919  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3920  return;
3921  } else if (VT == MVT::v16i8) {
3922  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3923  return;
3924  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3925  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3926  return;
3927  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3928  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3929  return;
3930  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3931  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3932  return;
3933  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3934  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3935  return;
3936  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3937  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3938  return;
3939  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3940  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3941  return;
3942  }
3943  break;
3944  }
3945  case AArch64ISD::ST4post: {
3946  VT = Node->getOperand(1).getValueType();
3947  if (VT == MVT::v8i8) {
3948  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3949  return;
3950  } else if (VT == MVT::v16i8) {
3951  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3952  return;
3953  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3954  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3955  return;
3956  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3957  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3958  return;
3959  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3960  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3961  return;
3962  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3963  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3964  return;
3965  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3966  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3967  return;
3968  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3969  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3970  return;
3971  }
3972  break;
3973  }
3974  case AArch64ISD::ST1x2post: {
3975  VT = Node->getOperand(1).getValueType();
3976  if (VT == MVT::v8i8) {
3977  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3978  return;
3979  } else if (VT == MVT::v16i8) {
3980  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3981  return;
3982  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3983  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3984  return;
3985  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3986  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3987  return;
3988  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3989  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3990  return;
3991  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3992  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3993  return;
3994  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3995  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3996  return;
3997  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3998  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3999  return;
4000  }
4001  break;
4002  }
4003  case AArch64ISD::ST1x3post: {
4004  VT = Node->getOperand(1).getValueType();
4005  if (VT == MVT::v8i8) {
4006  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4007  return;
4008  } else if (VT == MVT::v16i8) {
4009  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4010  return;
4011  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4012  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4013  return;
4014  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4015  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4016  return;
4017  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4018  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4019  return;
4020  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4021  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4022  return;
4023  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4024  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4025  return;
4026  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4027  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4028  return;
4029  }
4030  break;
4031  }
4032  case AArch64ISD::ST1x4post: {
4033  VT = Node->getOperand(1).getValueType();
4034  if (VT == MVT::v8i8) {
4035  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4036  return;
4037  } else if (VT == MVT::v16i8) {
4038  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4039  return;
4040  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4041  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4042  return;
4043  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4044  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4045  return;
4046  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4047  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4048  return;
4049  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4050  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4051  return;
4052  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4053  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4054  return;
4055  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4056  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4057  return;
4058  }
4059  break;
4060  }
4061  case AArch64ISD::ST2LANEpost: {
4062  VT = Node->getOperand(1).getValueType();
4063  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4064  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4065  return;
4066  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4067  VT == MVT::v8f16) {
4068  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4069  return;
4070  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4071  VT == MVT::v2f32) {
4072  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4073  return;
4074  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4075  VT == MVT::v1f64) {
4076  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4077  return;
4078  }
4079  break;
4080  }
4081  case AArch64ISD::ST3LANEpost: {
4082  VT = Node->getOperand(1).getValueType();
4083  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4084  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4085  return;
4086  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4087  VT == MVT::v8f16) {
4088  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4089  return;
4090  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4091  VT == MVT::v2f32) {
4092  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4093  return;
4094  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4095  VT == MVT::v1f64) {
4096  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4097  return;
4098  }
4099  break;
4100  }
4101  case AArch64ISD::ST4LANEpost: {
4102  VT = Node->getOperand(1).getValueType();
4103  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4104  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4105  return;
4106  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4107  VT == MVT::v8f16) {
4108  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4109  return;
4110  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4111  VT == MVT::v2f32) {
4112  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4113  return;
4114  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4115  VT == MVT::v1f64) {
4116  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4117  return;
4118  }
4119  break;
4120  }
4121  }
4122 
4123  // Select the default instruction
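  // (i.e. hand the node to the TableGen-generated matcher when none of the
  // manual cases above selected it).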
4124  SelectCode(Node);
4125 }
4126 
4127 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4128 /// AArch64-specific DAG, ready for instruction scheduling.
4129 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4130                                          CodeGenOpt::Level OptLevel) {
4131  return new AArch64DAGToDAGISel(TM, OptLevel);
4132 }
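For context, the sketch below shows how this factory is typically hooked into the backend's codegen pipeline; it is modeled on AArch64PassConfig::addInstSelector in AArch64TargetMachine.cpp and is a simplified illustration rather than a verbatim copy of that file.

bool AArch64PassConfig::addInstSelector() {
  // Install the SelectionDAG-based instruction selector created by
  // createAArch64ISelDag for this target machine and optimization level.
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
  return false;
}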