1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the AArch64 target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "aarch64-isel"
29 
30 //===--------------------------------------------------------------------===//
31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32 /// instructions for SelectionDAG operations.
33 ///
34 namespace {
35 
36 class AArch64DAGToDAGISel : public SelectionDAGISel {
37 
38  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
39  /// make the right decision when generating code for different targets.
40  const AArch64Subtarget *Subtarget;
41 
42  bool ForCodeSize;
43 
44 public:
45  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
46  CodeGenOpt::Level OptLevel)
47  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
48  ForCodeSize(false) {}
49 
50  StringRef getPassName() const override {
51  return "AArch64 Instruction Selection";
52  }
53 
54  bool runOnMachineFunction(MachineFunction &MF) override {
55  ForCodeSize = MF.getFunction()->optForSize();
56  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57  return SelectionDAGISel::runOnMachineFunction(MF);
58  }
59 
60  void Select(SDNode *Node) override;
61 
62  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63  /// inline asm expressions.
64  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65  unsigned ConstraintID,
66  std::vector<SDValue> &OutOps) override;
67 
68  bool tryMLAV64LaneV128(SDNode *N);
69  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
70  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74  return SelectShiftedRegister(N, false, Reg, Shift);
75  }
76  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77  return SelectShiftedRegister(N, true, Reg, Shift);
78  }
79  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81  }
82  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84  }
85  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87  }
88  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90  }
91  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93  }
94  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
95  return SelectAddrModeIndexed(N, 1, Base, OffImm);
96  }
97  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
98  return SelectAddrModeIndexed(N, 2, Base, OffImm);
99  }
100  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
101  return SelectAddrModeIndexed(N, 4, Base, OffImm);
102  }
103  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
104  return SelectAddrModeIndexed(N, 8, Base, OffImm);
105  }
106  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
107  return SelectAddrModeIndexed(N, 16, Base, OffImm);
108  }
109  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
110  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
111  }
112  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
113  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
114  }
115  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
116  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
117  }
118  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
119  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
120  }
121  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
122  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
123  }
124 
125  template<int Width>
126  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
127  SDValue &SignExtend, SDValue &DoShift) {
128  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
129  }
130 
131  template<int Width>
132  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
133  SDValue &SignExtend, SDValue &DoShift) {
134  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
135  }
136 
137 
138  /// Form sequences of consecutive 64/128-bit registers for use in NEON
139  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
140  /// between 1 and 4 elements. If it contains a single element, that element is
141  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
142  SDValue createDTuple(ArrayRef<SDValue> Vecs);
143  SDValue createQTuple(ArrayRef<SDValue> Vecs);
144 
145  /// Generic helper for the createDTuple/createQTuple
146  /// functions. Those should almost always be called instead.
147  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
148  const unsigned SubRegs[]);
149 
150  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
151 
152  bool tryIndexedLoad(SDNode *N);
153 
154  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
155  unsigned SubRegIdx);
156  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
157  unsigned SubRegIdx);
158  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
159  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160 
161  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
162  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
164  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165 
166  bool tryBitfieldExtractOp(SDNode *N);
167  bool tryBitfieldExtractOpFromSExt(SDNode *N);
168  bool tryBitfieldInsertOp(SDNode *N);
169  bool tryBitfieldInsertInZeroOp(SDNode *N);
170 
171  bool tryReadRegister(SDNode *N);
172  bool tryWriteRegister(SDNode *N);
173 
174 // Include the pieces autogenerated from the target description.
175 #include "AArch64GenDAGISel.inc"
176 
177 private:
178  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
179  SDValue &Shift);
180  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
181  SDValue &OffImm);
182  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
183  SDValue &OffImm);
184  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
185  SDValue &OffImm);
186  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
187  SDValue &Offset, SDValue &SignExtend,
188  SDValue &DoShift);
189  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
190  SDValue &Offset, SDValue &SignExtend,
191  SDValue &DoShift);
192  bool isWorthFolding(SDValue V) const;
193  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
194  SDValue &Offset, SDValue &SignExtend);
195 
196  template<unsigned RegWidth>
197  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
198  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
199  }
200 
201  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
202 
203  void SelectCMP_SWAP(SDNode *N);
204 
205 };
206 } // end anonymous namespace
207 
208 /// isIntImmediate - This method tests to see if the node is a constant
209 /// operand. If so Imm will receive the 32-bit value.
210 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
211  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
212  Imm = C->getZExtValue();
213  return true;
214  }
215  return false;
216 }
217 
218 // isIntImmediate - This method tests to see if the value is a constant
219 // operand. If so, Imm will receive the value.
220 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
221  return isIntImmediate(N.getNode(), Imm);
222 }
223 
224 // isOpcWithIntImmediate - This method tests to see if the node is a specific
225 // opcode and that it has an immediate integer right operand.
226 // If so, Imm will receive the 32-bit value.
227 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
228  uint64_t &Imm) {
229  return N->getOpcode() == Opc &&
230  isIntImmediate(N->getOperand(1).getNode(), Imm);
231 }
232 
233 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
234  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
235  switch(ConstraintID) {
236  default:
237  llvm_unreachable("Unexpected asm memory constraint");
238  case InlineAsm::Constraint_i:
239  case InlineAsm::Constraint_m:
240  case InlineAsm::Constraint_Q:
241  // Require the address to be in a register. That is safe for all AArch64
242  // variants and it is hard to do anything much smarter without knowing
243  // how the operand is used.
244  OutOps.push_back(Op);
245  return false;
246  }
247  return true;
248 }
249 
250 /// SelectArithImmed - Select an immediate value that can be represented as
251 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
252 /// Val set to the 12-bit value and Shift set to the shifter operand.
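/// For illustration (based on the checks below): 0x123 is selected as #0x123
/// with LSL #0, 0x123000 as #0x123 with LSL #12, and 0x123456 is rejected
/// because it needs set bits in both halves.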
253 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
254  SDValue &Shift) {
255  // This function is called from the addsub_shifted_imm ComplexPattern,
256  // which lists [imm] as the list of opcodes it's interested in; however,
257  // we still need to check whether the operand is actually an immediate
258  // here because the ComplexPattern opcode list is only used in
259  // root-level opcode matching.
260  if (!isa<ConstantSDNode>(N.getNode()))
261  return false;
262 
263  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
264  unsigned ShiftAmt;
265 
266  if (Immed >> 12 == 0) {
267  ShiftAmt = 0;
268  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
269  ShiftAmt = 12;
270  Immed = Immed >> 12;
271  } else
272  return false;
273 
274  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
275  SDLoc dl(N);
276  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
277  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
278  return true;
279 }
280 
281 /// SelectNegArithImmed - As above, but negates the value before trying to
282 /// select it.
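/// For illustration (based on the code below): an i32 immediate of -5 is
/// negated to 5 and selected as #5 with LSL #0; an immediate of 0 is rejected
/// so that "cmp wN, #0" and "cmn wN, #0" keep their C-flag semantics.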
283 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
284  SDValue &Shift) {
285  // This function is called from the addsub_shifted_imm ComplexPattern,
286  // which lists [imm] as the list of opcodes it's interested in; however,
287  // we still need to check whether the operand is actually an immediate
288  // here because the ComplexPattern opcode list is only used in
289  // root-level opcode matching.
290  if (!isa<ConstantSDNode>(N.getNode()))
291  return false;
292 
293  // The immediate operand must be a 24-bit zero-extended immediate.
294  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
295 
296  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
297  // have the opposite effect on the C flag, so this pattern mustn't match under
298  // those circumstances.
299  if (Immed == 0)
300  return false;
301 
302  if (N.getValueType() == MVT::i32)
303  Immed = ~((uint32_t)Immed) + 1;
304  else
305  Immed = ~Immed + 1ULL;
306  if (Immed & 0xFFFFFFFFFF000000ULL)
307  return false;
308 
309  Immed &= 0xFFFFFFULL;
310  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
311  Shift);
312 }
313 
314 /// getShiftTypeForNode - Translate a shift node to the corresponding
315 /// ShiftType value.
316 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
317  switch (N.getOpcode()) {
318  default:
319  return AArch64_AM::InvalidShiftExtend;
320  case ISD::SHL:
321  return AArch64_AM::LSL;
322  case ISD::SRL:
323  return AArch64_AM::LSR;
324  case ISD::SRA:
325  return AArch64_AM::ASR;
326  case ISD::ROTR:
327  return AArch64_AM::ROR;
328  }
329 }
330 
331 /// \brief Determine whether it is worth folding V into an extended register.
332 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
333  // It hurts if the value is used at least twice, unless we are optimizing
334  // for code size.
335  return ForCodeSize || V.hasOneUse();
336 }
337 
338 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
339 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
340 /// instructions allow the shifted register to be rotated, but the arithmetic
341 /// instructions do not. The AllowROR parameter specifies whether ROR is
342 /// supported.
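/// For illustration: a (shl x1, 3) operand becomes Reg = x1 with Shift =
/// "LSL #3", provided the shift amount is constant and folding is worthwhile
/// (single use, or optimizing for size).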
343 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
344  SDValue &Reg, SDValue &Shift) {
345  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
346  if (ShType == AArch64_AM::InvalidShiftExtend)
347  return false;
348  if (!AllowROR && ShType == AArch64_AM::ROR)
349  return false;
350 
351  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
352  unsigned BitSize = N.getValueSizeInBits();
353  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
354  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
355 
356  Reg = N.getOperand(0);
357  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
358  return isWorthFolding(N);
359  }
360 
361  return false;
362 }
363 
364 /// getExtendTypeForNode - Translate an extend node to the corresponding
365 /// ExtendType value.
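/// For illustration: (sign_extend_inreg x, i8) maps to SXTB, a zero_extend
/// from i16 maps to UXTH, and (and x, 0xffff) maps to UXTH unless IsLoadStore,
/// in which case only the 0xffffffff mask (UXTW) is accepted.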
366 static AArch64_AM::ShiftExtendType
367 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
368  if (N.getOpcode() == ISD::SIGN_EXTEND ||
369  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
370  EVT SrcVT;
371  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
372  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
373  else
374  SrcVT = N.getOperand(0).getValueType();
375 
376  if (!IsLoadStore && SrcVT == MVT::i8)
377  return AArch64_AM::SXTB;
378  else if (!IsLoadStore && SrcVT == MVT::i16)
379  return AArch64_AM::SXTH;
380  else if (SrcVT == MVT::i32)
381  return AArch64_AM::SXTW;
382  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
383 
384  return AArch64_AM::InvalidShiftExtend;
385  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
386  N.getOpcode() == ISD::ANY_EXTEND) {
387  EVT SrcVT = N.getOperand(0).getValueType();
388  if (!IsLoadStore && SrcVT == MVT::i8)
389  return AArch64_AM::UXTB;
390  else if (!IsLoadStore && SrcVT == MVT::i16)
391  return AArch64_AM::UXTH;
392  else if (SrcVT == MVT::i32)
393  return AArch64_AM::UXTW;
394  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
395 
396  return AArch64_AM::InvalidShiftExtend;
397  } else if (N.getOpcode() == ISD::AND) {
398  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
399  if (!CSD)
400  return AArch64_AM::InvalidShiftExtend;
401  uint64_t AndMask = CSD->getZExtValue();
402 
403  switch (AndMask) {
404  default:
405  return AArch64_AM::InvalidShiftExtend;
406  case 0xFF:
407  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
408  case 0xFFFF:
409  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
410  case 0xFFFFFFFF:
411  return AArch64_AM::UXTW;
412  }
413  }
414 
415  return AArch64_AM::InvalidShiftExtend;
416 }
417 
418 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
419 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
420  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
421  DL->getOpcode() != AArch64ISD::DUPLANE32)
422  return false;
423 
424  SDValue SV = DL->getOperand(0);
425  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
426  return false;
427 
428  SDValue EV = SV.getOperand(1);
429  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
430  return false;
431 
432  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
433  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
434  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
435  LaneOp = EV.getOperand(0);
436 
437  return true;
438 }
439 
440 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
441 // high lane extract.
442 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
443  SDValue &LaneOp, int &LaneIdx) {
444 
445  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
446  std::swap(Op0, Op1);
447  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
448  return false;
449  }
450  StdOp = Op1;
451  return true;
452 }
453 
454 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
455 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
456 /// so that we don't emit unnecessary lane extracts.
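/// For illustration: an (add X, (mul Y, (duplane of the high half of a
/// 128-bit vector))) on v4i16 selects to MLAv4i16_indexed, with the lane
/// index rebased onto the original 128-bit register rather than an extracted
/// 64-bit half.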
457 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
458  SDLoc dl(N);
459  SDValue Op0 = N->getOperand(0);
460  SDValue Op1 = N->getOperand(1);
461  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
462  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
463  int LaneIdx = -1; // Will hold the lane index.
464 
465  if (Op1.getOpcode() != ISD::MUL ||
466  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
467  LaneIdx)) {
468  std::swap(Op0, Op1);
469  if (Op1.getOpcode() != ISD::MUL ||
470  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
471  LaneIdx))
472  return false;
473  }
474 
475  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
476 
477  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
478 
479  unsigned MLAOpc = ~0U;
480 
481  switch (N->getSimpleValueType(0).SimpleTy) {
482  default:
483  llvm_unreachable("Unrecognized MLA.");
484  case MVT::v4i16:
485  MLAOpc = AArch64::MLAv4i16_indexed;
486  break;
487  case MVT::v8i16:
488  MLAOpc = AArch64::MLAv8i16_indexed;
489  break;
490  case MVT::v2i32:
491  MLAOpc = AArch64::MLAv2i32_indexed;
492  break;
493  case MVT::v4i32:
494  MLAOpc = AArch64::MLAv4i32_indexed;
495  break;
496  }
497 
498  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
499  return true;
500 }
501 
502 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
503  SDLoc dl(N);
504  SDValue SMULLOp0;
505  SDValue SMULLOp1;
506  int LaneIdx;
507 
508  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
509  LaneIdx))
510  return false;
511 
512  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
513 
514  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
515 
516  unsigned SMULLOpc = ~0U;
517 
518  if (IntNo == Intrinsic::aarch64_neon_smull) {
519  switch (N->getSimpleValueType(0).SimpleTy) {
520  default:
521  llvm_unreachable("Unrecognized SMULL.");
522  case MVT::v4i32:
523  SMULLOpc = AArch64::SMULLv4i16_indexed;
524  break;
525  case MVT::v2i64:
526  SMULLOpc = AArch64::SMULLv2i32_indexed;
527  break;
528  }
529  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
530  switch (N->getSimpleValueType(0).SimpleTy) {
531  default:
532  llvm_unreachable("Unrecognized SMULL.");
533  case MVT::v4i32:
534  SMULLOpc = AArch64::UMULLv4i16_indexed;
535  break;
536  case MVT::v2i64:
537  SMULLOpc = AArch64::UMULLv2i32_indexed;
538  break;
539  }
540  } else
541  llvm_unreachable("Unrecognized intrinsic.");
542 
543  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
544  return true;
545 }
546 
547 /// Instructions that accept extend modifiers like UXTW expect the register
548 /// being extended to be a GPR32, but the incoming DAG might be acting on a
549 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
550 /// this is the case.
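/// For illustration: a GPR64 value feeding a UXTB/SXTB-extended operand is
/// narrowed here with an EXTRACT_SUBREG of sub_32 so the extend sees a GPR32.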
551 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
552  if (N.getValueType() == MVT::i32)
553  return N;
554 
555  SDLoc dl(N);
556  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
557  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
558  dl, MVT::i32, N, SubReg);
559  return SDValue(Node, 0);
560 }
561 
562 
563 /// SelectArithExtendedRegister - Select a "extended register" operand. This
564 /// operand folds in an extend followed by an optional left shift.
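/// For illustration: (add x0, (shl (sign_extend_inreg w1, i8), 2)) folds its
/// right-hand operand as "w1, SXTB #2"; shift amounts above 4 are rejected.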
565 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
566  SDValue &Shift) {
567  unsigned ShiftVal = 0;
568  AArch64_AM::ShiftExtendType Ext;
569 
570  if (N.getOpcode() == ISD::SHL) {
571  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
572  if (!CSD)
573  return false;
574  ShiftVal = CSD->getZExtValue();
575  if (ShiftVal > 4)
576  return false;
577 
578  Ext = getExtendTypeForNode(N.getOperand(0));
579  if (Ext == AArch64_AM::InvalidShiftExtend)
580  return false;
581 
582  Reg = N.getOperand(0).getOperand(0);
583  } else {
584  Ext = getExtendTypeForNode(N);
585  if (Ext == AArch64_AM::InvalidShiftExtend)
586  return false;
587 
588  Reg = N.getOperand(0);
589 
590  // Don't match if free 32-bit -> 64-bit zext can be used instead.
591  if (Ext == AArch64_AM::UXTW &&
592  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
593  return false;
594  }
595 
596  // AArch64 mandates that the RHS of the operation must use the smallest
597  // register class that could contain the size being extended from. Thus,
598  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
599  // there might not be an actual 32-bit value in the program. We can
600  // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
601  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
602  Reg = narrowIfNeeded(CurDAG, Reg);
603  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
604  MVT::i32);
605  return isWorthFolding(N);
606 }
607 
608 /// If there's a use of this ADDlow that's not itself a load/store then we'll
609 /// need to create a real ADD instruction from it anyway and there's no point in
610 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
611 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
612 /// leads to duplicated ADRP instructions.
613 static bool isWorthFoldingADDlow(SDValue N) {
614  for (auto Use : N->uses()) {
615  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
616  Use->getOpcode() != ISD::ATOMIC_LOAD &&
617  Use->getOpcode() != ISD::ATOMIC_STORE)
618  return false;
619 
620  // ldar and stlr have much more restrictive addressing modes (just a
621  // register).
622  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
623  return false;
624  }
625 
626  return true;
627 }
628 
629 /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
630 /// immediate" address. The "Size" argument is the size in bytes of the memory
631 /// reference, which determines the scale.
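/// For illustration, with Size == 8 (e.g. 64-bit LDP/STP) the acceptable
/// offsets are multiples of 8 in [-512, 504]; the encoded OffImm is the byte
/// offset divided by 8.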
632 bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
633  SDValue &Base,
634  SDValue &OffImm) {
635  SDLoc dl(N);
636  const DataLayout &DL = CurDAG->getDataLayout();
637  const TargetLowering *TLI = getTargetLowering();
638  if (N.getOpcode() == ISD::FrameIndex) {
639  int FI = cast<FrameIndexSDNode>(N)->getIndex();
640  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
641  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
642  return true;
643  }
644 
645  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
646  // selected here doesn't support labels/immediates, only base+offset.
647 
648  if (CurDAG->isBaseWithConstantOffset(N)) {
649  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
650  int64_t RHSC = RHS->getSExtValue();
651  unsigned Scale = Log2_32(Size);
652  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
653  RHSC < (0x40 << Scale)) {
654  Base = N.getOperand(0);
655  if (Base.getOpcode() == ISD::FrameIndex) {
656  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
657  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
658  }
659  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
660  return true;
661  }
662  }
663  }
664 
665  // Base only. The address will be materialized into a register before
666  // the memory is accessed.
667  // add x0, Xbase, #offset
668  // stp x1, x2, [x0]
669  Base = N;
670  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
671  return true;
672 }
673 
674 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
675 /// immediate" address. The "Size" argument is the size in bytes of the memory
676 /// reference, which determines the scale.
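/// For illustration, with Size == 8 the acceptable offsets are multiples of 8
/// in [0, 32760], encoded as offset/8 in the instruction's unsigned 12-bit
/// immediate field.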
677 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
678  SDValue &Base, SDValue &OffImm) {
679  SDLoc dl(N);
680  const DataLayout &DL = CurDAG->getDataLayout();
681  const TargetLowering *TLI = getTargetLowering();
682  if (N.getOpcode() == ISD::FrameIndex) {
683  int FI = cast<FrameIndexSDNode>(N)->getIndex();
684  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
685  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
686  return true;
687  }
688 
689  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
690  GlobalAddressSDNode *GAN =
691  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
692  Base = N.getOperand(0);
693  OffImm = N.getOperand(1);
694  if (!GAN)
695  return true;
696 
697  const GlobalValue *GV = GAN->getGlobal();
698  unsigned Alignment = GV->getAlignment();
699  Type *Ty = GV->getValueType();
700  if (Alignment == 0 && Ty->isSized())
701  Alignment = DL.getABITypeAlignment(Ty);
702 
703  if (Alignment >= Size)
704  return true;
705  }
706 
707  if (CurDAG->isBaseWithConstantOffset(N)) {
708  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
709  int64_t RHSC = (int64_t)RHS->getZExtValue();
710  unsigned Scale = Log2_32(Size);
711  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
712  Base = N.getOperand(0);
713  if (Base.getOpcode() == ISD::FrameIndex) {
714  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
715  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
716  }
717  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
718  return true;
719  }
720  }
721  }
722 
723  // Before falling back to our general case, check if the unscaled
724  // instructions can handle this. If so, that's preferable.
725  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
726  return false;
727 
728  // Base only. The address will be materialized into a register before
729  // the memory is accessed.
730  // add x0, Xbase, #offset
731  // ldr x0, [x0]
732  Base = N;
733  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
734  return true;
735 }
736 
737 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
738 /// immediate" address. This should only match when there is an offset that
739 /// is not valid for a scaled immediate addressing mode. The "Size" argument
740 /// is the size in bytes of the memory reference, which is needed here to know
741 /// what is valid for a scaled immediate.
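/// For illustration: an offset of -8 or 9 on a 64-bit access is not a valid
/// scaled unsigned 12-bit immediate, but it fits the signed 9-bit range
/// [-256, 255] handled here (e.g. LDUR/STUR).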
742 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
743  SDValue &Base,
744  SDValue &OffImm) {
745  if (!CurDAG->isBaseWithConstantOffset(N))
746  return false;
747  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
748  int64_t RHSC = RHS->getSExtValue();
749  // If the offset is valid as a scaled immediate, don't match here.
750  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
751  RHSC < (0x1000 << Log2_32(Size)))
752  return false;
753  if (RHSC >= -256 && RHSC < 256) {
754  Base = N.getOperand(0);
755  if (Base.getOpcode() == ISD::FrameIndex) {
756  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
757  const TargetLowering *TLI = getTargetLowering();
758  Base = CurDAG->getTargetFrameIndex(
759  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
760  }
761  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
762  return true;
763  }
764  }
765  return false;
766 }
767 
768 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
769  SDLoc dl(N);
770  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
771  SDValue ImpDef = SDValue(
772  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
773  MachineSDNode *Node = CurDAG->getMachineNode(
774  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
775  return SDValue(Node, 0);
776 }
777 
778 /// \brief Check if the given SHL node (\p N), can be used to form an
779 /// extended register for an addressing mode.
780 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
781  bool WantExtend, SDValue &Offset,
782  SDValue &SignExtend) {
783  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
784  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
785  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
786  return false;
787 
788  SDLoc dl(N);
789  if (WantExtend) {
790  AArch64_AM::ShiftExtendType Ext =
791  getExtendTypeForNode(N.getOperand(0), true);
792  if (Ext == AArch64_AM::InvalidShiftExtend)
793  return false;
794 
795  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
796  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
797  MVT::i32);
798  } else {
799  Offset = N.getOperand(0);
800  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
801  }
802 
803  unsigned LegalShiftVal = Log2_32(Size);
804  unsigned ShiftVal = CSD->getZExtValue();
805 
806  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
807  return false;
808 
809  return isWorthFolding(N);
810 }
811 
812 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
813  SDValue &Base, SDValue &Offset,
814  SDValue &SignExtend,
815  SDValue &DoShift) {
816  if (N.getOpcode() != ISD::ADD)
817  return false;
818  SDValue LHS = N.getOperand(0);
819  SDValue RHS = N.getOperand(1);
820  SDLoc dl(N);
821 
822  // We don't want to match immediate adds here, because they are better lowered
823  // to the register-immediate addressing modes.
824  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
825  return false;
826 
827  // Check if this particular node is reused in any non-memory related
828  // operation. If yes, do not try to fold this node into the address
829  // computation, since the computation will be kept.
830  const SDNode *Node = N.getNode();
831  for (SDNode *UI : Node->uses()) {
832  if (!isa<MemSDNode>(*UI))
833  return false;
834  }
835 
836  // Remember if it is worth folding N when it produces extended register.
837  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
838 
839  // Try to match a shifted extend on the RHS.
840  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
841  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
842  Base = LHS;
843  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
844  return true;
845  }
846 
847  // Try to match a shifted extend on the LHS.
848  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
849  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
850  Base = RHS;
851  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
852  return true;
853  }
854 
855  // There was no shift, whatever else we find.
856  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
857 
858  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
859  // Try to match an unshifted extend on the LHS.
860  if (IsExtendedRegisterWorthFolding &&
861  (Ext = getExtendTypeForNode(LHS, true)) !=
862  AArch64_AM::InvalidShiftExtend) {
863  Base = RHS;
864  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
865  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
866  MVT::i32);
867  if (isWorthFolding(LHS))
868  return true;
869  }
870 
871  // Try to match an unshifted extend on the RHS.
872  if (IsExtendedRegisterWorthFolding &&
873  (Ext = getExtendTypeForNode(RHS, true)) !=
874  AArch64_AM::InvalidShiftExtend) {
875  Base = LHS;
876  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
877  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
878  MVT::i32);
879  if (isWorthFolding(RHS))
880  return true;
881  }
882 
883  return false;
884 }
885 
886 // Check if the given immediate is preferred by ADD. If an immediate can be
887 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
888 // encoded by a single MOVZ, return true.
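// For illustration: 0xfff is encodable directly in an ADD; 0x123000 needs
// "ADD ... LSL #12" and no single MOVZ covers it, so it is preferred;
// 0x30000 can be built with one MOVZ (LSL #16), so it is not.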
889 static bool isPreferredADD(int64_t ImmOff) {
890  // Constant in [0x0, 0xfff] can be encoded in ADD.
891  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
892  return true;
893  // Check if it can be encoded in an "ADD LSL #12".
894  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
895  // Since a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
896  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
897  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
898  return false;
899 }
900 
901 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
902  SDValue &Base, SDValue &Offset,
903  SDValue &SignExtend,
904  SDValue &DoShift) {
905  if (N.getOpcode() != ISD::ADD)
906  return false;
907  SDValue LHS = N.getOperand(0);
908  SDValue RHS = N.getOperand(1);
909  SDLoc DL(N);
910 
911  // Check if this particular node is reused in any non-memory related
912  // operation. If yes, do not try to fold this node into the address
913  // computation, since the computation will be kept.
914  const SDNode *Node = N.getNode();
915  for (SDNode *UI : Node->uses()) {
916  if (!isa<MemSDNode>(*UI))
917  return false;
918  }
919 
920  // Watch out if RHS is a wide immediate: it cannot be selected into the
921  // [BaseReg+Imm] addressing mode, and it may not be encodable in ADD/SUB
922  // either. In that case [BaseReg + 0] addressing would be used, generating
923  // instructions like:
924  // MOV X0, WideImmediate
925  // ADD X1, BaseReg, X0
926  // LDR X2, [X1, 0]
927  // In such a situation, the [BaseReg, XReg] addressing mode can save one
928  // ADD/SUB:
929  // MOV X0, WideImmediate
930  // LDR X2, [BaseReg, X0]
931  if (isa<ConstantSDNode>(RHS)) {
932  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
933  unsigned Scale = Log2_32(Size);
934  // Skip immediates that can be selected in the load/store addressing mode.
935  // Also skip immediates that can be encoded by a single ADD (SUB is also
936  // checked by using -ImmOff).
937  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
938  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
939  return false;
940 
941  SDValue Ops[] = { RHS };
942  SDNode *MOVI =
943  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
944  SDValue MOVIV = SDValue(MOVI, 0);
945  // This ADD of two X register will be selected into [Reg+Reg] mode.
946  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
947  }
948 
949  // Remember if it is worth folding N when it produces extended register.
950  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
951 
952  // Try to match a shifted extend on the RHS.
953  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
954  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
955  Base = LHS;
956  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
957  return true;
958  }
959 
960  // Try to match a shifted extend on the LHS.
961  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
962  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
963  Base = RHS;
964  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
965  return true;
966  }
967 
968  // Match any non-shifted, non-extend, non-immediate add expression.
969  Base = LHS;
970  Offset = RHS;
971  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
972  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
973  // Reg1 + Reg2 is free: no check needed.
974  return true;
975 }
976 
977 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
978  static const unsigned RegClassIDs[] = {
979  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
980  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
981  AArch64::dsub2, AArch64::dsub3};
982 
983  return createTuple(Regs, RegClassIDs, SubRegs);
984 }
985 
986 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
987  static const unsigned RegClassIDs[] = {
988  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
989  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
990  AArch64::qsub2, AArch64::qsub3};
991 
992  return createTuple(Regs, RegClassIDs, SubRegs);
993 }
994 
995 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
996  const unsigned RegClassIDs[],
997  const unsigned SubRegs[]) {
998  // There's no special register-class for a vector-list of 1 element: it's just
999  // a vector.
1000  if (Regs.size() == 1)
1001  return Regs[0];
1002 
1003  assert(Regs.size() >= 2 && Regs.size() <= 4);
1004 
1005  SDLoc DL(Regs[0]);
1006 
1007  SmallVector<SDValue, 4> Ops;
1008 
1009  // First operand of REG_SEQUENCE is the desired RegClass.
1010  Ops.push_back(
1011  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1012 
1013  // Then we get pairs of source & subregister-position for the components.
1014  for (unsigned i = 0; i < Regs.size(); ++i) {
1015  Ops.push_back(Regs[i]);
1016  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1017  }
1018 
1019  SDNode *N =
1020  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1021  return SDValue(N, 0);
1022 }
1023 
1024 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1025  bool isExt) {
1026  SDLoc dl(N);
1027  EVT VT = N->getValueType(0);
1028 
1029  unsigned ExtOff = isExt;
1030 
1031  // Form a REG_SEQUENCE to force register allocation.
1032  unsigned Vec0Off = ExtOff + 1;
1033  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1034  N->op_begin() + Vec0Off + NumVecs);
1035  SDValue RegSeq = createQTuple(Regs);
1036 
1037  SmallVector<SDValue, 6> Ops;
1038  if (isExt)
1039  Ops.push_back(N->getOperand(1));
1040  Ops.push_back(RegSeq);
1041  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1042  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1043 }
1044 
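// Select pre/post-indexed loads: e.g. a post-incremented i64 load becomes
// LDRXpost, producing the updated base address, the loaded value and the
// chain. Narrow zero-extending loads to i64 additionally wrap the i32 result
// in a SUBREG_TO_REG.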
1045 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1046  LoadSDNode *LD = cast<LoadSDNode>(N);
1047  if (LD->isUnindexed())
1048  return false;
1049  EVT VT = LD->getMemoryVT();
1050  EVT DstVT = N->getValueType(0);
1051  ISD::MemIndexedMode AM = LD->getAddressingMode();
1052  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1053 
1054  // We're not doing validity checking here. That was done when checking
1055  // if we should mark the load as indexed or not. We're just selecting
1056  // the right instruction.
1057  unsigned Opcode = 0;
1058 
1059  ISD::LoadExtType ExtType = LD->getExtensionType();
1060  bool InsertTo64 = false;
1061  if (VT == MVT::i64)
1062  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1063  else if (VT == MVT::i32) {
1064  if (ExtType == ISD::NON_EXTLOAD)
1065  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1066  else if (ExtType == ISD::SEXTLOAD)
1067  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1068  else {
1069  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1070  InsertTo64 = true;
1071  // The result of the load is only i32. It's the subreg_to_reg that makes
1072  // it into an i64.
1073  DstVT = MVT::i32;
1074  }
1075  } else if (VT == MVT::i16) {
1076  if (ExtType == ISD::SEXTLOAD) {
1077  if (DstVT == MVT::i64)
1078  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1079  else
1080  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1081  } else {
1082  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1083  InsertTo64 = DstVT == MVT::i64;
1084  // The result of the load is only i32. It's the subreg_to_reg that makes
1085  // it into an i64.
1086  DstVT = MVT::i32;
1087  }
1088  } else if (VT == MVT::i8) {
1089  if (ExtType == ISD::SEXTLOAD) {
1090  if (DstVT == MVT::i64)
1091  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1092  else
1093  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1094  } else {
1095  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1096  InsertTo64 = DstVT == MVT::i64;
1097  // The result of the load is only i32. It's the subreg_to_reg that makes
1098  // it into an i64.
1099  DstVT = MVT::i32;
1100  }
1101  } else if (VT == MVT::f16) {
1102  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1103  } else if (VT == MVT::f32) {
1104  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1105  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1106  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1107  } else if (VT.is128BitVector()) {
1108  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1109  } else
1110  return false;
1111  SDValue Chain = LD->getChain();
1112  SDValue Base = LD->getBasePtr();
1113  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1114  int OffsetVal = (int)OffsetOp->getZExtValue();
1115  SDLoc dl(N);
1116  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1117  SDValue Ops[] = { Base, Offset, Chain };
1118  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1119  MVT::Other, Ops);
1120  // Either way, we're replacing the node, so tell the caller that.
1121  SDValue LoadedVal = SDValue(Res, 1);
1122  if (InsertTo64) {
1123  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1124  LoadedVal =
1125  SDValue(CurDAG->getMachineNode(
1126  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1127  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1128  SubReg),
1129  0);
1130  }
1131 
1132  ReplaceUses(SDValue(N, 0), LoadedVal);
1133  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1134  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1135  CurDAG->RemoveDeadNode(N);
1136  return true;
1137 }
1138 
1139 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1140  unsigned SubRegIdx) {
1141  SDLoc dl(N);
1142  EVT VT = N->getValueType(0);
1143  SDValue Chain = N->getOperand(0);
1144 
1145  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1146  Chain};
1147 
1148  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1149 
1150  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1151  SDValue SuperReg = SDValue(Ld, 0);
1152  for (unsigned i = 0; i < NumVecs; ++i)
1153  ReplaceUses(SDValue(N, i),
1154  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1155 
1156  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1157 
1158  // Transfer memoperands.
1159  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1160  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1161  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
1162 
1163  CurDAG->RemoveDeadNode(N);
1164 }
1165 
1166 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1167  unsigned Opc, unsigned SubRegIdx) {
1168  SDLoc dl(N);
1169  EVT VT = N->getValueType(0);
1170  SDValue Chain = N->getOperand(0);
1171 
1172  SDValue Ops[] = {N->getOperand(1), // Mem operand
1173  N->getOperand(2), // Incremental
1174  Chain};
1175 
1176  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1177  MVT::Untyped, MVT::Other};
1178 
1179  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1180 
1181  // Update uses of write back register
1182  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1183 
1184  // Update uses of vector list
1185  SDValue SuperReg = SDValue(Ld, 1);
1186  if (NumVecs == 1)
1187  ReplaceUses(SDValue(N, 0), SuperReg);
1188  else
1189  for (unsigned i = 0; i < NumVecs; ++i)
1190  ReplaceUses(SDValue(N, i),
1191  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1192 
1193  // Update the chain
1194  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1195  CurDAG->RemoveDeadNode(N);
1196 }
1197 
1198 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1199  unsigned Opc) {
1200  SDLoc dl(N);
1201  EVT VT = N->getOperand(2)->getValueType(0);
1202 
1203  // Form a REG_SEQUENCE to force register allocation.
1204  bool Is128Bit = VT.getSizeInBits() == 128;
1205  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1206  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1207 
1208  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1209  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1210 
1211  // Transfer memoperands.
1212  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1213  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1214  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1215 
1216  ReplaceNode(N, St);
1217 }
1218 
1219 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1220  unsigned Opc) {
1221  SDLoc dl(N);
1222  EVT VT = N->getOperand(2)->getValueType(0);
1223  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1224  MVT::Other}; // Type for the Chain
1225 
1226  // Form a REG_SEQUENCE to force register allocation.
1227  bool Is128Bit = VT.getSizeInBits() == 128;
1228  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1229  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1230 
1231  SDValue Ops[] = {RegSeq,
1232  N->getOperand(NumVecs + 1), // base register
1233  N->getOperand(NumVecs + 2), // Incremental
1234  N->getOperand(0)}; // Chain
1235  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1236 
1237  ReplaceNode(N, St);
1238 }
1239 
1240 namespace {
1241 /// WidenVector - Given a value in the V64 register class, produce the
1242 /// equivalent value in the V128 register class.
1243 class WidenVector {
1244  SelectionDAG &DAG;
1245 
1246 public:
1247  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1248 
1249  SDValue operator()(SDValue V64Reg) {
1250  EVT VT = V64Reg.getValueType();
1251  unsigned NarrowSize = VT.getVectorNumElements();
1252  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1253  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1254  SDLoc DL(V64Reg);
1255 
1256  SDValue Undef =
1257  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1258  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1259  }
1260 };
1261 } // namespace
1262 
1263 /// NarrowVector - Given a value in the V128 register class, produce the
1264 /// equivalent value in the V64 register class.
1265 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1266  EVT VT = V128Reg.getValueType();
1267  unsigned WideSize = VT.getVectorNumElements();
1268  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1269  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1270 
1271  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1272  V128Reg);
1273 }
1274 
1275 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1276  unsigned Opc) {
1277  SDLoc dl(N);
1278  EVT VT = N->getValueType(0);
1279  bool Narrow = VT.getSizeInBits() == 64;
1280 
1281  // Form a REG_SEQUENCE to force register allocation.
1282  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1283 
1284  if (Narrow)
1285  transform(Regs, Regs.begin(),
1286  WidenVector(*CurDAG));
1287 
1288  SDValue RegSeq = createQTuple(Regs);
1289 
1290  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1291 
1292  unsigned LaneNo =
1293  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1294 
1295  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1296  N->getOperand(NumVecs + 3), N->getOperand(0)};
1297  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1298  SDValue SuperReg = SDValue(Ld, 0);
1299 
1300  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1301  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1302  AArch64::qsub2, AArch64::qsub3 };
1303  for (unsigned i = 0; i < NumVecs; ++i) {
1304  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1305  if (Narrow)
1306  NV = NarrowVector(NV, *CurDAG);
1307  ReplaceUses(SDValue(N, i), NV);
1308  }
1309 
1310  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1311  CurDAG->RemoveDeadNode(N);
1312 }
1313 
1314 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1315  unsigned Opc) {
1316  SDLoc dl(N);
1317  EVT VT = N->getValueType(0);
1318  bool Narrow = VT.getSizeInBits() == 64;
1319 
1320  // Form a REG_SEQUENCE to force register allocation.
1321  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1322 
1323  if (Narrow)
1324  transform(Regs, Regs.begin(),
1325  WidenVector(*CurDAG));
1326 
1327  SDValue RegSeq = createQTuple(Regs);
1328 
1329  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1330  RegSeq->getValueType(0), MVT::Other};
1331 
1332  unsigned LaneNo =
1333  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1334 
1335  SDValue Ops[] = {RegSeq,
1336  CurDAG->getTargetConstant(LaneNo, dl,
1337  MVT::i64), // Lane Number
1338  N->getOperand(NumVecs + 2), // Base register
1339  N->getOperand(NumVecs + 3), // Incremental
1340  N->getOperand(0)};
1341  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1342 
1343  // Update uses of the write back register
1344  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1345 
1346  // Update uses of the vector list
1347  SDValue SuperReg = SDValue(Ld, 1);
1348  if (NumVecs == 1) {
1349  ReplaceUses(SDValue(N, 0),
1350  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1351  } else {
1352  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1353  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1354  AArch64::qsub2, AArch64::qsub3 };
1355  for (unsigned i = 0; i < NumVecs; ++i) {
1356  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1357  SuperReg);
1358  if (Narrow)
1359  NV = NarrowVector(NV, *CurDAG);
1360  ReplaceUses(SDValue(N, i), NV);
1361  }
1362  }
1363 
1364  // Update the Chain
1365  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1366  CurDAG->RemoveDeadNode(N);
1367 }
1368 
1369 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1370  unsigned Opc) {
1371  SDLoc dl(N);
1372  EVT VT = N->getOperand(2)->getValueType(0);
1373  bool Narrow = VT.getSizeInBits() == 64;
1374 
1375  // Form a REG_SEQUENCE to force register allocation.
1376  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1377 
1378  if (Narrow)
1379  transform(Regs, Regs.begin(),
1380  WidenVector(*CurDAG));
1381 
1382  SDValue RegSeq = createQTuple(Regs);
1383 
1384  unsigned LaneNo =
1385  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1386 
1387  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1388  N->getOperand(NumVecs + 3), N->getOperand(0)};
1389  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1390 
1391  // Transfer memoperands.
1392  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1393  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1394  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1395 
1396  ReplaceNode(N, St);
1397 }
1398 
1399 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1400  unsigned Opc) {
1401  SDLoc dl(N);
1402  EVT VT = N->getOperand(2)->getValueType(0);
1403  bool Narrow = VT.getSizeInBits() == 64;
1404 
1405  // Form a REG_SEQUENCE to force register allocation.
1406  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1407 
1408  if (Narrow)
1409  transform(Regs, Regs.begin(),
1410  WidenVector(*CurDAG));
1411 
1412  SDValue RegSeq = createQTuple(Regs);
1413 
1414  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1415  MVT::Other};
1416 
1417  unsigned LaneNo =
1418  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1419 
1420  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1421  N->getOperand(NumVecs + 2), // Base Register
1422  N->getOperand(NumVecs + 3), // Incremental
1423  N->getOperand(0)};
1424  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1425 
1426  // Transfer memoperands.
1427  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1428  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1429  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1430 
1431  ReplaceNode(N, St);
1432 }
1433 
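// For illustration: (and (srl x, 4), 0xff) becomes UBFM x, #4, #11, i.e. an
// unsigned bitfield extract of 8 bits starting at bit 4.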
1434 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1435  unsigned &Opc, SDValue &Opd0,
1436  unsigned &LSB, unsigned &MSB,
1437  unsigned NumberOfIgnoredLowBits,
1438  bool BiggerPattern) {
1439  assert(N->getOpcode() == ISD::AND &&
1440  "N must be a AND operation to call this function");
1441 
1442  EVT VT = N->getValueType(0);
1443 
1444  // Here we can test the type of VT and return false when the type does not
1445  // match, but since it is done prior to that call in the current context
1446  // we turned that into an assert to avoid redundant code.
1447  assert((VT == MVT::i32 || VT == MVT::i64) &&
1448  "Type checking must have been done before calling this function");
1449 
1450  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1451  // changed the AND node to a 32-bit mask operation. We'll have to
1452  // undo that as part of the transform here if we want to catch all
1453  // the opportunities.
1454  // Currently the NumberOfIgnoredLowBits argument helps to recover
1455  // from these situations when matching a bigger pattern (bitfield insert).
1456 
1457  // For unsigned extracts, check for a shift right and mask
1458  uint64_t AndImm = 0;
1459  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1460  return false;
1461 
1462  const SDNode *Op0 = N->getOperand(0).getNode();
1463 
1464  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1465  // simplified. Try to undo that
1466  AndImm |= (1 << NumberOfIgnoredLowBits) - 1;
1467 
1468  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1469  if (AndImm & (AndImm + 1))
1470  return false;
1471 
1472  bool ClampMSB = false;
1473  uint64_t SrlImm = 0;
1474  // Handle the SRL + ANY_EXTEND case.
1475  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1476  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1477  // Extend the incoming operand of the SRL to 64-bit.
1478  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1479  // Make sure to clamp the MSB so that we preserve the semantics of the
1480  // original operations.
1481  ClampMSB = true;
1482  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1483  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1484  SrlImm)) {
1485  // If the shift result was truncated, we can still combine them.
1486  Opd0 = Op0->getOperand(0).getOperand(0);
1487 
1488  // Use the type of SRL node.
1489  VT = Opd0->getValueType(0);
1490  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1491  Opd0 = Op0->getOperand(0);
1492  } else if (BiggerPattern) {
1493  // Let's pretend a 0 shift right has been performed.
1494  // The resulting code will be at least as good as the original one
1495  // plus it may expose more opportunities for bitfield insert pattern.
1496  // FIXME: Currently we limit this to the bigger pattern, because
1497  // some optimizations expect AND and not UBFM.
1498  Opd0 = N->getOperand(0);
1499  } else
1500  return false;
1501 
1502  // Bail out on large immediates. This happens when no proper
1503  // combining/constant folding was performed.
1504  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1505  DEBUG((dbgs() << N
1506  << ": Found large shift immediate, this should not happen\n"));
1507  return false;
1508  }
1509 
1510  LSB = SrlImm;
1511  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1512  : countTrailingOnes<uint64_t>(AndImm)) -
1513  1;
1514  if (ClampMSB)
1515  // Since we're moving the extend before the right shift operation, we need
1516  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1517  // the zeros which would get shifted in with the original right shift
1518  // operation.
1519  MSB = MSB > 31 ? 31 : MSB;
1520 
1521  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1522  return true;
1523 }
1524 
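// For illustration: (sign_extend_inreg (srl x, 4), i8) becomes SBFM x, #4,
// #11, i.e. a signed bitfield extract of 8 bits starting at bit 4.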
1525 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1526  SDValue &Opd0, unsigned &Immr,
1527  unsigned &Imms) {
1528  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1529 
1530  EVT VT = N->getValueType(0);
1531  unsigned BitWidth = VT.getSizeInBits();
1532  assert((VT == MVT::i32 || VT == MVT::i64) &&
1533  "Type checking must have been done before calling this function");
1534 
1535  SDValue Op = N->getOperand(0);
1536  if (Op->getOpcode() == ISD::TRUNCATE) {
1537  Op = Op->getOperand(0);
1538  VT = Op->getValueType(0);
1539  BitWidth = VT.getSizeInBits();
1540  }
1541 
1542  uint64_t ShiftImm;
1543  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1544  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1545  return false;
1546 
1547  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1548  if (ShiftImm + Width > BitWidth)
1549  return false;
1550 
1551  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1552  Opd0 = Op.getOperand(0);
1553  Immr = ShiftImm;
1554  Imms = ShiftImm + Width - 1;
1555  return true;
1556 }
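 // For example (hypothetical i32 value x): (sign_extend_inreg (srl x, 5), i8)
 // gives ShiftImm = 5 and Width = 8, so Immr = 5, Imms = 12 and
 // Opc = SBFMWri, i.e. SBFX Wd, Wn, #5, #8.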
1557 
1558 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1559  SDValue &Opd0, unsigned &LSB,
1560  unsigned &MSB) {
1561  // We are looking for the following pattern, which basically extracts several
1562  // contiguous bits from the source value and places them at the LSB of the
1563  // destination value; all other bits of the destination value are set to zero:
1564  //
1565  // Value2 = AND Value, MaskImm
1566  // SRL Value2, ShiftImm
1567  //
1568  // with (MaskImm >> ShiftImm) used to determine the bit width.
1569  //
1570  // This gets selected into a single UBFM:
1571  //
1572  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1573  //
1574 
1575  if (N->getOpcode() != ISD::SRL)
1576  return false;
1577 
1578  uint64_t AndMask = 0;
1579  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1580  return false;
1581 
1582  Opd0 = N->getOperand(0).getOperand(0);
1583 
1584  uint64_t SrlImm = 0;
1585  if (!isIntImmediate(N->getOperand(1), SrlImm))
1586  return false;
1587 
1588  // Check whether we really have several bits extract here.
1589  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1590  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1591  if (N->getValueType(0) == MVT::i32)
1592  Opc = AArch64::UBFMWri;
1593  else
1594  Opc = AArch64::UBFMXri;
1595 
1596  LSB = SrlImm;
1597  MSB = BitWide + SrlImm - 1;
1598  return true;
1599  }
1600 
1601  return false;
1602 }
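 // For example (hypothetical value x): for (srl (and x, 0xff0), 4),
 // AndMask >> SrlImm = 0xff is a mask of width 8, so BitWide = 8 and the
 // result is LSB = 4, MSB = 11, i.e. a UBFM of x with #4, #11
 // (the UBFX #4, #8 alias).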
1603 
1604 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1605  unsigned &Immr, unsigned &Imms,
1606  bool BiggerPattern) {
1607  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1608  "N must be a SHR/SRA operation to call this function");
1609 
1610  EVT VT = N->getValueType(0);
1611 
1612  // Here we could test the type of VT and return false when the type does not
1613  // match, but since that check is done prior to this call in the current
1614  // context, we turned it into an assert to avoid redundant code.
1615  assert((VT == MVT::i32 || VT == MVT::i64) &&
1616  "Type checking must have been done before calling this function");
1617 
1618  // Check for AND + SRL doing several bits extract.
1619  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1620  return true;
1621 
1622  // We're looking for a shift of a shift.
1623  uint64_t ShlImm = 0;
1624  uint64_t TruncBits = 0;
1625  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1626  Opd0 = N->getOperand(0).getOperand(0);
1627  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1628  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1629  // We are looking for a shift of truncate. Truncate from i64 to i32 could
1630  // be considered as setting high 32 bits as zero. Our strategy here is to
1631  // always generate 64bit UBFM. This consistency will help the CSE pass
1632  // later find more redundancy.
1633  Opd0 = N->getOperand(0).getOperand(0);
1634  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1635  VT = Opd0->getValueType(0);
1636  assert(VT == MVT::i64 && "the promoted type should be i64");
1637  } else if (BiggerPattern) {
1638  // Let's pretend a 0 shift left has been performed.
1639  // FIXME: Currently we limit this to the bigger pattern case,
1640  // because some optimizations expect AND and not UBFM
1641  Opd0 = N->getOperand(0);
1642  } else
1643  return false;
1644 
1645  // Missing combines/constant folding may have left us with strange
1646  // constants.
1647  if (ShlImm >= VT.getSizeInBits()) {
1648  DEBUG((dbgs() << N
1649  << ": Found large shift immediate, this should not happen\n"));
1650  return false;
1651  }
1652 
1653  uint64_t SrlImm = 0;
1654  if (!isIntImmediate(N->getOperand(1), SrlImm))
1655  return false;
1656 
1657  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1658  "bad amount in shift node!");
1659  int immr = SrlImm - ShlImm;
1660  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1661  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1662  // SRA requires a signed extraction
1663  if (VT == MVT::i32)
1664  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1665  else
1666  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1667  return true;
1668 }
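 // For example (hypothetical i32 value x): (sra (shl x, 24), 24) gives
 // ShlImm = SrlImm = 24, so Immr = 0 and Imms = 32 - 24 - 1 = 7, selecting
 // SBFMWri x, #0, #7, i.e. a plain SXTB.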
1669 
1670 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1671  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1672 
1673  EVT VT = N->getValueType(0);
1674  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1675  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1676  return false;
1677 
1678  uint64_t ShiftImm;
1679  SDValue Op = N->getOperand(0);
1680  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1681  return false;
1682 
1683  SDLoc dl(N);
1684  // Extend the incoming operand of the shift to 64-bits.
1685  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1686  unsigned Immr = ShiftImm;
1687  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1688  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1689  CurDAG->getTargetConstant(Imms, dl, VT)};
1690  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1691  return true;
1692 }
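 // For example (hypothetical i32 value x): (sext (sra x, 3)) to i64 widens x
 // to 64 bits and selects SBFMXri with Immr = 3 and Imms = 31, i.e. a signed
 // extract of bits [3, 31] placed at the LSB of the 64-bit result.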
1693 
1694 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1695  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1696  unsigned NumberOfIgnoredLowBits = 0,
1697  bool BiggerPattern = false) {
1698  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1699  return false;
1700 
1701  switch (N->getOpcode()) {
1702  default:
1703  if (!N->isMachineOpcode())
1704  return false;
1705  break;
1706  case ISD::AND:
1707  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1708  NumberOfIgnoredLowBits, BiggerPattern);
1709  case ISD::SRL:
1710  case ISD::SRA:
1711  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1712 
1713  case ISD::SIGN_EXTEND_INREG:
1714  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1715  }
1716 
1717  unsigned NOpc = N->getMachineOpcode();
1718  switch (NOpc) {
1719  default:
1720  return false;
1721  case AArch64::SBFMWri:
1722  case AArch64::UBFMWri:
1723  case AArch64::SBFMXri:
1724  case AArch64::UBFMXri:
1725  Opc = NOpc;
1726  Opd0 = N->getOperand(0);
1727  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1728  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1729  return true;
1730  }
1731  // Unreachable
1732  return false;
1733 }
1734 
1735 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1736  unsigned Opc, Immr, Imms;
1737  SDValue Opd0;
1738  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1739  return false;
1740 
1741  EVT VT = N->getValueType(0);
1742  SDLoc dl(N);
1743 
1744  // If the bit extract operation is 64bit but the original type is 32bit, we
1745  // need to add one EXTRACT_SUBREG.
1746  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1747  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1748  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1749 
1750  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1751  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1752  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1753  MVT::i32, SDValue(BFM, 0), SubReg));
1754  return true;
1755  }
1756 
1757  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1758  CurDAG->getTargetConstant(Imms, dl, VT)};
1759  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1760  return true;
1761 }
1762 
1763 /// Does DstMask form a complementary pair with the mask provided by
1764 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
1765 /// this asks whether DstMask zeroes precisely those bits that will be set by
1766 /// the other half.
1767 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1768  unsigned NumberOfIgnoredHighBits, EVT VT) {
1769  assert((VT == MVT::i32 || VT == MVT::i64) &&
1770  "i32 or i64 mask type expected!");
1771  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1772 
1773  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1774  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1775 
1776  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1777  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1778 }
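 // For example, with VT = i32 and no ignored high bits, DstMask = 0xffffff00
 // is complementary to an insertion of the low byte (BitsToBeInserted = 0xff):
 // the two masks do not overlap and together cover all 32 bits.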
1779 
1780 // Look for bits that will be useful for later uses.
1781 // A bit is considered useless as soon as it is dropped and never used
1782 // before it has been dropped.
1783 // E.g., looking for the useful bits of x:
1784 // 1. y = x & 0x7
1785 // 2. z = y >> 2
1786 // After #1, the useful bits of x are 0x7; these useful bits live through
1787 // y.
1788 // After #2, the useful bits of x are 0x4.
1789 // However, if x is used by an unpredictable instruction, then all its bits
1790 // are useful.
1791 // E.g.
1792 // 1. y = x & 0x7
1793 // 2. z = y >> 2
1794 // 3. str x, [@x]
1795 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1796 
1797 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1798  unsigned Depth) {
1799  uint64_t Imm =
1800  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1801  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1802  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1803  getUsefulBits(Op, UsefulBits, Depth + 1);
1804 }
1805 
1806 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1807  uint64_t Imm, uint64_t MSB,
1808  unsigned Depth) {
1809  // inherit the bitwidth value
1810  APInt OpUsefulBits(UsefulBits);
1811  OpUsefulBits = 1;
1812 
1813  if (MSB >= Imm) {
1814  OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1815  --OpUsefulBits;
1816  // The interesting part will be in the lower part of the result
1817  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1818  // The interesting part was starting at Imm in the argument
1819  OpUsefulBits = OpUsefulBits.shl(Imm);
1820  } else {
1821  OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1822  --OpUsefulBits;
1823  // The interesting part will be shifted in the result
1824  OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
1825  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1826  // The interesting part was at zero in the argument
1827  OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
1828  }
1829 
1830  UsefulBits &= OpUsefulBits;
1831 }
1832 
1833 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1834  unsigned Depth) {
1835  uint64_t Imm =
1836  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1837  uint64_t MSB =
1838  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1839 
1840  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1841 }
1842 
1843 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1844  unsigned Depth) {
1845  uint64_t ShiftTypeAndValue =
1846  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1847  APInt Mask(UsefulBits);
1848  Mask.clearAllBits();
1849  Mask.flipAllBits();
1850 
1851  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1852  // Shift Left
1853  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1854  Mask = Mask.shl(ShiftAmt);
1855  getUsefulBits(Op, Mask, Depth + 1);
1856  Mask = Mask.lshr(ShiftAmt);
1857  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1858  // Shift Right
1859  // We do not handle AArch64_AM::ASR, because the sign will change the
1860  // number of useful bits
1861  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1862  Mask = Mask.lshr(ShiftAmt);
1863  getUsefulBits(Op, Mask, Depth + 1);
1864  Mask = Mask.shl(ShiftAmt);
1865  } else
1866  return;
1867 
1868  UsefulBits &= Mask;
1869 }
1870 
1871 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1872  unsigned Depth) {
1873  uint64_t Imm =
1874  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1875  uint64_t MSB =
1876  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1877 
1878  APInt OpUsefulBits(UsefulBits);
1879  OpUsefulBits = 1;
1880 
1881  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1882  ResultUsefulBits.flipAllBits();
1883  APInt Mask(UsefulBits.getBitWidth(), 0);
1884 
1885  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1886 
1887  if (MSB >= Imm) {
1888  // The instruction is a BFXIL.
1889  uint64_t Width = MSB - Imm + 1;
1890  uint64_t LSB = Imm;
1891 
1892  OpUsefulBits = OpUsefulBits.shl(Width);
1893  --OpUsefulBits;
1894 
1895  if (Op.getOperand(1) == Orig) {
1896  // Copy the low bits from the result to bits starting from LSB.
1897  Mask = ResultUsefulBits & OpUsefulBits;
1898  Mask = Mask.shl(LSB);
1899  }
1900 
1901  if (Op.getOperand(0) == Orig)
1902  // Bits starting from LSB in the input contribute to the result.
1903  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1904  } else {
1905  // The instruction is a BFI.
1906  uint64_t Width = MSB + 1;
1907  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1908 
1909  OpUsefulBits = OpUsefulBits.shl(Width);
1910  --OpUsefulBits;
1911  OpUsefulBits = OpUsefulBits.shl(LSB);
1912 
1913  if (Op.getOperand(1) == Orig) {
1914  // Copy the bits from the result to the zero bits.
1915  Mask = ResultUsefulBits & OpUsefulBits;
1916  Mask = Mask.lshr(LSB);
1917  }
1918 
1919  if (Op.getOperand(0) == Orig)
1920  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1921  }
1922 
1923  UsefulBits &= Mask;
1924 }
1925 
1926 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1927  SDValue Orig, unsigned Depth) {
1928 
1929  // Users of this node should have already been instruction selected
1930  // FIXME: Can we turn that into an assert?
1931  if (!UserNode->isMachineOpcode())
1932  return;
1933 
1934  switch (UserNode->getMachineOpcode()) {
1935  default:
1936  return;
1937  case AArch64::ANDSWri:
1938  case AArch64::ANDSXri:
1939  case AArch64::ANDWri:
1940  case AArch64::ANDXri:
1941  // We increment Depth only when we call the getUsefulBits
1942  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1943  Depth);
1944  case AArch64::UBFMWri:
1945  case AArch64::UBFMXri:
1946  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1947 
1948  case AArch64::ORRWrs:
1949  case AArch64::ORRXrs:
1950  if (UserNode->getOperand(1) != Orig)
1951  return;
1952  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
1953  Depth);
1954  case AArch64::BFMWri:
1955  case AArch64::BFMXri:
1956  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1957 
1958  case AArch64::STRBBui:
1959  case AArch64::STURBBi:
1960  if (UserNode->getOperand(0) != Orig)
1961  return;
1962  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
1963  return;
1964 
1965  case AArch64::STRHHui:
1966  case AArch64::STURHHi:
1967  if (UserNode->getOperand(0) != Orig)
1968  return;
1969  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
1970  return;
1971  }
1972 }
1973 
1974 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
1975  if (Depth >= 6)
1976  return;
1977  // Initialize UsefulBits
1978  if (!Depth) {
1979  unsigned Bitwidth = Op.getScalarValueSizeInBits();
1980  // At the beginning, assume every produced bit is useful
1981  UsefulBits = APInt(Bitwidth, 0);
1982  UsefulBits.flipAllBits();
1983  }
1984  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
1985 
1986  for (SDNode *Node : Op.getNode()->uses()) {
1987  // A use cannot produce useful bits
1988  APInt UsefulBitsForUse = APInt(UsefulBits);
1989  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
1990  UsersUsefulBits |= UsefulBitsForUse;
1991  }
1992  // UsefulBits contains the produced bits that are meaningful for the
1993  // current definition, thus a user cannot make a bit meaningful at
1994  // this point
1995  UsefulBits &= UsersUsefulBits;
1996 }
1997 
1998 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
1999 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2000 /// 0, return Op unchanged.
2001 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2002  if (ShlAmount == 0)
2003  return Op;
2004 
2005  EVT VT = Op.getValueType();
2006  SDLoc dl(Op);
2007  unsigned BitWidth = VT.getSizeInBits();
2008  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2009 
2010  SDNode *ShiftNode;
2011  if (ShlAmount > 0) {
2012  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2013  ShiftNode = CurDAG->getMachineNode(
2014  UBFMOpc, dl, VT, Op,
2015  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2016  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2017  } else {
2018  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2019  assert(ShlAmount < 0 && "expected right shift");
2020  int ShrAmount = -ShlAmount;
2021  ShiftNode = CurDAG->getMachineNode(
2022  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2023  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2024  }
2025 
2026  return SDValue(ShiftNode, 0);
2027 }
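 // For example, on an i32 value a ShlAmount of 8 produces
 // UBFMWri Op, #24, #23 (the UBFM form of LSL #8), while a ShlAmount of -8
 // produces UBFMWri Op, #8, #31 (LSR #8).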
2028 
2029 /// Does this tree qualify as an attempt to move a bitfield into position,
2030 /// essentially "(and (shl VAL, N), Mask)".
2031 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2032  bool BiggerPattern,
2033  SDValue &Src, int &ShiftAmount,
2034  int &MaskWidth) {
2035  EVT VT = Op.getValueType();
2036  unsigned BitWidth = VT.getSizeInBits();
2037  (void)BitWidth;
2038  assert(BitWidth == 32 || BitWidth == 64);
2039 
2040  APInt KnownZero, KnownOne;
2041  CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
2042 
2043  // Non-zero in the sense that they're not provably zero, which is the key
2044  // point if we want to use this value
2045  uint64_t NonZeroBits = (~KnownZero).getZExtValue();
2046 
2047  // Discard a constant AND mask if present. It's safe because the node will
2048  // already have been factored into the computeKnownBits calculation above.
2049  uint64_t AndImm;
2050  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2051  assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
2052  Op = Op.getOperand(0);
2053  }
2054 
2055  // Don't match if the SHL has more than one use, since then we'll end up
2056  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2057  if (!BiggerPattern && !Op.hasOneUse())
2058  return false;
2059 
2060  uint64_t ShlImm;
2061  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2062  return false;
2063  Op = Op.getOperand(0);
2064 
2065  if (!isShiftedMask_64(NonZeroBits))
2066  return false;
2067 
2068  ShiftAmount = countTrailingZeros(NonZeroBits);
2069  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2070 
2071  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2072  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2073  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2074  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2075  // which case it is not profitable to insert an extra shift.
2076  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2077  return false;
2078  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2079 
2080  return true;
2081 }
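 // For example (hypothetical i32 value x): (and (shl x, 4), 0xf0) has
 // NonZeroBits = 0xf0, so ShiftAmount = 4 and MaskWidth = 4; since ShlImm
 // equals ShiftAmount, Src is simply x and no extra shift is inserted.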
2082 
2083 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2084  assert(VT == MVT::i32 || VT == MVT::i64);
2085  if (VT == MVT::i32)
2086  return isShiftedMask_32(Mask);
2087  return isShiftedMask_64(Mask);
2088 }
2089 
2090 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2091 // inserted only sets known zero bits.
2092 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2093  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2094 
2095  EVT VT = N->getValueType(0);
2096  if (VT != MVT::i32 && VT != MVT::i64)
2097  return false;
2098 
2099  unsigned BitWidth = VT.getSizeInBits();
2100 
2101  uint64_t OrImm;
2102  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2103  return false;
2104 
2105  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2106  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2107  // performance neutral.
2108  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2109  return false;
2110 
2111  uint64_t MaskImm;
2112  SDValue And = N->getOperand(0);
2113  // Must be a single use AND with an immediate operand.
2114  if (!And.hasOneUse() ||
2115  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2116  return false;
2117 
2118  // Compute the Known Zero for the AND as this allows us to catch more general
2119  // cases than just looking for AND with imm.
2120  APInt KnownZero, KnownOne;
2121  CurDAG->computeKnownBits(And, KnownZero, KnownOne);
2122 
2123  // Non-zero in the sense that they're not provably zero, which is the key
2124  // point if we want to use this value.
2125  uint64_t NotKnownZero = (~KnownZero).getZExtValue();
2126 
2127  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2128  if (!isShiftedMask(KnownZero.getZExtValue(), VT))
2129  return false;
2130 
2131  // The bits being inserted must only set those bits that are known to be zero.
2132  if ((OrImm & NotKnownZero) != 0) {
2133  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2134  // currently handle this case.
2135  return false;
2136  }
2137 
2138  // BFI/BFXIL dst, src, #lsb, #width.
2139  int LSB = countTrailingOnes(NotKnownZero);
2140  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2141 
2142  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2143  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2144  unsigned ImmS = Width - 1;
2145 
2146  // If we're creating a BFI instruction, avoid cases where we need more
2147  // instructions to materialize the BFI constant as compared to the original
2148  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2149  // should be no worse in this case.
2150  bool IsBFI = LSB != 0;
2151  uint64_t BFIImm = OrImm >> LSB;
2152  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2153  // We have a BFI instruction and we know the constant can't be materialized
2154  // with a ORR-immediate with the zero register.
2155  unsigned OrChunks = 0, BFIChunks = 0;
2156  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2157  if (((OrImm >> Shift) & 0xFFFF) != 0)
2158  ++OrChunks;
2159  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2160  ++BFIChunks;
2161  }
2162  if (BFIChunks > OrChunks)
2163  return false;
2164  }
2165 
2166  // Materialize the constant to be inserted.
2167  SDLoc DL(N);
2168  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2169  SDNode *MOVI = CurDAG->getMachineNode(
2170  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2171 
2172  // Create the BFI/BFXIL instruction.
2173  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2174  CurDAG->getTargetConstant(ImmR, DL, VT),
2175  CurDAG->getTargetConstant(ImmS, DL, VT)};
2176  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2177  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2178  return true;
2179 }
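 // For example (hypothetical i32 value x): (or (and x, 0xffffff00), 0x55) is
 // selected as MOVi32imm #0x55 followed by BFMWri with ImmR = 0 and ImmS = 7,
 // i.e. BFXIL Wd, Wm, #0, #8, because 0x55 is not a valid logical immediate
 // and the AND proves the low 8 bits of the destination are zero.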
2180 
2181 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2182  SelectionDAG *CurDAG) {
2183  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2184 
2185  EVT VT = N->getValueType(0);
2186  if (VT != MVT::i32 && VT != MVT::i64)
2187  return false;
2188 
2189  unsigned BitWidth = VT.getSizeInBits();
2190 
2191  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2192  // have the expected shape. Try to undo that.
2193 
2194  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2195  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2196 
2197  // Given an OR operation, check if we have the following pattern
2198  // ubfm c, b, imm, imm2 (or something that does the same job, see
2199  // isBitfieldExtractOp)
2200  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2201  // countTrailingZeros(mask2) == imm2 - imm + 1
2202  // f = d | c
2203  // if yes, replace the OR instruction with:
2204  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2205 
2206  // OR is commutative, check all combinations of operand order and values of
2207  // BiggerPattern, i.e.
2208  // Opd0, Opd1, BiggerPattern=false
2209  // Opd1, Opd0, BiggerPattern=false
2210  // Opd0, Opd1, BiggerPattern=true
2211  // Opd1, Opd0, BiggerPattern=true
2212  // Several of these combinations may match, so check with BiggerPattern=false
2213  // first since that will produce better results by matching more instructions
2214  // and/or inserting fewer extra instructions.
2215  for (int I = 0; I < 4; ++I) {
2216 
2217  SDValue Dst, Src;
2218  unsigned ImmR, ImmS;
2219  bool BiggerPattern = I / 2;
2220  SDValue OrOpd0Val = N->getOperand(I % 2);
2221  SDNode *OrOpd0 = OrOpd0Val.getNode();
2222  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2223  SDNode *OrOpd1 = OrOpd1Val.getNode();
2224 
2225  unsigned BFXOpc;
2226  int DstLSB, Width;
2227  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2228  NumberOfIgnoredLowBits, BiggerPattern)) {
2229  // Check that the returned opcode is compatible with the pattern,
2230  // i.e., same type and zero extended (U and not S)
2231  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2232  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2233  continue;
2234 
2235  // Compute the width of the bitfield insertion
2236  DstLSB = 0;
2237  Width = ImmS - ImmR + 1;
2238  // FIXME: This constraint is to catch bitfield insertion; we may
2239  // want to widen the pattern if we want to grab the general bitfield
2240  // move case.
2241  if (Width <= 0)
2242  continue;
2243 
2244  // If the mask on the insertee is correct, we have a BFXIL operation. We
2245  // can share the ImmR and ImmS values from the already-computed UBFM.
2246  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2247  BiggerPattern,
2248  Src, DstLSB, Width)) {
2249  ImmR = (BitWidth - DstLSB) % BitWidth;
2250  ImmS = Width - 1;
2251  } else
2252  continue;
2253 
2254  // Check the second part of the pattern
2255  EVT VT = OrOpd1->getValueType(0);
2256  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2257 
2258  // Compute the Known Zero for the candidate of the first operand.
2259  // This allows us to catch more general cases than just looking for
2260  // an AND with imm. Indeed, simplify-demanded-bits may have removed
2261  // the AND instruction because it proves it was useless.
2262  APInt KnownZero, KnownOne;
2263  CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
2264 
2265  // Check if there is enough room for the second operand to appear
2266  // in the first one
2267  APInt BitsToBeInserted =
2268  APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
2269 
2270  if ((BitsToBeInserted & ~KnownZero) != 0)
2271  continue;
2272 
2273  // Set the first operand
2274  uint64_t Imm;
2275  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2276  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2277  // In that case, we can eliminate the AND
2278  Dst = OrOpd1->getOperand(0);
2279  else
2280  // Maybe the AND has been removed by simplify-demanded-bits
2281  // or is useful because it discards more bits
2282  Dst = OrOpd1Val;
2283 
2284  // both parts match
2285  SDLoc DL(N);
2286  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2287  CurDAG->getTargetConstant(ImmS, DL, VT)};
2288  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2289  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2290  return true;
2291  }
2292 
2293  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2294  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2295  // mask (e.g., 0x000ffff0).
2296  uint64_t Mask0Imm, Mask1Imm;
2297  SDValue And0 = N->getOperand(0);
2298  SDValue And1 = N->getOperand(1);
2299  if (And0.hasOneUse() && And1.hasOneUse() &&
2300  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2301  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2302  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2303  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2304 
2305  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2306  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2307  // bits to be inserted.
2308  if (isShiftedMask(Mask0Imm, VT)) {
2309  std::swap(And0, And1);
2310  std::swap(Mask0Imm, Mask1Imm);
2311  }
2312 
2313  SDValue Src = And1->getOperand(0);
2314  SDValue Dst = And0->getOperand(0);
2315  unsigned LSB = countTrailingZeros(Mask1Imm);
2316  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2317 
2318  // The BFXIL inserts the low-order bits from a source register, so right
2319  // shift the needed bits into place.
2320  SDLoc DL(N);
2321  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2322  SDNode *LSR = CurDAG->getMachineNode(
2323  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2324  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2325 
2326  // BFXIL is an alias of BFM, so translate to BFM operands.
2327  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2328  unsigned ImmS = Width - 1;
2329 
2330  // Create the BFXIL instruction.
2331  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2332  CurDAG->getTargetConstant(ImmR, DL, VT),
2333  CurDAG->getTargetConstant(ImmS, DL, VT)};
2334  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2335  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2336  return true;
2337  }
2338 
2339  return false;
2340 }
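 // For example (hypothetical i32 values x and y):
 //   (or (and x, 0xffffff00), (and (srl y, 16), 0xff))
 // matches with the second operand as the extract (UBFM y, #16, #23) and the
 // mask on x as the complementary destination mask, so the whole OR is
 // selected as BFMWri x, y, #16, #23, i.e. BFXIL Wd, Wn, #16, #8.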
2341 
2342 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2343  if (N->getOpcode() != ISD::OR)
2344  return false;
2345 
2346  APInt NUsefulBits;
2347  getUsefulBits(SDValue(N, 0), NUsefulBits);
2348 
2349  // If none of the bits are useful, just return UNDEF.
2350  if (!NUsefulBits) {
2351  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2352  return true;
2353  }
2354 
2355  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2356  return true;
2357 
2358  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2359 }
2360 
2361 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2362 /// equivalent of a left shift by a constant amount followed by an and masking
2363 /// out a contiguous set of bits.
2364 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2365  if (N->getOpcode() != ISD::AND)
2366  return false;
2367 
2368  EVT VT = N->getValueType(0);
2369  if (VT != MVT::i32 && VT != MVT::i64)
2370  return false;
2371 
2372  SDValue Op0;
2373  int DstLSB, Width;
2374  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2375  Op0, DstLSB, Width))
2376  return false;
2377 
2378  // ImmR is the rotate right amount.
2379  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2380  // ImmS is the most significant bit of the source to be moved.
2381  unsigned ImmS = Width - 1;
2382 
2383  SDLoc DL(N);
2384  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2385  CurDAG->getTargetConstant(ImmS, DL, VT)};
2386  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2387  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2388  return true;
2389 }
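 // For example (hypothetical i32 value x): (and (shl x, 4), 0xf0) gives
 // DstLSB = 4 and Width = 4, so ImmR = 28 and ImmS = 3, selecting
 // UBFMWri x, #28, #3, i.e. UBFIZ Wd, Wn, #4, #4.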
2390 
2391 bool
2392 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2393  unsigned RegWidth) {
2394  APFloat FVal(0.0);
2395  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2396  FVal = CN->getValueAPF();
2397  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2398  // Some otherwise illegal constants are allowed in this case.
2399  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2400  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2401  return false;
2402 
2403  ConstantPoolSDNode *CN =
2404  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2405  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2406  } else
2407  return false;
2408 
2409  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2410  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2411  // x-register.
2412  //
2413  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2414  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2415  // integers.
2416  bool IsExact;
2417 
2418  // fbits is between 1 and 64 in the worst-case, which means the fmul
2419  // could have 2^64 as an actual operand. Need 65 bits of precision.
2420  APSInt IntVal(65, true);
2421  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2422 
2423  // N.b. isPowerOf2 also checks for > 0.
2424  if (!IsExact || !IntVal.isPowerOf2()) return false;
2425  unsigned FBits = IntVal.logBase2();
2426 
2427  // Checks above should have guaranteed that we haven't lost information in
2428  // finding FBits, but it must still be in range.
2429  if (FBits == 0 || FBits > RegWidth) return false;
2430 
2431  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2432  return true;
2433 }
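 // For example, (fp_to_sint (fmul Val, 256.0)) with a 32-bit destination:
 // 256.0 converts exactly to 2^8, so FBits = 8 is within range and FixedPos
 // becomes the constant 8 (the #fbits operand of the fixed-point convert).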
2434 
2435 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
2436 // of the string, obtains the integer values from them, and combines these
2437 // into a single value to be used in the MRS/MSR instruction.
2438 static int getIntOperandFromRegisterString(StringRef RegString) {
2439  SmallVector<StringRef, 5> Fields;
2440  RegString.split(Fields, ':');
2441 
2442  if (Fields.size() == 1)
2443  return -1;
2444 
2445  assert(Fields.size() == 5
2446  && "Invalid number of fields in read register string");
2447 
2448  SmallVector<int, 5> Ops;
2449  bool AllIntFields = true;
2450 
2451  for (StringRef Field : Fields) {
2452  unsigned IntField;
2453  AllIntFields &= !Field.getAsInteger(10, IntField);
2454  Ops.push_back(IntField);
2455  }
2456 
2457  assert(AllIntFields &&
2458  "Unexpected non-integer value in special register string.");
2459 
2460  // Need to combine the integer fields of the string into a single value
2461  // based on the bit encoding of MRS/MSR instruction.
2462  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2463  (Ops[3] << 3) | (Ops[4]);
2464 }
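 // For example, the string "3:3:13:0:2" yields
 // (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xde82, which is the
 // encoding used for TPIDR_EL0.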
2465 
2466 // Lower the read_register intrinsic to an MRS instruction node if the special
2467 // register string argument is either of the form detailed in the ACLE (the
2468 // form described in getIntOperandFromRegisterString) or is a named register
2469 // known by the MRS SysReg mapper.
2470 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2471  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2472  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2473  SDLoc DL(N);
2474 
2475  int Reg = getIntOperandFromRegisterString(RegString->getString());
2476  if (Reg != -1) {
2477  ReplaceNode(N, CurDAG->getMachineNode(
2478  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2479  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2480  N->getOperand(0)));
2481  return true;
2482  }
2483 
2484  // Use the sysreg mapper to map the remaining possible strings to the
2485  // value for the register to be used for the instruction operand.
2486  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2487  if (TheReg && TheReg->Readable &&
2488  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2489  Reg = TheReg->Encoding;
2490  else
2491  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2492 
2493  if (Reg != -1) {
2494  ReplaceNode(N, CurDAG->getMachineNode(
2495  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2496  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2497  N->getOperand(0)));
2498  return true;
2499  }
2500 
2501  return false;
2502 }
2503 
2504 // Lower the write_register intrinsic to an MSR instruction node if the special
2505 // register string argument is either of the form detailed in the ACLE (the
2506 // form described in getIntOperandFromRegisterString) or is a named register
2507 // known by the MSR SysReg mapper.
2508 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2509  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2510  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2511  SDLoc DL(N);
2512 
2513  int Reg = getIntOperandFromRegisterString(RegString->getString());
2514  if (Reg != -1) {
2515  ReplaceNode(
2516  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2517  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2518  N->getOperand(2), N->getOperand(0)));
2519  return true;
2520  }
2521 
2522  // Check if the register was one of those allowed as the pstatefield value in
2523  // the MSR (immediate) instruction. To accept the values allowed in the
2524  // pstatefield for the MSR (immediate) instruction, we also require that an
2525  // immediate value has been provided as an argument; we know that this is
2526  // the case, as it has been ensured by semantic checking.
2527  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2528  if (PMapper) {
2529  assert (isa<ConstantSDNode>(N->getOperand(2))
2530  && "Expected a constant integer expression.");
2531  unsigned Reg = PMapper->Encoding;
2532  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2533  unsigned State;
2534  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
2535  assert(Immed < 2 && "Bad imm");
2536  State = AArch64::MSRpstateImm1;
2537  } else {
2538  assert(Immed < 16 && "Bad imm");
2539  State = AArch64::MSRpstateImm4;
2540  }
2541  ReplaceNode(N, CurDAG->getMachineNode(
2542  State, DL, MVT::Other,
2543  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2544  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2545  N->getOperand(0)));
2546  return true;
2547  }
2548 
2549  // Use the sysreg mapper to attempt to map the remaining possible strings
2550  // to the value for the register to be used for the MSR (register)
2551  // instruction operand.
2552  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2553  if (TheReg && TheReg->Writeable &&
2554  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2555  Reg = TheReg->Encoding;
2556  else
2557  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2558  if (Reg != -1) {
2559  ReplaceNode(N, CurDAG->getMachineNode(
2560  AArch64::MSR, DL, MVT::Other,
2561  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2562  N->getOperand(2), N->getOperand(0)));
2563  return true;
2564  }
2565 
2566  return false;
2567 }
2568 
2569 /// We've got special pseudo-instructions for these
2570 void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2571  unsigned Opcode;
2572  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2573  if (MemTy == MVT::i8)
2574  Opcode = AArch64::CMP_SWAP_8;
2575  else if (MemTy == MVT::i16)
2576  Opcode = AArch64::CMP_SWAP_16;
2577  else if (MemTy == MVT::i32)
2578  Opcode = AArch64::CMP_SWAP_32;
2579  else if (MemTy == MVT::i64)
2580  Opcode = AArch64::CMP_SWAP_64;
2581  else
2582  llvm_unreachable("Unknown AtomicCmpSwap type");
2583 
2584  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2585  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2586  N->getOperand(0)};
2587  SDNode *CmpSwap = CurDAG->getMachineNode(
2588  Opcode, SDLoc(N),
2589  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2590 
2591  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2592  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2593  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2594 
2595  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2596  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2597  CurDAG->RemoveDeadNode(N);
2598 }
2599 
2600 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2601  // Dump information about the Node being selected
2602  DEBUG(errs() << "Selecting: ");
2603  DEBUG(Node->dump(CurDAG));
2604  DEBUG(errs() << "\n");
2605 
2606  // If we have a custom node, we already have selected!
2607  if (Node->isMachineOpcode()) {
2608  DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2609  Node->setNodeId(-1);
2610  return;
2611  }
2612 
2613  // A few custom selection cases.
2614  EVT VT = Node->getValueType(0);
2615 
2616  switch (Node->getOpcode()) {
2617  default:
2618  break;
2619 
2620  case ISD::ATOMIC_CMP_SWAP:
2621  SelectCMP_SWAP(Node);
2622  return;
2623 
2624  case ISD::READ_REGISTER:
2625  if (tryReadRegister(Node))
2626  return;
2627  break;
2628 
2629  case ISD::WRITE_REGISTER:
2630  if (tryWriteRegister(Node))
2631  return;
2632  break;
2633 
2634  case ISD::ADD:
2635  if (tryMLAV64LaneV128(Node))
2636  return;
2637  break;
2638 
2639  case ISD::LOAD: {
2640  // Try to select as an indexed load. Fall through to normal processing
2641  // if we can't.
2642  if (tryIndexedLoad(Node))
2643  return;
2644  break;
2645  }
2646 
2647  case ISD::SRL:
2648  case ISD::AND:
2649  case ISD::SRA:
2650  case ISD::SIGN_EXTEND_INREG:
2651  if (tryBitfieldExtractOp(Node))
2652  return;
2653  if (tryBitfieldInsertInZeroOp(Node))
2654  return;
2655  break;
2656 
2657  case ISD::SIGN_EXTEND:
2658  if (tryBitfieldExtractOpFromSExt(Node))
2659  return;
2660  break;
2661 
2662  case ISD::OR:
2663  if (tryBitfieldInsertOp(Node))
2664  return;
2665  break;
2666 
2667  case ISD::EXTRACT_VECTOR_ELT: {
2668  // Extracting lane zero is a special case where we can just use a plain
2669  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2670  // the rest of the compiler, especially the register allocator and copy
2671  // propagation, to reason about, so is preferred when it's possible to
2672  // use it.
2673  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2674  // Bail and use the default Select() for non-zero lanes.
2675  if (LaneNode->getZExtValue() != 0)
2676  break;
2677  // If the element type is not the same as the result type, likewise
2678  // bail and use the default Select(), as there's more to do than just
2679  // a cross-class COPY. This catches extracts of i8 and i16 elements
2680  // since they will need an explicit zext.
2681  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2682  break;
2683  unsigned SubReg;
2684  switch (Node->getOperand(0)
2685  .getValueType()
2686  .getVectorElementType()
2687  .getSizeInBits()) {
2688  default:
2689  llvm_unreachable("Unexpected vector element type!");
2690  case 64:
2691  SubReg = AArch64::dsub;
2692  break;
2693  case 32:
2694  SubReg = AArch64::ssub;
2695  break;
2696  case 16:
2697  SubReg = AArch64::hsub;
2698  break;
2699  case 8:
2700  llvm_unreachable("unexpected zext-requiring extract element!");
2701  }
2702  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2703  Node->getOperand(0));
2704  DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2705  DEBUG(Extract->dumpr(CurDAG));
2706  DEBUG(dbgs() << "\n");
2707  ReplaceNode(Node, Extract.getNode());
2708  return;
2709  }
2710  case ISD::Constant: {
2711  // Materialize zero constants as copies from WZR/XZR. This allows
2712  // the coalescer to propagate these into other instructions.
2713  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2714  if (ConstNode->isNullValue()) {
2715  if (VT == MVT::i32) {
2716  SDValue New = CurDAG->getCopyFromReg(
2717  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2718  ReplaceNode(Node, New.getNode());
2719  return;
2720  } else if (VT == MVT::i64) {
2721  SDValue New = CurDAG->getCopyFromReg(
2722  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2723  ReplaceNode(Node, New.getNode());
2724  return;
2725  }
2726  }
2727  break;
2728  }
2729 
2730  case ISD::FrameIndex: {
2731  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2732  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2733  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2734  const TargetLowering *TLI = getTargetLowering();
2735  SDValue TFI = CurDAG->getTargetFrameIndex(
2736  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2737  SDLoc DL(Node);
2738  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2739  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2740  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2741  return;
2742  }
2743  case ISD::INTRINSIC_W_CHAIN: {
2744  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2745  switch (IntNo) {
2746  default:
2747  break;
2748  case Intrinsic::aarch64_ldaxp:
2749  case Intrinsic::aarch64_ldxp: {
2750  unsigned Op =
2751  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2752  SDValue MemAddr = Node->getOperand(2);
2753  SDLoc DL(Node);
2754  SDValue Chain = Node->getOperand(0);
2755 
2756  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2757  MVT::Other, MemAddr, Chain);
2758 
2759  // Transfer memoperands.
2760  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2761  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2762  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2763  ReplaceNode(Node, Ld);
2764  return;
2765  }
2766  case Intrinsic::aarch64_stlxp:
2767  case Intrinsic::aarch64_stxp: {
2768  unsigned Op =
2769  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2770  SDLoc DL(Node);
2771  SDValue Chain = Node->getOperand(0);
2772  SDValue ValLo = Node->getOperand(2);
2773  SDValue ValHi = Node->getOperand(3);
2774  SDValue MemAddr = Node->getOperand(4);
2775 
2776  // Place arguments in the right order.
2777  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2778 
2779  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2780  // Transfer memoperands.
2781  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2782  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2783  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2784 
2785  ReplaceNode(Node, St);
2786  return;
2787  }
2788  case Intrinsic::aarch64_neon_ld1x2:
2789  if (VT == MVT::v8i8) {
2790  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2791  return;
2792  } else if (VT == MVT::v16i8) {
2793  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2794  return;
2795  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2796  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2797  return;
2798  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2799  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2800  return;
2801  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2802  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2803  return;
2804  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2805  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2806  return;
2807  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2808  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2809  return;
2810  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2811  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2812  return;
2813  }
2814  break;
2815  case Intrinsic::aarch64_neon_ld1x3:
2816  if (VT == MVT::v8i8) {
2817  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2818  return;
2819  } else if (VT == MVT::v16i8) {
2820  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2821  return;
2822  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2823  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2824  return;
2825  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2826  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2827  return;
2828  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2829  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2830  return;
2831  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2832  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2833  return;
2834  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2835  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2836  return;
2837  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2838  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2839  return;
2840  }
2841  break;
2842  case Intrinsic::aarch64_neon_ld1x4:
2843  if (VT == MVT::v8i8) {
2844  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2845  return;
2846  } else if (VT == MVT::v16i8) {
2847  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2848  return;
2849  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2850  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2851  return;
2852  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2853  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2854  return;
2855  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2856  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2857  return;
2858  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2859  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2860  return;
2861  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2862  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2863  return;
2864  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2865  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2866  return;
2867  }
2868  break;
2869  case Intrinsic::aarch64_neon_ld2:
2870  if (VT == MVT::v8i8) {
2871  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2872  return;
2873  } else if (VT == MVT::v16i8) {
2874  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2875  return;
2876  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2877  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2878  return;
2879  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2880  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2881  return;
2882  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2883  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2884  return;
2885  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2886  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2887  return;
2888  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2889  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2890  return;
2891  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2892  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2893  return;
2894  }
2895  break;
2896  case Intrinsic::aarch64_neon_ld3:
2897  if (VT == MVT::v8i8) {
2898  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2899  return;
2900  } else if (VT == MVT::v16i8) {
2901  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2902  return;
2903  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2904  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2905  return;
2906  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2907  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2908  return;
2909  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2910  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2911  return;
2912  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2913  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2914  return;
2915  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2916  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2917  return;
2918  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2919  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2920  return;
2921  }
2922  break;
2923  case Intrinsic::aarch64_neon_ld4:
2924  if (VT == MVT::v8i8) {
2925  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
2926  return;
2927  } else if (VT == MVT::v16i8) {
2928  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
2929  return;
2930  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2931  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
2932  return;
2933  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2934  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
2935  return;
2936  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2937  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
2938  return;
2939  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2940  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
2941  return;
2942  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2943  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2944  return;
2945  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2946  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
2947  return;
2948  }
2949  break;
2950  case Intrinsic::aarch64_neon_ld2r:
2951  if (VT == MVT::v8i8) {
2952  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
2953  return;
2954  } else if (VT == MVT::v16i8) {
2955  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
2956  return;
2957  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2958  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
2959  return;
2960  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2961  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
2962  return;
2963  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2964  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
2965  return;
2966  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2967  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
2968  return;
2969  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2970  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
2971  return;
2972  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2973  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
2974  return;
2975  }
2976  break;
2977  case Intrinsic::aarch64_neon_ld3r:
2978  if (VT == MVT::v8i8) {
2979  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
2980  return;
2981  } else if (VT == MVT::v16i8) {
2982  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
2983  return;
2984  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2985  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
2986  return;
2987  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2988  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
2989  return;
2990  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2991  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
2992  return;
2993  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2994  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
2995  return;
2996  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2997  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
2998  return;
2999  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3000  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3001  return;
3002  }
3003  break;
3004  case Intrinsic::aarch64_neon_ld4r:
3005  if (VT == MVT::v8i8) {
3006  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3007  return;
3008  } else if (VT == MVT::v16i8) {
3009  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3010  return;
3011  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3012  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3013  return;
3014  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3015  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3016  return;
3017  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3018  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3019  return;
3020  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3021  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3022  return;
3023  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3024  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3025  return;
3026  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3027  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3028  return;
3029  }
3030  break;
3031  case Intrinsic::aarch64_neon_ld2lane:
3032  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3033  SelectLoadLane(Node, 2, AArch64::LD2i8);
3034  return;
3035  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3036  VT == MVT::v8f16) {
3037  SelectLoadLane(Node, 2, AArch64::LD2i16);
3038  return;
3039  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3040  VT == MVT::v2f32) {
3041  SelectLoadLane(Node, 2, AArch64::LD2i32);
3042  return;
3043  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3044  VT == MVT::v1f64) {
3045  SelectLoadLane(Node, 2, AArch64::LD2i64);
3046  return;
3047  }
3048  break;
3049  case Intrinsic::aarch64_neon_ld3lane:
3050  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3051  SelectLoadLane(Node, 3, AArch64::LD3i8);
3052  return;
3053  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3054  VT == MVT::v8f16) {
3055  SelectLoadLane(Node, 3, AArch64::LD3i16);
3056  return;
3057  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3058  VT == MVT::v2f32) {
3059  SelectLoadLane(Node, 3, AArch64::LD3i32);
3060  return;
3061  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3062  VT == MVT::v1f64) {
3063  SelectLoadLane(Node, 3, AArch64::LD3i64);
3064  return;
3065  }
3066  break;
3067  case Intrinsic::aarch64_neon_ld4lane:
3068  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3069  SelectLoadLane(Node, 4, AArch64::LD4i8);
3070  return;
3071  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3072  VT == MVT::v8f16) {
3073  SelectLoadLane(Node, 4, AArch64::LD4i16);
3074  return;
3075  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3076  VT == MVT::v2f32) {
3077  SelectLoadLane(Node, 4, AArch64::LD4i32);
3078  return;
3079  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3080  VT == MVT::v1f64) {
3081  SelectLoadLane(Node, 4, AArch64::LD4i64);
3082  return;
3083  }
3084  break;
3085  }
3086  } break;
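  // Chainless NEON intrinsics: the TBL/TBX table lookups and the lane-indexed
  // forms of smull/umull are matched below.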
3087  case ISD::INTRINSIC_WO_CHAIN: {
3088  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3089  switch (IntNo) {
3090  default:
3091  break;
3092  case Intrinsic::aarch64_neon_tbl2:
3093  SelectTable(Node, 2,
3094  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3095  false);
3096  return;
3097  case Intrinsic::aarch64_neon_tbl3:
3098  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3099  : AArch64::TBLv16i8Three,
3100  false);
3101  return;
3102  case Intrinsic::aarch64_neon_tbl4:
3103  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3104  : AArch64::TBLv16i8Four,
3105  false);
3106  return;
3107  case Intrinsic::aarch64_neon_tbx2:
3108  SelectTable(Node, 2,
3109  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3110  true);
3111  return;
3112  case Intrinsic::aarch64_neon_tbx3:
3113  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3114  : AArch64::TBXv16i8Three,
3115  true);
3116  return;
3117  case Intrinsic::aarch64_neon_tbx4:
3118  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3119  : AArch64::TBXv16i8Four,
3120  true);
3121  return;
3122  case Intrinsic::aarch64_neon_smull:
3123  case Intrinsic::aarch64_neon_umull:
3124  if (tryMULLV64LaneV128(IntNo, Node))
3125  return;
3126  break;
3127  }
3128  break;
3129  }
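  // Chained void intrinsics: the NEON structure stores (st1x2..st1x4,
  // st2..st4, st2lane..st4lane). The value type is taken from the first
  // stored vector operand (operand 2), not from the node's results.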
3130  case ISD::INTRINSIC_VOID: {
3131  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3132  if (Node->getNumOperands() >= 3)
3133  VT = Node->getOperand(2)->getValueType(0);
3134  switch (IntNo) {
3135  default:
3136  break;
3137  case Intrinsic::aarch64_neon_st1x2: {
3138  if (VT == MVT::v8i8) {
3139  SelectStore(Node, 2, AArch64::ST1Twov8b);
3140  return;
3141  } else if (VT == MVT::v16i8) {
3142  SelectStore(Node, 2, AArch64::ST1Twov16b);
3143  return;
3144  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3145  SelectStore(Node, 2, AArch64::ST1Twov4h);
3146  return;
3147  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3148  SelectStore(Node, 2, AArch64::ST1Twov8h);
3149  return;
3150  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3151  SelectStore(Node, 2, AArch64::ST1Twov2s);
3152  return;
3153  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3154  SelectStore(Node, 2, AArch64::ST1Twov4s);
3155  return;
3156  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3157  SelectStore(Node, 2, AArch64::ST1Twov2d);
3158  return;
3159  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3160  SelectStore(Node, 2, AArch64::ST1Twov1d);
3161  return;
3162  }
3163  break;
3164  }
3165  case Intrinsic::aarch64_neon_st1x3: {
3166  if (VT == MVT::v8i8) {
3167  SelectStore(Node, 3, AArch64::ST1Threev8b);
3168  return;
3169  } else if (VT == MVT::v16i8) {
3170  SelectStore(Node, 3, AArch64::ST1Threev16b);
3171  return;
3172  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3173  SelectStore(Node, 3, AArch64::ST1Threev4h);
3174  return;
3175  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3176  SelectStore(Node, 3, AArch64::ST1Threev8h);
3177  return;
3178  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3179  SelectStore(Node, 3, AArch64::ST1Threev2s);
3180  return;
3181  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3182  SelectStore(Node, 3, AArch64::ST1Threev4s);
3183  return;
3184  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3185  SelectStore(Node, 3, AArch64::ST1Threev2d);
3186  return;
3187  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3188  SelectStore(Node, 3, AArch64::ST1Threev1d);
3189  return;
3190  }
3191  break;
3192  }
3193  case Intrinsic::aarch64_neon_st1x4: {
3194  if (VT == MVT::v8i8) {
3195  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3196  return;
3197  } else if (VT == MVT::v16i8) {
3198  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3199  return;
3200  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3201  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3202  return;
3203  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3204  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3205  return;
3206  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3207  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3208  return;
3209  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3210  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3211  return;
3212  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3213  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3214  return;
3215  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3216  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3217  return;
3218  }
3219  break;
3220  }
3221  case Intrinsic::aarch64_neon_st2: {
3222  if (VT == MVT::v8i8) {
3223  SelectStore(Node, 2, AArch64::ST2Twov8b);
3224  return;
3225  } else if (VT == MVT::v16i8) {
3226  SelectStore(Node, 2, AArch64::ST2Twov16b);
3227  return;
3228  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3229  SelectStore(Node, 2, AArch64::ST2Twov4h);
3230  return;
3231  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3232  SelectStore(Node, 2, AArch64::ST2Twov8h);
3233  return;
3234  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3235  SelectStore(Node, 2, AArch64::ST2Twov2s);
3236  return;
3237  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3238  SelectStore(Node, 2, AArch64::ST2Twov4s);
3239  return;
3240  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3241  SelectStore(Node, 2, AArch64::ST2Twov2d);
3242  return;
3243  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3244  SelectStore(Node, 2, AArch64::ST1Twov1d);
3245  return;
3246  }
3247  break;
3248  }
3249  case Intrinsic::aarch64_neon_st3: {
3250  if (VT == MVT::v8i8) {
3251  SelectStore(Node, 3, AArch64::ST3Threev8b);
3252  return;
3253  } else if (VT == MVT::v16i8) {
3254  SelectStore(Node, 3, AArch64::ST3Threev16b);
3255  return;
3256  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3257  SelectStore(Node, 3, AArch64::ST3Threev4h);
3258  return;
3259  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3260  SelectStore(Node, 3, AArch64::ST3Threev8h);
3261  return;
3262  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3263  SelectStore(Node, 3, AArch64::ST3Threev2s);
3264  return;
3265  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3266  SelectStore(Node, 3, AArch64::ST3Threev4s);
3267  return;
3268  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3269  SelectStore(Node, 3, AArch64::ST3Threev2d);
3270  return;
3271  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3272  SelectStore(Node, 3, AArch64::ST1Threev1d);
3273  return;
3274  }
3275  break;
3276  }
3277  case Intrinsic::aarch64_neon_st4: {
3278  if (VT == MVT::v8i8) {
3279  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3280  return;
3281  } else if (VT == MVT::v16i8) {
3282  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3283  return;
3284  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3285  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3286  return;
3287  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3288  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3289  return;
3290  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3291  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3292  return;
3293  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3294  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3295  return;
3296  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3297  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3298  return;
3299  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3300  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3301  return;
3302  }
3303  break;
3304  }
3305  case Intrinsic::aarch64_neon_st2lane: {
3306  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3307  SelectStoreLane(Node, 2, AArch64::ST2i8);
3308  return;
3309  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3310  VT == MVT::v8f16) {
3311  SelectStoreLane(Node, 2, AArch64::ST2i16);
3312  return;
3313  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3314  VT == MVT::v2f32) {
3315  SelectStoreLane(Node, 2, AArch64::ST2i32);
3316  return;
3317  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3318  VT == MVT::v1f64) {
3319  SelectStoreLane(Node, 2, AArch64::ST2i64);
3320  return;
3321  }
3322  break;
3323  }
3324  case Intrinsic::aarch64_neon_st3lane: {
3325  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3326  SelectStoreLane(Node, 3, AArch64::ST3i8);
3327  return;
3328  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3329  VT == MVT::v8f16) {
3330  SelectStoreLane(Node, 3, AArch64::ST3i16);
3331  return;
3332  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3333  VT == MVT::v2f32) {
3334  SelectStoreLane(Node, 3, AArch64::ST3i32);
3335  return;
3336  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3337  VT == MVT::v1f64) {
3338  SelectStoreLane(Node, 3, AArch64::ST3i64);
3339  return;
3340  }
3341  break;
3342  }
3343  case Intrinsic::aarch64_neon_st4lane: {
3344  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3345  SelectStoreLane(Node, 4, AArch64::ST4i8);
3346  return;
3347  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3348  VT == MVT::v8f16) {
3349  SelectStoreLane(Node, 4, AArch64::ST4i16);
3350  return;
3351  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3352  VT == MVT::v2f32) {
3353  SelectStoreLane(Node, 4, AArch64::ST4i32);
3354  return;
3355  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3356  VT == MVT::v1f64) {
3357  SelectStoreLane(Node, 4, AArch64::ST4i64);
3358  return;
3359  }
3360  break;
3361  }
3362  }
3363  break;
3364  }
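  // Post-incremented (write-back) NEON structure, replicate and lane loads.
  // These AArch64ISD nodes produce the updated base address as an extra
  // result, so each maps onto a *_POST machine instruction.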
3365  case AArch64ISD::LD2post: {
3366  if (VT == MVT::v8i8) {
3367  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3368  return;
3369  } else if (VT == MVT::v16i8) {
3370  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3371  return;
3372  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3373  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3374  return;
3375  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3376  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3377  return;
3378  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3379  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3380  return;
3381  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3382  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3383  return;
3384  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3385  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3386  return;
3387  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3388  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3389  return;
3390  }
3391  break;
3392  }
3393  case AArch64ISD::LD3post: {
3394  if (VT == MVT::v8i8) {
3395  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3396  return;
3397  } else if (VT == MVT::v16i8) {
3398  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3399  return;
3400  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3401  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3402  return;
3403  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3404  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3405  return;
3406  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3407  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3408  return;
3409  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3410  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3411  return;
3412  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3413  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3414  return;
3415  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3416  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3417  return;
3418  }
3419  break;
3420  }
3421  case AArch64ISD::LD4post: {
3422  if (VT == MVT::v8i8) {
3423  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3424  return;
3425  } else if (VT == MVT::v16i8) {
3426  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3427  return;
3428  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3429  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3430  return;
3431  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3432  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3433  return;
3434  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3435  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3436  return;
3437  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3438  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3439  return;
3440  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3441  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3442  return;
3443  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3444  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3445  return;
3446  }
3447  break;
3448  }
3449  case AArch64ISD::LD1x2post: {
3450  if (VT == MVT::v8i8) {
3451  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3452  return;
3453  } else if (VT == MVT::v16i8) {
3454  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3455  return;
3456  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3457  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3458  return;
3459  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3460  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3461  return;
3462  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3463  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3464  return;
3465  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3466  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3467  return;
3468  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3469  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3470  return;
3471  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3472  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3473  return;
3474  }
3475  break;
3476  }
3477  case AArch64ISD::LD1x3post: {
3478  if (VT == MVT::v8i8) {
3479  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3480  return;
3481  } else if (VT == MVT::v16i8) {
3482  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3483  return;
3484  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3485  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3486  return;
3487  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3488  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3489  return;
3490  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3491  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3492  return;
3493  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3494  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3495  return;
3496  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3497  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3498  return;
3499  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3500  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3501  return;
3502  }
3503  break;
3504  }
3505  case AArch64ISD::LD1x4post: {
3506  if (VT == MVT::v8i8) {
3507  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3508  return;
3509  } else if (VT == MVT::v16i8) {
3510  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3511  return;
3512  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3513  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3514  return;
3515  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3516  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3517  return;
3518  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3519  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3520  return;
3521  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3522  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3523  return;
3524  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3525  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3526  return;
3527  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3528  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3529  return;
3530  }
3531  break;
3532  }
3533  case AArch64ISD::LD1DUPpost: {
3534  if (VT == MVT::v8i8) {
3535  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3536  return;
3537  } else if (VT == MVT::v16i8) {
3538  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3539  return;
3540  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3541  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3542  return;
3543  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3544  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3545  return;
3546  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3547  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3548  return;
3549  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3550  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3551  return;
3552  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3553  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3554  return;
3555  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3556  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3557  return;
3558  }
3559  break;
3560  }
3561  case AArch64ISD::LD2DUPpost: {
3562  if (VT == MVT::v8i8) {
3563  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3564  return;
3565  } else if (VT == MVT::v16i8) {
3566  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3567  return;
3568  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3569  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3570  return;
3571  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3572  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3573  return;
3574  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3575  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3576  return;
3577  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3578  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3579  return;
3580  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3581  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3582  return;
3583  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3584  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3585  return;
3586  }
3587  break;
3588  }
3589  case AArch64ISD::LD3DUPpost: {
3590  if (VT == MVT::v8i8) {
3591  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3592  return;
3593  } else if (VT == MVT::v16i8) {
3594  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3595  return;
3596  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3597  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3598  return;
3599  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3600  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3601  return;
3602  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3603  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3604  return;
3605  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3606  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3607  return;
3608  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3609  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3610  return;
3611  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3612  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3613  return;
3614  }
3615  break;
3616  }
3617  case AArch64ISD::LD4DUPpost: {
3618  if (VT == MVT::v8i8) {
3619  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3620  return;
3621  } else if (VT == MVT::v16i8) {
3622  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3623  return;
3624  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3625  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3626  return;
3627  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3628  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3629  return;
3630  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3631  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3632  return;
3633  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3634  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3635  return;
3636  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3637  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3638  return;
3639  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3640  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3641  return;
3642  }
3643  break;
3644  }
3645  case AArch64ISD::LD1LANEpost: {
3646  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3647  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3648  return;
3649  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3650  VT == MVT::v8f16) {
3651  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3652  return;
3653  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3654  VT == MVT::v2f32) {
3655  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3656  return;
3657  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3658  VT == MVT::v1f64) {
3659  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3660  return;
3661  }
3662  break;
3663  }
3664  case AArch64ISD::LD2LANEpost: {
3665  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3666  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3667  return;
3668  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3669  VT == MVT::v8f16) {
3670  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3671  return;
3672  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3673  VT == MVT::v2f32) {
3674  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3675  return;
3676  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3677  VT == MVT::v1f64) {
3678  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3679  return;
3680  }
3681  break;
3682  }
3683  case AArch64ISD::LD3LANEpost: {
3684  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3685  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3686  return;
3687  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3688  VT == MVT::v8f16) {
3689  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3690  return;
3691  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3692  VT == MVT::v2f32) {
3693  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3694  return;
3695  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3696  VT == MVT::v1f64) {
3697  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3698  return;
3699  }
3700  break;
3701  }
3702  case AArch64ISD::LD4LANEpost: {
3703  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3704  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3705  return;
3706  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3707  VT == MVT::v8f16) {
3708  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3709  return;
3710  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3711  VT == MVT::v2f32) {
3712  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3713  return;
3714  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3715  VT == MVT::v1f64) {
3716  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3717  return;
3718  }
3719  break;
3720  }
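  // Post-incremented NEON structure and lane stores. The value type comes
  // from the stored vector operand (operand 1).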
3721  case AArch64ISD::ST2post: {
3722  VT = Node->getOperand(1).getValueType();
3723  if (VT == MVT::v8i8) {
3724  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3725  return;
3726  } else if (VT == MVT::v16i8) {
3727  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3728  return;
3729  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3730  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3731  return;
3732  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3733  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3734  return;
3735  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3736  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3737  return;
3738  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3739  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3740  return;
3741  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3742  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3743  return;
3744  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3745  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3746  return;
3747  }
3748  break;
3749  }
3750  case AArch64ISD::ST3post: {
3751  VT = Node->getOperand(1).getValueType();
3752  if (VT == MVT::v8i8) {
3753  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3754  return;
3755  } else if (VT == MVT::v16i8) {
3756  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3757  return;
3758  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3759  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3760  return;
3761  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3762  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3763  return;
3764  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3765  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3766  return;
3767  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3768  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3769  return;
3770  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3771  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3772  return;
3773  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3774  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3775  return;
3776  }
3777  break;
3778  }
3779  case AArch64ISD::ST4post: {
3780  VT = Node->getOperand(1).getValueType();
3781  if (VT == MVT::v8i8) {
3782  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3783  return;
3784  } else if (VT == MVT::v16i8) {
3785  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3786  return;
3787  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3788  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3789  return;
3790  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3791  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3792  return;
3793  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3794  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3795  return;
3796  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3797  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3798  return;
3799  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3800  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3801  return;
3802  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3803  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3804  return;
3805  }
3806  break;
3807  }
3808  case AArch64ISD::ST1x2post: {
3809  VT = Node->getOperand(1).getValueType();
3810  if (VT == MVT::v8i8) {
3811  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3812  return;
3813  } else if (VT == MVT::v16i8) {
3814  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3815  return;
3816  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3817  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3818  return;
3819  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3820  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3821  return;
3822  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3823  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3824  return;
3825  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3826  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3827  return;
3828  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3829  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3830  return;
3831  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3832  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3833  return;
3834  }
3835  break;
3836  }
3837  case AArch64ISD::ST1x3post: {
3838  VT = Node->getOperand(1).getValueType();
3839  if (VT == MVT::v8i8) {
3840  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
3841  return;
3842  } else if (VT == MVT::v16i8) {
3843  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
3844  return;
3845  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3846  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
3847  return;
3848  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3849  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
3850  return;
3851  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3852  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
3853  return;
3854  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3855  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
3856  return;
3857  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3858  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3859  return;
3860  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3861  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
3862  return;
3863  }
3864  break;
3865  }
3866  case AArch64ISD::ST1x4post: {
3867  VT = Node->getOperand(1).getValueType();
3868  if (VT == MVT::v8i8) {
3869  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
3870  return;
3871  } else if (VT == MVT::v16i8) {
3872  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
3873  return;
3874  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3875  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
3876  return;
3877  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3878  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
3879  return;
3880  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3881  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
3882  return;
3883  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3884  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
3885  return;
3886  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3887  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3888  return;
3889  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3890  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
3891  return;
3892  }
3893  break;
3894  }
3895  case AArch64ISD::ST2LANEpost: {
3896  VT = Node->getOperand(1).getValueType();
3897  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3898  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
3899  return;
3900  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3901  VT == MVT::v8f16) {
3902  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
3903  return;
3904  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3905  VT == MVT::v2f32) {
3906  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
3907  return;
3908  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3909  VT == MVT::v1f64) {
3910  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
3911  return;
3912  }
3913  break;
3914  }
3915  case AArch64ISD::ST3LANEpost: {
3916  VT = Node->getOperand(1).getValueType();
3917  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3918  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
3919  return;
3920  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3921  VT == MVT::v8f16) {
3922  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
3923  return;
3924  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3925  VT == MVT::v2f32) {
3926  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
3927  return;
3928  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3929  VT == MVT::v1f64) {
3930  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
3931  return;
3932  }
3933  break;
3934  }
3935  case AArch64ISD::ST4LANEpost: {
3936  VT = Node->getOperand(1).getValueType();
3937  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3938  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
3939  return;
3940  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3941  VT == MVT::v8f16) {
3942  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
3943  return;
3944  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3945  VT == MVT::v2f32) {
3946  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
3947  return;
3948  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3949  VT == MVT::v1f64) {
3950  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
3951  return;
3952  }
3953  break;
3954  }
3955  }
3956 
3957  // Select the default instruction
3958  SelectCode(Node);
3959 }
3960 
3961 /// createAArch64ISelDag - This pass converts a legalized DAG into a
3962 /// AArch64-specific DAG, ready for instruction scheduling.
3963 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
3964  CodeGenOpt::Level OptLevel) {
3965  return new AArch64DAGToDAGISel(TM, OptLevel);
3966 }
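The factory above is this file's only external entry point. As a usage sketch (a paraphrase for illustration, not a quotation of the actual backend code), the AArch64 target's pass configuration wires it into instruction selection roughly as follows; the hook shown is modeled on AArch64PassConfig::addInstSelector in AArch64TargetMachine.cpp.

bool AArch64PassConfig::addInstSelector() {
  // Install the SelectionDAG instruction selector defined in this file,
  // forwarding the target machine and the active optimization level.
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
  return false;
}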