LLVM 3.7.0
AArch64ISelDAGToDAG.cpp
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the AArch64 target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64TargetMachine.h"
16 #include "llvm/ADT/APSInt.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/Support/Debug.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "aarch64-isel"
29 
30 //===--------------------------------------------------------------------===//
31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32 /// instructions for SelectionDAG operations.
33 ///
34 namespace {
35 
36 class AArch64DAGToDAGISel : public SelectionDAGISel {
37  AArch64TargetMachine &TM;
38 
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42 
43  bool ForCodeSize;
44 
45 public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47  CodeGenOpt::Level OptLevel)
48  : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
49  ForCodeSize(false) {}
50 
51  const char *getPassName() const override {
52  return "AArch64 Instruction Selection";
53  }
54 
55  bool runOnMachineFunction(MachineFunction &MF) override {
56  ForCodeSize =
57  MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
58  MF.getFunction()->hasFnAttribute(Attribute::MinSize);
59  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60  return SelectionDAGISel::runOnMachineFunction(MF);
61  }
62 
63  SDNode *Select(SDNode *Node) override;
64 
65  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66  /// inline asm expressions.
67  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68  unsigned ConstraintID,
69  std::vector<SDValue> &OutOps) override;
70 
71  SDNode *SelectMLAV64LaneV128(SDNode *N);
72  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
73  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
74  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
75  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
76  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77  return SelectShiftedRegister(N, false, Reg, Shift);
78  }
79  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
80  return SelectShiftedRegister(N, true, Reg, Shift);
81  }
82  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
83  return SelectAddrModeIndexed(N, 1, Base, OffImm);
84  }
85  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
86  return SelectAddrModeIndexed(N, 2, Base, OffImm);
87  }
88  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
89  return SelectAddrModeIndexed(N, 4, Base, OffImm);
90  }
91  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
92  return SelectAddrModeIndexed(N, 8, Base, OffImm);
93  }
94  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
95  return SelectAddrModeIndexed(N, 16, Base, OffImm);
96  }
97  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
98  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
99  }
100  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
101  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
102  }
103  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
104  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
105  }
106  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
107  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
108  }
109  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
110  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
111  }
112 
113  template<int Width>
114  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
115  SDValue &SignExtend, SDValue &DoShift) {
116  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
117  }
118 
119  template<int Width>
120  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
121  SDValue &SignExtend, SDValue &DoShift) {
122  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
123  }
124 
125 
126  /// Form sequences of consecutive 64/128-bit registers for use in NEON
127  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
128  /// between 1 and 4 elements. If it contains a single element, that element is
129  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
130  SDValue createDTuple(ArrayRef<SDValue> Vecs);
131  SDValue createQTuple(ArrayRef<SDValue> Vecs);
132 
133  /// Generic helper for the createDTuple/createQTuple
134  /// functions. Those should almost always be called instead.
135  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
136  const unsigned SubRegs[]);
137 
138  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
139 
140  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
141 
142  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
143  unsigned SubRegIdx);
144  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
145  unsigned SubRegIdx);
146  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
147  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
148 
149  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
150  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
151  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
152  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
153 
154  SDNode *SelectBitfieldExtractOp(SDNode *N);
155  SDNode *SelectBitfieldInsertOp(SDNode *N);
156 
157  SDNode *SelectLIBM(SDNode *N);
158 
159  SDNode *SelectReadRegister(SDNode *N);
160  SDNode *SelectWriteRegister(SDNode *N);
161 
162 // Include the pieces autogenerated from the target description.
163 #include "AArch64GenDAGISel.inc"
164 
165 private:
166  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
167  SDValue &Shift);
168  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
169  SDValue &OffImm);
170  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
171  SDValue &OffImm);
172  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
173  SDValue &Offset, SDValue &SignExtend,
174  SDValue &DoShift);
175  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
176  SDValue &Offset, SDValue &SignExtend,
177  SDValue &DoShift);
178  bool isWorthFolding(SDValue V) const;
179  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
180  SDValue &Offset, SDValue &SignExtend);
181 
182  template<unsigned RegWidth>
183  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
184  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
185  }
186 
187  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
188 };
189 } // end anonymous namespace
190 
191 /// isIntImmediate - This method tests to see if the node is a constant
192 /// operand. If so, Imm will receive the constant's value.
193 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
194  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
195  Imm = C->getZExtValue();
196  return true;
197  }
198  return false;
199 }
200 
201 // isIntImmediate - This method tests to see if the operand is a constant.
202 // If so, Imm will receive the value.
203 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
204  return isIntImmediate(N.getNode(), Imm);
205 }
206 
207 // isOpcWithIntImmediate - This method tests to see if the node is a specific
208 // opcode and that it has an immediate integer right operand.
209 // If so, Imm will receive the 32-bit value.
210 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
211  uint64_t &Imm) {
212  return N->getOpcode() == Opc &&
213  isIntImmediate(N->getOperand(1).getNode(), Imm);
214 }
215 
216 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
217  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
218  switch(ConstraintID) {
219  default:
220  llvm_unreachable("Unexpected asm memory constraint");
221  case InlineAsm::Constraint_i:
222  case InlineAsm::Constraint_m:
223  case InlineAsm::Constraint_Q:
224  // Require the address to be in a register. That is safe for all AArch64
225  // variants and it is hard to do anything much smarter without knowing
226  // how the operand is used.
227  OutOps.push_back(Op);
228  return false;
229  }
230  return true;
231 }
232 
233 /// SelectArithImmed - Select an immediate value that can be represented as
234 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
235 /// Val set to the 12-bit value and Shift set to the shifter operand.
236 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
237  SDValue &Shift) {
238  // This function is called from the addsub_shifted_imm ComplexPattern,
239  // which lists [imm] as the list of opcodes it's interested in; however,
240  // we still need to check whether the operand is actually an immediate
241  // here because the ComplexPattern opcode list is only used in
242  // root-level opcode matching.
243  if (!isa<ConstantSDNode>(N.getNode()))
244  return false;
245 
246  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
247  unsigned ShiftAmt;
248 
249  if (Immed >> 12 == 0) {
250  ShiftAmt = 0;
251  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
252  ShiftAmt = 12;
253  Immed = Immed >> 12;
254  } else
255  return false;
256 
257  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
258  SDLoc dl(N);
259  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
260  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
261  return true;
262 }
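// Illustrative note (not part of the original source): the check above accepts
// exactly the immediates that an AArch64 ADD/SUB instruction can encode, e.g.
//   add w0, w1, #0x123     -> Val = 0x123, Shift = LSL #0
//   add w0, w1, #0x123000  -> Val = 0x123, Shift = LSL #12
// while #0x123456 is rejected: its low 12 bits are non-zero and the remaining
// value does not fit in 12 bits.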
263 
264 /// SelectNegArithImmed - As above, but negates the value before trying to
265 /// select it.
266 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
267  SDValue &Shift) {
268  // This function is called from the addsub_shifted_imm ComplexPattern,
269  // which lists [imm] as the list of opcodes it's interested in; however,
270  // we still need to check whether the operand is actually an immediate
271  // here because the ComplexPattern opcode list is only used in
272  // root-level opcode matching.
273  if (!isa<ConstantSDNode>(N.getNode()))
274  return false;
275 
276  // The immediate operand must be a 24-bit zero-extended immediate.
277  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
278 
279  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
280  // have the opposite effect on the C flag, so this pattern mustn't match under
281  // those circumstances.
282  if (Immed == 0)
283  return false;
284 
285  if (N.getValueType() == MVT::i32)
286  Immed = ~((uint32_t)Immed) + 1;
287  else
288  Immed = ~Immed + 1ULL;
289  if (Immed & 0xFFFFFFFFFF000000ULL)
290  return false;
291 
292  Immed &= 0xFFFFFFULL;
293  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
294  Shift);
295 }
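// Illustrative note (not part of the original source): negating the immediate
// lets an add with a negative constant be selected as the corresponding SUB
// form, e.g. (add w1, #-16) negates to 16 and is emitted as "sub w0, w1, #16".
// Immediate 0 is rejected above because "cmp wN, #0" and "cmn wN, #0" have
// opposite effects on the carry flag and must not be swapped for each other.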
296 
297 /// getShiftTypeForNode - Translate a shift node to the corresponding
298 /// ShiftType value.
299 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
300  switch (N.getOpcode()) {
301  default:
302  return AArch64_AM::InvalidShiftExtend;
303  case ISD::SHL:
304  return AArch64_AM::LSL;
305  case ISD::SRL:
306  return AArch64_AM::LSR;
307  case ISD::SRA:
308  return AArch64_AM::ASR;
309  case ISD::ROTR:
310  return AArch64_AM::ROR;
311  }
312 }
313 
314 /// \brief Determine whether it is worth folding V into an extended register.
315 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
316  // Folding is only worthwhile if the value has a single use, unless we are
317  // optimizing for code size.
318  if (ForCodeSize || V.hasOneUse())
319  return true;
320  return false;
321 }
322 
323 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
324 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
325 /// instructions allow the shifted register to be rotated, but the arithmetic
326 /// instructions do not. The AllowROR parameter specifies whether ROR is
327 /// supported.
328 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
329  SDValue &Reg, SDValue &Shift) {
330  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
331  if (ShType == AArch64_AM::InvalidShiftExtend)
332  return false;
333  if (!AllowROR && ShType == AArch64_AM::ROR)
334  return false;
335 
336  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
337  unsigned BitSize = N.getValueType().getSizeInBits();
338  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
339  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
340 
341  Reg = N.getOperand(0);
342  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
343  return isWorthFolding(N);
344  }
345 
346  return false;
347 }
348 
349 /// getExtendTypeForNode - Translate an extend node to the corresponding
350 /// ExtendType value.
351 static AArch64_AM::ShiftExtendType
352 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
353  if (N.getOpcode() == ISD::SIGN_EXTEND ||
354  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
355  EVT SrcVT;
356  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
357  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
358  else
359  SrcVT = N.getOperand(0).getValueType();
360 
361  if (!IsLoadStore && SrcVT == MVT::i8)
362  return AArch64_AM::SXTB;
363  else if (!IsLoadStore && SrcVT == MVT::i16)
364  return AArch64_AM::SXTH;
365  else if (SrcVT == MVT::i32)
366  return AArch64_AM::SXTW;
367  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
368 
369  return AArch64_AM::InvalidShiftExtend;
370  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
371  N.getOpcode() == ISD::ANY_EXTEND) {
372  EVT SrcVT = N.getOperand(0).getValueType();
373  if (!IsLoadStore && SrcVT == MVT::i8)
374  return AArch64_AM::UXTB;
375  else if (!IsLoadStore && SrcVT == MVT::i16)
376  return AArch64_AM::UXTH;
377  else if (SrcVT == MVT::i32)
378  return AArch64_AM::UXTW;
379  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
380 
381  return AArch64_AM::InvalidShiftExtend;
382  } else if (N.getOpcode() == ISD::AND) {
383  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
384  if (!CSD)
385  return AArch64_AM::InvalidShiftExtend;
386  uint64_t AndMask = CSD->getZExtValue();
387 
388  switch (AndMask) {
389  default:
390  return AArch64_AM::InvalidShiftExtend;
391  case 0xFF:
392  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
393  case 0xFFFF:
394  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
395  case 0xFFFFFFFF:
396  return AArch64_AM::UXTW;
397  }
398  }
399 
400  return AArch64_AM::InvalidShiftExtend;
401 }
402 
403 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
404 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
405  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
406  DL->getOpcode() != AArch64ISD::DUPLANE32)
407  return false;
408 
409  SDValue SV = DL->getOperand(0);
410  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
411  return false;
412 
413  SDValue EV = SV.getOperand(1);
414  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
415  return false;
416 
417  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
418  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
419  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
420  LaneOp = EV.getOperand(0);
421 
422  return true;
423 }
424 
425 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
426 // high lane extract.
427 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
428  SDValue &LaneOp, int &LaneIdx) {
429 
430  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
431  std::swap(Op0, Op1);
432  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
433  return false;
434  }
435  StdOp = Op1;
436  return true;
437 }
438 
439 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
440 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
441 /// so that we don't emit unnecessary lane extracts.
442 SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
443  SDLoc dl(N);
444  SDValue Op0 = N->getOperand(0);
445  SDValue Op1 = N->getOperand(1);
446  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
447  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
448  int LaneIdx = -1; // Will hold the lane index.
449 
450  if (Op1.getOpcode() != ISD::MUL ||
451  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
452  LaneIdx)) {
453  std::swap(Op0, Op1);
454  if (Op1.getOpcode() != ISD::MUL ||
455  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
456  LaneIdx))
457  return nullptr;
458  }
459 
460  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
461 
462  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
463 
464  unsigned MLAOpc = ~0U;
465 
466  switch (N->getSimpleValueType(0).SimpleTy) {
467  default:
468  llvm_unreachable("Unrecognized MLA.");
469  case MVT::v4i16:
470  MLAOpc = AArch64::MLAv4i16_indexed;
471  break;
472  case MVT::v8i16:
473  MLAOpc = AArch64::MLAv8i16_indexed;
474  break;
475  case MVT::v2i32:
476  MLAOpc = AArch64::MLAv2i32_indexed;
477  break;
478  case MVT::v4i32:
479  MLAOpc = AArch64::MLAv4i32_indexed;
480  break;
481  }
482 
483  return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops);
484 }
485 
486 SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
487  SDLoc dl(N);
488  SDValue SMULLOp0;
489  SDValue SMULLOp1;
490  int LaneIdx;
491 
492  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
493  LaneIdx))
494  return nullptr;
495 
496  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
497 
498  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
499 
500  unsigned SMULLOpc = ~0U;
501 
502  if (IntNo == Intrinsic::aarch64_neon_smull) {
503  switch (N->getSimpleValueType(0).SimpleTy) {
504  default:
505  llvm_unreachable("Unrecognized SMULL.");
506  case MVT::v4i32:
507  SMULLOpc = AArch64::SMULLv4i16_indexed;
508  break;
509  case MVT::v2i64:
510  SMULLOpc = AArch64::SMULLv2i32_indexed;
511  break;
512  }
513  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
514  switch (N->getSimpleValueType(0).SimpleTy) {
515  default:
516  llvm_unreachable("Unrecognized SMULL.");
517  case MVT::v4i32:
518  SMULLOpc = AArch64::UMULLv4i16_indexed;
519  break;
520  case MVT::v2i64:
521  SMULLOpc = AArch64::UMULLv2i32_indexed;
522  break;
523  }
524  } else
525  llvm_unreachable("Unrecognized intrinsic.");
526 
527  return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops);
528 }
529 
530 /// Instructions that accept extend modifiers like UXTW expect the register
531 /// being extended to be a GPR32, but the incoming DAG might be acting on a
532 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
533 /// this is the case.
534 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
535  if (N.getValueType() == MVT::i32)
536  return N;
537 
538  SDLoc dl(N);
539  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
540  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
541  dl, MVT::i32, N, SubReg);
542  return SDValue(Node, 0);
543 }
544 
545 
546 /// SelectArithExtendedRegister - Select an "extended register" operand. This
547 /// operand folds in an extend followed by an optional left shift.
548 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
549  SDValue &Shift) {
550  unsigned ShiftVal = 0;
551  AArch64_AM::ShiftExtendType Ext;
552 
553  if (N.getOpcode() == ISD::SHL) {
554  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
555  if (!CSD)
556  return false;
557  ShiftVal = CSD->getZExtValue();
558  if (ShiftVal > 4)
559  return false;
560 
561  Ext = getExtendTypeForNode(N.getOperand(0));
562  if (Ext == AArch64_AM::InvalidShiftExtend)
563  return false;
564 
565  Reg = N.getOperand(0).getOperand(0);
566  } else {
567  Ext = getExtendTypeForNode(N);
568  if (Ext == AArch64_AM::InvalidShiftExtend)
569  return false;
570 
571  Reg = N.getOperand(0);
572  }
573 
574  // AArch64 mandates that the RHS of the operation must use the smallest
575  // register class that could contain the size being extended from. Thus,
576  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
577  // there might not be an actual 32-bit value in the program. We can
578  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
579  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
580  Reg = narrowIfNeeded(CurDAG, Reg);
581  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
582  MVT::i32);
583  return isWorthFolding(N);
584 }
585 
586 /// If there's a use of this ADDlow that's not itself a load/store then we'll
587 /// need to create a real ADD instruction from it anyway and there's no point in
588 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
589 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
590 /// leads to duplicated ADRP instructions.
591 static bool isWorthFoldingADDlow(SDValue N) {
592  for (auto Use : N->uses()) {
593  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
594  Use->getOpcode() != ISD::ATOMIC_LOAD &&
595  Use->getOpcode() != ISD::ATOMIC_STORE)
596  return false;
597 
598  // ldar and stlr have much more restrictive addressing modes (just a
599  // register).
600  if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)
601  return false;
602  }
603 
604  return true;
605 }
606 
607 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
608 /// immediate" address. The "Size" argument is the size in bytes of the memory
609 /// reference, which determines the scale.
610 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
611  SDValue &Base, SDValue &OffImm) {
612  SDLoc dl(N);
613  const DataLayout &DL = CurDAG->getDataLayout();
614  const TargetLowering *TLI = getTargetLowering();
615  if (N.getOpcode() == ISD::FrameIndex) {
616  int FI = cast<FrameIndexSDNode>(N)->getIndex();
617  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
618  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
619  return true;
620  }
621 
622  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
623  GlobalAddressSDNode *GAN =
624  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
625  Base = N.getOperand(0);
626  OffImm = N.getOperand(1);
627  if (!GAN)
628  return true;
629 
630  const GlobalValue *GV = GAN->getGlobal();
631  unsigned Alignment = GV->getAlignment();
632  Type *Ty = GV->getType()->getElementType();
633  if (Alignment == 0 && Ty->isSized())
634  Alignment = DL.getABITypeAlignment(Ty);
635 
636  if (Alignment >= Size)
637  return true;
638  }
639 
640  if (CurDAG->isBaseWithConstantOffset(N)) {
641  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
642  int64_t RHSC = (int64_t)RHS->getZExtValue();
643  unsigned Scale = Log2_32(Size);
644  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
645  Base = N.getOperand(0);
646  if (Base.getOpcode() == ISD::FrameIndex) {
647  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
648  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
649  }
650  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
651  return true;
652  }
653  }
654  }
655 
656  // Before falling back to our general case, check if the unscaled
657  // instructions can handle this. If so, that's preferable.
658  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
659  return false;
660 
661  // Base only. The address will be materialized into a register before
662  // the memory is accessed.
663  // add x0, Xbase, #offset
664  // ldr x0, [x0]
665  Base = N;
666  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
667  return true;
668 }
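// Illustrative note (not part of the original source): with Size = 8 the
// scaled form covers byte offsets that are multiples of 8 in [0, 0xfff * 8].
// For example (add x1, #16) yields Base = x1 and OffImm = 2 (16 / 8), which
// the assembler prints as "ldr x0, [x1, #16]"; an offset of 17 is not a
// multiple of 8 and is left to the unscaled (LDUR) check or base-only fallback.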
669 
670 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
671 /// immediate" address. This should only match when there is an offset that
672 /// is not valid for a scaled immediate addressing mode. The "Size" argument
673 /// is the size in bytes of the memory reference, which is needed here to know
674 /// what is valid for a scaled immediate.
675 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
676  SDValue &Base,
677  SDValue &OffImm) {
678  if (!CurDAG->isBaseWithConstantOffset(N))
679  return false;
680  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
681  int64_t RHSC = RHS->getSExtValue();
682  // If the offset is valid as a scaled immediate, don't match here.
683  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
684  RHSC < (0x1000 << Log2_32(Size)))
685  return false;
686  if (RHSC >= -256 && RHSC < 256) {
687  Base = N.getOperand(0);
688  if (Base.getOpcode() == ISD::FrameIndex) {
689  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
690  const TargetLowering *TLI = getTargetLowering();
691  Base = CurDAG->getTargetFrameIndex(
692  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
693  }
694  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
695  return true;
696  }
697  }
698  return false;
699 }
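// Illustrative note (not part of the original source): this path handles
// offsets in [-256, 255] that the scaled form cannot encode; e.g. an 8-byte
// access at (add x1, #-8) becomes "ldur x0, [x1, #-8]".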
700 
701 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
702  SDLoc dl(N);
703  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
704  SDValue ImpDef = SDValue(
705  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
706  MachineSDNode *Node = CurDAG->getMachineNode(
707  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
708  return SDValue(Node, 0);
709 }
710 
711 /// \brief Check if the given SHL node (\p N), can be used to form an
712 /// extended register for an addressing mode.
713 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
714  bool WantExtend, SDValue &Offset,
715  SDValue &SignExtend) {
716  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
717  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
718  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
719  return false;
720 
721  SDLoc dl(N);
722  if (WantExtend) {
723  AArch64_AM::ShiftExtendType Ext =
724  getExtendTypeForNode(N.getOperand(0), true);
725  if (Ext == AArch64_AM::InvalidShiftExtend)
726  return false;
727 
728  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
729  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
730  MVT::i32);
731  } else {
732  Offset = N.getOperand(0);
733  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
734  }
735 
736  unsigned LegalShiftVal = Log2_32(Size);
737  unsigned ShiftVal = CSD->getZExtValue();
738 
739  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
740  return false;
741 
742  if (isWorthFolding(N))
743  return true;
744 
745  return false;
746 }
747 
748 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
749  SDValue &Base, SDValue &Offset,
750  SDValue &SignExtend,
751  SDValue &DoShift) {
752  if (N.getOpcode() != ISD::ADD)
753  return false;
754  SDValue LHS = N.getOperand(0);
755  SDValue RHS = N.getOperand(1);
756  SDLoc dl(N);
757 
758  // We don't want to match immediate adds here, because they are better lowered
759  // to the register-immediate addressing modes.
760  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
761  return false;
762 
763  // Check if this particular node is reused in any non-memory related
764  // operation. If yes, do not try to fold this node into the address
765  // computation, since the computation will be kept.
766  const SDNode *Node = N.getNode();
767  for (SDNode *UI : Node->uses()) {
768  if (!isa<MemSDNode>(*UI))
769  return false;
770  }
771 
772  // Remember if it is worth folding N when it produces extended register.
773  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
774 
775  // Try to match a shifted extend on the RHS.
776  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
777  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
778  Base = LHS;
779  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
780  return true;
781  }
782 
783  // Try to match a shifted extend on the LHS.
784  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
785  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
786  Base = RHS;
787  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
788  return true;
789  }
790 
791  // There was no shift, whatever else we find.
792  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
793 
794  AArch64_AM::ShiftExtendType Ext;
795  // Try to match an unshifted extend on the LHS.
796  if (IsExtendedRegisterWorthFolding &&
797  (Ext = getExtendTypeForNode(LHS, true)) !=
798  AArch64_AM::InvalidShiftExtend) {
799  Base = RHS;
800  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
801  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
802  MVT::i32);
803  if (isWorthFolding(LHS))
804  return true;
805  }
806 
807  // Try to match an unshifted extend on the RHS.
808  if (IsExtendedRegisterWorthFolding &&
809  (Ext = getExtendTypeForNode(RHS, true)) !=
810  AArch64_AM::InvalidShiftExtend) {
811  Base = LHS;
812  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
813  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
814  MVT::i32);
815  if (isWorthFolding(RHS))
816  return true;
817  }
818 
819  return false;
820 }
821 
822 // Check if the given immediate is preferred by ADD. If an immediate can be
823 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
824 // encoded by one MOVZ, return true.
825 static bool isPreferredADD(int64_t ImmOff) {
826  // Constant in [0x0, 0xfff] can be encoded in ADD.
827  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
828  return true;
829  // Check if it can be encoded in an "ADD LSL #12".
830  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
831  // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
832  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
833  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
834  return false;
835 }
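// Illustrative note (not part of the original source):
//   0x000fff  -> true  (encodable by a plain ADD)
//   0x123000  -> true  (needs "ADD ..., LSL #12", still cheaper than a
//                       multi-instruction constant move)
//   0x450000  -> false (a single "movz ..., #0x45, lsl #16" materializes it)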
836 
837 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
838  SDValue &Base, SDValue &Offset,
839  SDValue &SignExtend,
840  SDValue &DoShift) {
841  if (N.getOpcode() != ISD::ADD)
842  return false;
843  SDValue LHS = N.getOperand(0);
844  SDValue RHS = N.getOperand(1);
845  SDLoc DL(N);
846 
847  // Check if this particular node is reused in any non-memory related
848  // operation. If yes, do not try to fold this node into the address
849  // computation, since the computation will be kept.
850  const SDNode *Node = N.getNode();
851  for (SDNode *UI : Node->uses()) {
852  if (!isa<MemSDNode>(*UI))
853  return false;
854  }
855 
856  // Watch out if RHS is a wide immediate, it can not be selected into
857  // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
858  // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
859  // instructions like:
860  // MOV X0, WideImmediate
861  // ADD X1, BaseReg, X0
862  // LDR X2, [X1, 0]
863  // For such situation, using [BaseReg, XReg] addressing mode can save one
864  // ADD/SUB:
865  // MOV X0, WideImmediate
866  // LDR X2, [BaseReg, X0]
867  if (isa<ConstantSDNode>(RHS)) {
868  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
869  unsigned Scale = Log2_32(Size);
870  // Skip immediates that can be selected by the load/store addressing mode.
871  // Also skip immediates that can be encoded by a single ADD (SUB is also
872  // checked by using -ImmOff).
873  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
874  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
875  return false;
876 
877  SDValue Ops[] = { RHS };
878  SDNode *MOVI =
879  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
880  SDValue MOVIV = SDValue(MOVI, 0);
881  // This ADD of two X registers will be selected into [Reg+Reg] mode.
882  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
883  }
884 
885  // Remember if it is worth folding N when it produces extended register.
886  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
887 
888  // Try to match a shifted extend on the RHS.
889  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
890  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
891  Base = LHS;
892  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
893  return true;
894  }
895 
896  // Try to match a shifted extend on the LHS.
897  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
898  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
899  Base = RHS;
900  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
901  return true;
902  }
903 
904  // Match any non-shifted, non-extend, non-immediate add expression.
905  Base = LHS;
906  Offset = RHS;
907  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
908  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
909  // Reg1 + Reg2 is free: no check needed.
910  return true;
911 }
912 
913 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
914  static const unsigned RegClassIDs[] = {
915  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
916  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
917  AArch64::dsub2, AArch64::dsub3};
918 
919  return createTuple(Regs, RegClassIDs, SubRegs);
920 }
921 
922 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
923  static const unsigned RegClassIDs[] = {
924  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
925  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
926  AArch64::qsub2, AArch64::qsub3};
927 
928  return createTuple(Regs, RegClassIDs, SubRegs);
929 }
930 
931 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
932  const unsigned RegClassIDs[],
933  const unsigned SubRegs[]) {
934  // There's no special register-class for a vector-list of 1 element: it's just
935  // a vector.
936  if (Regs.size() == 1)
937  return Regs[0];
938 
939  assert(Regs.size() >= 2 && Regs.size() <= 4);
940 
941  SDLoc DL(Regs[0]);
942 
943  SmallVector<SDValue, 4> Ops;
944 
945  // First operand of REG_SEQUENCE is the desired RegClass.
946  Ops.push_back(
947  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
948 
949  // Then we get pairs of source & subregister-position for the components.
950  for (unsigned i = 0; i < Regs.size(); ++i) {
951  Ops.push_back(Regs[i]);
952  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
953  }
954 
955  SDNode *N =
956  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
957  return SDValue(N, 0);
958 }
959 
960 SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
961  unsigned Opc, bool isExt) {
962  SDLoc dl(N);
963  EVT VT = N->getValueType(0);
964 
965  unsigned ExtOff = isExt;
966 
967  // Form a REG_SEQUENCE to force register allocation.
968  unsigned Vec0Off = ExtOff + 1;
969  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
970  N->op_begin() + Vec0Off + NumVecs);
971  SDValue RegSeq = createQTuple(Regs);
972 
973  SmallVector<SDValue, 6> Ops;
974  if (isExt)
975  Ops.push_back(N->getOperand(1));
976  Ops.push_back(RegSeq);
977  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
978  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
979 }
980 
981 SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
982  LoadSDNode *LD = cast<LoadSDNode>(N);
983  if (LD->isUnindexed())
984  return nullptr;
985  EVT VT = LD->getMemoryVT();
986  EVT DstVT = N->getValueType(0);
987  ISD::MemIndexedMode AM = LD->getAddressingMode();
988  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
989 
990  // We're not doing validity checking here. That was done when checking
991  // if we should mark the load as indexed or not. We're just selecting
992  // the right instruction.
993  unsigned Opcode = 0;
994 
995  ISD::LoadExtType ExtType = LD->getExtensionType();
996  bool InsertTo64 = false;
997  if (VT == MVT::i64)
998  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
999  else if (VT == MVT::i32) {
1000  if (ExtType == ISD::NON_EXTLOAD)
1001  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1002  else if (ExtType == ISD::SEXTLOAD)
1003  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1004  else {
1005  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1006  InsertTo64 = true;
1007  // The result of the load is only i32. It's the subreg_to_reg that makes
1008  // it into an i64.
1009  DstVT = MVT::i32;
1010  }
1011  } else if (VT == MVT::i16) {
1012  if (ExtType == ISD::SEXTLOAD) {
1013  if (DstVT == MVT::i64)
1014  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1015  else
1016  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1017  } else {
1018  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1019  InsertTo64 = DstVT == MVT::i64;
1020  // The result of the load is only i32. It's the subreg_to_reg that makes
1021  // it into an i64.
1022  DstVT = MVT::i32;
1023  }
1024  } else if (VT == MVT::i8) {
1025  if (ExtType == ISD::SEXTLOAD) {
1026  if (DstVT == MVT::i64)
1027  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1028  else
1029  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1030  } else {
1031  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1032  InsertTo64 = DstVT == MVT::i64;
1033  // The result of the load is only i32. It's the subreg_to_reg that makes
1034  // it into an i64.
1035  DstVT = MVT::i32;
1036  }
1037  } else if (VT == MVT::f32) {
1038  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1039  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1040  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1041  } else if (VT.is128BitVector()) {
1042  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1043  } else
1044  return nullptr;
1045  SDValue Chain = LD->getChain();
1046  SDValue Base = LD->getBasePtr();
1047  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1048  int OffsetVal = (int)OffsetOp->getZExtValue();
1049  SDLoc dl(N);
1050  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1051  SDValue Ops[] = { Base, Offset, Chain };
1052  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1053  MVT::Other, Ops);
1054  // Either way, we're replacing the node, so tell the caller that.
1055  Done = true;
1056  SDValue LoadedVal = SDValue(Res, 1);
1057  if (InsertTo64) {
1058  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1059  LoadedVal =
1060  SDValue(CurDAG->getMachineNode(
1061  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1062  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1063  SubReg),
1064  0);
1065  }
1066 
1067  ReplaceUses(SDValue(N, 0), LoadedVal);
1068  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1069  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1070 
1071  return nullptr;
1072 }
1073 
1074 SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
1075  unsigned Opc, unsigned SubRegIdx) {
1076  SDLoc dl(N);
1077  EVT VT = N->getValueType(0);
1078  SDValue Chain = N->getOperand(0);
1079 
1080  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1081  Chain};
1082 
1083  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1084 
1085  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1086  SDValue SuperReg = SDValue(Ld, 0);
1087  for (unsigned i = 0; i < NumVecs; ++i)
1088  ReplaceUses(SDValue(N, i),
1089  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1090 
1091  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1092  return nullptr;
1093 }
1094 
1095 SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1096  unsigned Opc, unsigned SubRegIdx) {
1097  SDLoc dl(N);
1098  EVT VT = N->getValueType(0);
1099  SDValue Chain = N->getOperand(0);
1100 
1101  SDValue Ops[] = {N->getOperand(1), // Mem operand
1102  N->getOperand(2), // Incremental
1103  Chain};
1104 
1105  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1106  MVT::Untyped, MVT::Other};
1107 
1108  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1109 
1110  // Update uses of write back register
1111  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1112 
1113  // Update uses of vector list
1114  SDValue SuperReg = SDValue(Ld, 1);
1115  if (NumVecs == 1)
1116  ReplaceUses(SDValue(N, 0), SuperReg);
1117  else
1118  for (unsigned i = 0; i < NumVecs; ++i)
1119  ReplaceUses(SDValue(N, i),
1120  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1121 
1122  // Update the chain
1123  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1124  return nullptr;
1125 }
1126 
1127 SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1128  unsigned Opc) {
1129  SDLoc dl(N);
1130  EVT VT = N->getOperand(2)->getValueType(0);
1131 
1132  // Form a REG_SEQUENCE to force register allocation.
1133  bool Is128Bit = VT.getSizeInBits() == 128;
1134  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1135  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1136 
1137  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1138  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1139 
1140  return St;
1141 }
1142 
1143 SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1144  unsigned Opc) {
1145  SDLoc dl(N);
1146  EVT VT = N->getOperand(2)->getValueType(0);
1147  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1148  MVT::Other}; // Type for the Chain
1149 
1150  // Form a REG_SEQUENCE to force register allocation.
1151  bool Is128Bit = VT.getSizeInBits() == 128;
1152  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1153  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1154 
1155  SDValue Ops[] = {RegSeq,
1156  N->getOperand(NumVecs + 1), // base register
1157  N->getOperand(NumVecs + 2), // Incremental
1158  N->getOperand(0)}; // Chain
1159  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1160 
1161  return St;
1162 }
1163 
1164 namespace {
1165 /// WidenVector - Given a value in the V64 register class, produce the
1166 /// equivalent value in the V128 register class.
1167 class WidenVector {
1168  SelectionDAG &DAG;
1169 
1170 public:
1171  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1172 
1173  SDValue operator()(SDValue V64Reg) {
1174  EVT VT = V64Reg.getValueType();
1175  unsigned NarrowSize = VT.getVectorNumElements();
1176  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1177  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1178  SDLoc DL(V64Reg);
1179 
1180  SDValue Undef =
1181  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1182  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1183  }
1184 };
1185 } // namespace
1186 
1187 /// NarrowVector - Given a value in the V128 register class, produce the
1188 /// equivalent value in the V64 register class.
1189 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1190  EVT VT = V128Reg.getValueType();
1191  unsigned WideSize = VT.getVectorNumElements();
1192  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1193  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1194 
1195  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1196  V128Reg);
1197 }
1198 
1199 SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1200  unsigned Opc) {
1201  SDLoc dl(N);
1202  EVT VT = N->getValueType(0);
1203  bool Narrow = VT.getSizeInBits() == 64;
1204 
1205  // Form a REG_SEQUENCE to force register allocation.
1206  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1207 
1208  if (Narrow)
1209  std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1210  WidenVector(*CurDAG));
1211 
1212  SDValue RegSeq = createQTuple(Regs);
1213 
1214  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1215 
1216  unsigned LaneNo =
1217  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1218 
1219  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1220  N->getOperand(NumVecs + 3), N->getOperand(0)};
1221  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1222  SDValue SuperReg = SDValue(Ld, 0);
1223 
1224  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1225  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1226  AArch64::qsub3 };
1227  for (unsigned i = 0; i < NumVecs; ++i) {
1228  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1229  if (Narrow)
1230  NV = NarrowVector(NV, *CurDAG);
1231  ReplaceUses(SDValue(N, i), NV);
1232  }
1233 
1234  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1235 
1236  return Ld;
1237 }
1238 
1239 SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1240  unsigned Opc) {
1241  SDLoc dl(N);
1242  EVT VT = N->getValueType(0);
1243  bool Narrow = VT.getSizeInBits() == 64;
1244 
1245  // Form a REG_SEQUENCE to force register allocation.
1246  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1247 
1248  if (Narrow)
1249  std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1250  WidenVector(*CurDAG));
1251 
1252  SDValue RegSeq = createQTuple(Regs);
1253 
1254  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1255  RegSeq->getValueType(0), MVT::Other};
1256 
1257  unsigned LaneNo =
1258  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1259 
1260  SDValue Ops[] = {RegSeq,
1261  CurDAG->getTargetConstant(LaneNo, dl,
1262  MVT::i64), // Lane Number
1263  N->getOperand(NumVecs + 2), // Base register
1264  N->getOperand(NumVecs + 3), // Incremental
1265  N->getOperand(0)};
1266  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1267 
1268  // Update uses of the write back register
1269  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1270 
1271  // Update uses of the vector list
1272  SDValue SuperReg = SDValue(Ld, 1);
1273  if (NumVecs == 1) {
1274  ReplaceUses(SDValue(N, 0),
1275  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1276  } else {
1277  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1278  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1279  AArch64::qsub3 };
1280  for (unsigned i = 0; i < NumVecs; ++i) {
1281  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1282  SuperReg);
1283  if (Narrow)
1284  NV = NarrowVector(NV, *CurDAG);
1285  ReplaceUses(SDValue(N, i), NV);
1286  }
1287  }
1288 
1289  // Update the Chain
1290  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1291 
1292  return Ld;
1293 }
1294 
1295 SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1296  unsigned Opc) {
1297  SDLoc dl(N);
1298  EVT VT = N->getOperand(2)->getValueType(0);
1299  bool Narrow = VT.getSizeInBits() == 64;
1300 
1301  // Form a REG_SEQUENCE to force register allocation.
1302  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1303 
1304  if (Narrow)
1305  std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1306  WidenVector(*CurDAG));
1307 
1308  SDValue RegSeq = createQTuple(Regs);
1309 
1310  unsigned LaneNo =
1311  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1312 
1313  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1314  N->getOperand(NumVecs + 3), N->getOperand(0)};
1315  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1316 
1317  // Transfer memoperands.
1318  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1319  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1320  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1321 
1322  return St;
1323 }
1324 
1325 SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1326  unsigned Opc) {
1327  SDLoc dl(N);
1328  EVT VT = N->getOperand(2)->getValueType(0);
1329  bool Narrow = VT.getSizeInBits() == 64;
1330 
1331  // Form a REG_SEQUENCE to force register allocation.
1332  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1333 
1334  if (Narrow)
1335  std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1336  WidenVector(*CurDAG));
1337 
1338  SDValue RegSeq = createQTuple(Regs);
1339 
1340  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1341  MVT::Other};
1342 
1343  unsigned LaneNo =
1344  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1345 
1346  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1347  N->getOperand(NumVecs + 2), // Base Register
1348  N->getOperand(NumVecs + 3), // Incremental
1349  N->getOperand(0)};
1350  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1351 
1352  // Transfer memoperands.
1353  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1354  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1355  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1356 
1357  return St;
1358 }
1359 
1360 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1361  unsigned &Opc, SDValue &Opd0,
1362  unsigned &LSB, unsigned &MSB,
1363  unsigned NumberOfIgnoredLowBits,
1364  bool BiggerPattern) {
1365  assert(N->getOpcode() == ISD::AND &&
1366  "N must be a AND operation to call this function");
1367 
1368  EVT VT = N->getValueType(0);
1369 
1370  // Here we can test the type of VT and return false when the type does not
1371  // match, but since it is done prior to that call in the current context
1372  // we turned that into an assert to avoid redundant code.
1373  assert((VT == MVT::i32 || VT == MVT::i64) &&
1374  "Type checking must have been done before calling this function");
1375 
1376  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1377  // changed the AND node to a 32-bit mask operation. We'll have to
1378  // undo that as part of the transform here if we want to catch all
1379  // the opportunities.
1380  // Currently the NumberOfIgnoredLowBits argument helps to recover
1381  // from these situations when matching a bigger pattern (bitfield insert).
1382 
1383  // For unsigned extracts, check for a shift right and mask
1384  uint64_t And_imm = 0;
1385  if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
1386  return false;
1387 
1388  const SDNode *Op0 = N->getOperand(0).getNode();
1389 
1390  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1391  // simplified. Try to undo that
1392  And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
1393 
1394  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1395  if (And_imm & (And_imm + 1))
1396  return false;
1397 
1398  bool ClampMSB = false;
1399  uint64_t Srl_imm = 0;
1400  // Handle the SRL + ANY_EXTEND case.
1401  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1402  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
1403  // Extend the incoming operand of the SRL to 64-bit.
1404  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1405  // Make sure to clamp the MSB so that we preserve the semantics of the
1406  // original operations.
1407  ClampMSB = true;
1408  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1409  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1410  Srl_imm)) {
1411  // If the shift result was truncated, we can still combine them.
1412  Opd0 = Op0->getOperand(0).getOperand(0);
1413 
1414  // Use the type of SRL node.
1415  VT = Opd0->getValueType(0);
1416  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
1417  Opd0 = Op0->getOperand(0);
1418  } else if (BiggerPattern) {
1419  // Let's pretend a 0 shift right has been performed.
1420  // The resulting code will be at least as good as the original one
1421  // plus it may expose more opportunities for bitfield insert pattern.
1422  // FIXME: Currently we limit this to the bigger pattern, because
1423  // some optimizations expect AND and not UBFM
1424  Opd0 = N->getOperand(0);
1425  } else
1426  return false;
1427 
1428  // Bail out on large immediates. This happens when no proper
1429  // combining/constant folding was performed.
1430  if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {
1431  DEBUG((dbgs() << N
1432  << ": Found large shift immediate, this should not happen\n"));
1433  return false;
1434  }
1435 
1436  LSB = Srl_imm;
1437  MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)
1438  : countTrailingOnes<uint64_t>(And_imm)) -
1439  1;
1440  if (ClampMSB)
1441  // Since we're moving the extend before the right shift operation, we need
1442  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1443  // the zeros which would get shifted in with the original right shift
1444  // operation.
1445  MSB = MSB > 31 ? 31 : MSB;
1446 
1447  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1448  return true;
1449 }
1450 
1451 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1452  SDValue &Opd0, unsigned &LSB,
1453  unsigned &MSB) {
1454  // We are looking for the following pattern which basically extracts several
1455  // contiguous bits from the source value and places them at the LSB of the
1456  // destination value; all other bits of the destination value are set to zero:
1457  //
1458  // Value2 = AND Value, MaskImm
1459  // SRL Value2, ShiftImm
1460  //
1461  // where MaskImm >> ShiftImm gives the width of the extracted bit field.
1462  //
1463  // This gets selected into a single UBFM:
1464  //
1465  // UBFM Value, ShiftImm, BitWide + ShiftImm - 1
1466  //
1467 
1468  if (N->getOpcode() != ISD::SRL)
1469  return false;
1470 
1471  uint64_t And_mask = 0;
1472  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
1473  return false;
1474 
1475  Opd0 = N->getOperand(0).getOperand(0);
1476 
1477  uint64_t Srl_imm = 0;
1478  if (!isIntImmediate(N->getOperand(1), Srl_imm))
1479  return false;
1480 
1481  // Check whether we really have several bits extract here.
1482  unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));
1483  if (BitWide && isMask_64(And_mask >> Srl_imm)) {
1484  if (N->getValueType(0) == MVT::i32)
1485  Opc = AArch64::UBFMWri;
1486  else
1487  Opc = AArch64::UBFMXri;
1488 
1489  LSB = Srl_imm;
1490  MSB = BitWide + Srl_imm - 1;
1491  return true;
1492  }
1493 
1494  return false;
1495 }
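// Illustrative note (not part of the original source): for
//   (srl (and X, 0xff0), 4)
// And_mask >> Srl_imm is 0xff, so BitWide = 8, LSB = 4 and MSB = 11, and the
// node is selected as UBFM X, #4, #11 (i.e. UBFX X, #4, #8).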
1496 
1497 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1498  unsigned &Immr, unsigned &Imms,
1499  bool BiggerPattern) {
1500  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1501  "N must be a SHR/SRA operation to call this function");
1502 
1503  EVT VT = N->getValueType(0);
1504 
1505  // Here we can test the type of VT and return false when the type does not
1506  // match, but since it is done prior to that call in the current context
1507  // we turned that into an assert to avoid redundant code.
1508  assert((VT == MVT::i32 || VT == MVT::i64) &&
1509  "Type checking must have been done before calling this function");
1510 
1511  // Check for AND + SRL doing several bits extract.
1512  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1513  return true;
1514 
1515  // we're looking for a shift of a shift
1516  uint64_t Shl_imm = 0;
1517  uint64_t Trunc_bits = 0;
1518  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
1519  Opd0 = N->getOperand(0).getOperand(0);
1520  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1521  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1522  // We are looking for a shift of truncate. Truncate from i64 to i32 could
1523  // be considered as setting high 32 bits as zero. Our strategy here is to
1524  // always generate 64bit UBFM. This consistency will help the CSE pass
1525  // later find more redundancy.
1526  Opd0 = N->getOperand(0).getOperand(0);
1527  Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1528  VT = Opd0->getValueType(0);
1529  assert(VT == MVT::i64 && "the promoted type should be i64");
1530  } else if (BiggerPattern) {
1531  // Let's pretend a 0 shift left has been performed.
1532  // FIXME: Currently we limit this to the bigger pattern case,
1533  // because some optimizations expect AND and not UBFM
1534  Opd0 = N->getOperand(0);
1535  } else
1536  return false;
1537 
1538  // Missing combines/constant folding may have left us with strange
1539  // constants.
1540  if (Shl_imm >= VT.getSizeInBits()) {
1541  DEBUG((dbgs() << N
1542  << ": Found large shift immediate, this should not happen\n"));
1543  return false;
1544  }
1545 
1546  uint64_t Srl_imm = 0;
1547  if (!isIntImmediate(N->getOperand(1), Srl_imm))
1548  return false;
1549 
1550  assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
1551  "bad amount in shift node!");
1552  int immr = Srl_imm - Shl_imm;
1553  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1554  Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1;
1555  // SRA requires a signed extraction
1556  if (VT == MVT::i32)
1557  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1558  else
1559  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1560  return true;
1561 }
1562 
1563 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1564  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1565  unsigned NumberOfIgnoredLowBits = 0,
1566  bool BiggerPattern = false) {
1567  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1568  return false;
1569 
1570  switch (N->getOpcode()) {
1571  default:
1572  if (!N->isMachineOpcode())
1573  return false;
1574  break;
1575  case ISD::AND:
1576  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1577  NumberOfIgnoredLowBits, BiggerPattern);
1578  case ISD::SRL:
1579  case ISD::SRA:
1580  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1581  }
1582 
1583  unsigned NOpc = N->getMachineOpcode();
1584  switch (NOpc) {
1585  default:
1586  return false;
1587  case AArch64::SBFMWri:
1588  case AArch64::UBFMWri:
1589  case AArch64::SBFMXri:
1590  case AArch64::UBFMXri:
1591  Opc = NOpc;
1592  Opd0 = N->getOperand(0);
1593  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1594  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1595  return true;
1596  }
1597  // Unreachable
1598  return false;
1599 }
1600 
1601 SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
1602  unsigned Opc, Immr, Imms;
1603  SDValue Opd0;
1604  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1605  return nullptr;
1606 
1607  EVT VT = N->getValueType(0);
1608  SDLoc dl(N);
1609 
1610  // If the bit extract operation is 64bit but the original type is 32bit, we
1611  // need to add one EXTRACT_SUBREG.
1612  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1613  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1614  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1615 
1616  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1617  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1618  MachineSDNode *Node =
1619  CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32,
1620  SDValue(BFM, 0), SubReg);
1621  return Node;
1622  }
1623 
1624  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1625  CurDAG->getTargetConstant(Imms, dl, VT)};
1626  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1627 }
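// Illustrative example: (i32 (srl (trunc i64:x), 16)) is matched above as a
// 64-bit extract (UBFMXri x, #16, #31), so the i32 result is produced by an
// extra EXTRACT_SUBREG of the sub_32 subregister, per the code above.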
1628 
1629 /// Does DstMask form a complementary pair with the mask provided by
1630 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1631 /// this asks whether DstMask zeroes precisely those bits that will be set by
1632 /// the other half.
1633 static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
1634  unsigned NumberOfIgnoredHighBits, EVT VT) {
1635  assert((VT == MVT::i32 || VT == MVT::i64) &&
1636  "i32 or i64 mask type expected!");
1637  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1638 
1639  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1640  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1641 
1642  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1643  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1644 }
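// Illustrative example (i32): DstMask = 0xffff00ff pairs with an insertion of
// BitsToBeInserted = 0x0000ff00, since their AND is 0 and their OR is all
// ones, so the destination mask zeroes exactly the bits that will be written.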
1645 
1646 // Look for bits that will be useful for later uses.
1647 // A bit is considered useless as soon as it is dropped and never used
1648 // before it has been dropped.
1649 // E.g., looking for the useful bits of x:
1650 // 1. y = x & 0x7
1651 // 2. z = y >> 2
1652 // After #1, the useful bits of x are 0x7; the useful bits of x then live
1653 // through y.
1654 // After #2, the useful bits of x are 0x4.
1655 // However, if x is used by an unpredictable instruction, then all its bits
1656 // are useful.
1657 // E.g.
1658 // 1. y = x & 0x7
1659 // 2. z = y >> 2
1660 // 3. str x, [@x]
1661 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1662 
1663 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1664  unsigned Depth) {
1665  uint64_t Imm =
1666  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1667  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1668  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1669  getUsefulBits(Op, UsefulBits, Depth + 1);
1670 }
1671 
1672 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1673  uint64_t Imm, uint64_t MSB,
1674  unsigned Depth) {
1675  // inherit the bitwidth value
1676  APInt OpUsefulBits(UsefulBits);
1677  OpUsefulBits = 1;
1678 
1679  if (MSB >= Imm) {
1680  OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1681  --OpUsefulBits;
1682  // The interesting part will be in the lower part of the result
1683  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1684  // The interesting part was starting at Imm in the argument
1685  OpUsefulBits = OpUsefulBits.shl(Imm);
1686  } else {
1687  OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1688  --OpUsefulBits;
1689  // The interesting part will be shifted in the result
1690  OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
1691  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1692  // The interesting part was at zero in the argument
1693  OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
1694  }
1695 
1696  UsefulBits &= OpUsefulBits;
1697 }
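// Illustrative example: for a bitfield move with Imm = 4 and MSB = 11 (an
// 8-bit extract such as "ubfx #4, #8"), the bits of the operand that can reach
// the result are roughly 0xff0 and they land in the low 8 bits of the result;
// the recursion above further narrows this using the result's users.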
1698 
1699 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1700  unsigned Depth) {
1701  uint64_t Imm =
1702  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1703  uint64_t MSB =
1704  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1705 
1706  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1707 }
1708 
1709 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1710  unsigned Depth) {
1711  uint64_t ShiftTypeAndValue =
1712  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1713  APInt Mask(UsefulBits);
1714  Mask.clearAllBits();
1715  Mask.flipAllBits();
1716 
1717  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1718  // Shift Left
1719  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1720  Mask = Mask.shl(ShiftAmt);
1721  getUsefulBits(Op, Mask, Depth + 1);
1722  Mask = Mask.lshr(ShiftAmt);
1723  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1724  // Shift Right
1725  // We do not handle AArch64_AM::ASR, because the sign will change the
1726  // number of useful bits
1727  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1728  Mask = Mask.lshr(ShiftAmt);
1729  getUsefulBits(Op, Mask, Depth + 1);
1730  Mask = Mask.shl(ShiftAmt);
1731  } else
1732  return;
1733 
1734  UsefulBits &= Mask;
1735 }
1736 
1737 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1738  unsigned Depth) {
1739  uint64_t Imm =
1740  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1741  uint64_t MSB =
1742  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1743 
1744  if (Op.getOperand(1) == Orig)
1745  return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1746 
1747  APInt OpUsefulBits(UsefulBits);
1748  OpUsefulBits = 1;
1749 
1750  if (MSB >= Imm) {
1751  OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1752  --OpUsefulBits;
1753  UsefulBits &= ~OpUsefulBits;
1754  getUsefulBits(Op, UsefulBits, Depth + 1);
1755  } else {
1756  OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1757  --OpUsefulBits;
1758  UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
1759  getUsefulBits(Op, UsefulBits, Depth + 1);
1760  }
1761 }
1762 
1763 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1764  SDValue Orig, unsigned Depth) {
1765 
1766  // Users of this node should have already been instruction selected
1767  // FIXME: Can we turn that into an assert?
1768  if (!UserNode->isMachineOpcode())
1769  return;
1770 
1771  switch (UserNode->getMachineOpcode()) {
1772  default:
1773  return;
1774  case AArch64::ANDSWri:
1775  case AArch64::ANDSXri:
1776  case AArch64::ANDWri:
1777  case AArch64::ANDXri:
1778  // We increment Depth only when we call getUsefulBits
1779  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1780  Depth);
1781  case AArch64::UBFMWri:
1782  case AArch64::UBFMXri:
1783  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1784 
1785  case AArch64::ORRWrs:
1786  case AArch64::ORRXrs:
1787  if (UserNode->getOperand(1) != Orig)
1788  return;
1789  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
1790  Depth);
1791  case AArch64::BFMWri:
1792  case AArch64::BFMXri:
1793  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1794  }
1795 }
1796 
1797 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
1798  if (Depth >= 6)
1799  return;
1800  // Initialize UsefulBits
1801  if (!Depth) {
1802  unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
1803  // At the beginning, assume every produced bit is useful
1804  UsefulBits = APInt(Bitwidth, 0);
1805  UsefulBits.flipAllBits();
1806  }
1807  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
1808 
1809  for (SDNode *Node : Op.getNode()->uses()) {
1810  // A use cannot produce useful bits
1811  APInt UsefulBitsForUse = APInt(UsefulBits);
1812  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
1813  UsersUsefulBits |= UsefulBitsForUse;
1814  }
1815  // UsefulBits contains the produced bits that are meaningful for the
1816  // current definition, thus a user cannot make a bit meaningful at
1817  // this point
1818  UsefulBits &= UsersUsefulBits;
1819 }
1820 
1821 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
1822 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
1823 /// 0, return Op unchanged.
1824 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
1825  if (ShlAmount == 0)
1826  return Op;
1827 
1828  EVT VT = Op.getValueType();
1829  SDLoc dl(Op);
1830  unsigned BitWidth = VT.getSizeInBits();
1831  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1832 
1833  SDNode *ShiftNode;
1834  if (ShlAmount > 0) {
1835  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
1836  ShiftNode = CurDAG->getMachineNode(
1837  UBFMOpc, dl, VT, Op,
1838  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
1839  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
1840  } else {
1841  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
1842  assert(ShlAmount < 0 && "expected right shift");
1843  int ShrAmount = -ShlAmount;
1844  ShiftNode = CurDAG->getMachineNode(
1845  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
1846  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
1847  }
1848 
1849  return SDValue(ShiftNode, 0);
1850 }
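// Illustrative example (32-bit): getLeftShift(CurDAG, Op, 3) builds
// UBFMWri Op, #29, #28 ("lsl w0, w1, #3"), while getLeftShift(CurDAG, Op, -3)
// builds UBFMWri Op, #3, #31 ("lsr w0, w1, #3").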
1851 
1852 /// Does this tree qualify as an attempt to move a bitfield into position,
1853 /// essentially "(and (shl VAL, N), Mask)".
1854 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
1855  SDValue &Src, int &ShiftAmount,
1856  int &MaskWidth) {
1857  EVT VT = Op.getValueType();
1858  unsigned BitWidth = VT.getSizeInBits();
1859  (void)BitWidth;
1860  assert(BitWidth == 32 || BitWidth == 64);
1861 
1862  APInt KnownZero, KnownOne;
1863  CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
1864 
1865  // Non-zero in the sense that they're not provably zero, which is the key
1866  // point if we want to use this value
1867  uint64_t NonZeroBits = (~KnownZero).getZExtValue();
1868 
1869  // Discard a constant AND mask if present. It's safe because the node will
1870  // already have been factored into the computeKnownBits calculation above.
1871  uint64_t AndImm;
1872  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
1873  assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
1874  Op = Op.getOperand(0);
1875  }
1876 
1877  uint64_t ShlImm;
1878  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
1879  return false;
1880  Op = Op.getOperand(0);
1881 
1882  if (!isShiftedMask_64(NonZeroBits))
1883  return false;
1884 
1885  ShiftAmount = countTrailingZeros(NonZeroBits);
1886  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
1887 
1888  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
1889  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
1890  // amount.
1891  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
1892 
1893  return true;
1894 }
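// Illustrative example: for (and (shl x, 3), 0x1f8) with nothing else known
// about x, NonZeroBits is 0x1f8, so ShiftAmount = 3 and MaskWidth = 6, and
// Src is x itself (no extra LSL/LSR is needed since ShlImm == ShiftAmount).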
1895 
1896 // Given an OR operation, check if we have the following pattern
1897 // ubfm c, b, imm, imm2 (or something that does the same job, see
1898 // isBitfieldExtractOp)
1899 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
1900 // countTrailingZeros(mask2) == imm2 - imm + 1
1901 // f = d | c
1902 // If yes, the given reference arguments will be updated so that one can
1903 // replace the OR instruction with:
1904 // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
1905 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
1906  SDValue &Src, unsigned &ImmR,
1907  unsigned &ImmS, SelectionDAG *CurDAG) {
1908  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
1909 
1910  // Set Opc
1911  EVT VT = N->getValueType(0);
1912  if (VT == MVT::i32)
1913  Opc = AArch64::BFMWri;
1914  else if (VT == MVT::i64)
1915  Opc = AArch64::BFMXri;
1916  else
1917  return false;
1918 
1919  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
1920  // have the expected shape. Try to undo that.
1921  APInt UsefulBits;
1922  getUsefulBits(SDValue(N, 0), UsefulBits);
1923 
1924  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
1925  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
1926 
1927  // OR is commutative, check both possibilities (does llvm provide a
1928  // way to do that directly, e.g., via code matcher?)
1929  SDValue OrOpd1Val = N->getOperand(1);
1930  SDNode *OrOpd0 = N->getOperand(0).getNode();
1931  SDNode *OrOpd1 = N->getOperand(1).getNode();
1932  for (int i = 0; i < 2;
1933  ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
1934  unsigned BFXOpc;
1935  int DstLSB, Width;
1936  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
1937  NumberOfIgnoredLowBits, true)) {
1938  // Check that the returned opcode is compatible with the pattern,
1939  // i.e., same type and zero extended (U and not S)
1940  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
1941  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
1942  continue;
1943 
1944  // Compute the width of the bitfield insertion
1945  DstLSB = 0;
1946  Width = ImmS - ImmR + 1;
1947  // FIXME: This constraint is to catch bitfield insertion; we may
1948  // want to widen the pattern if we want to grab the general bitfield
1949  // move case
1950  if (Width <= 0)
1951  continue;
1952 
1953  // If the mask on the insertee is correct, we have a BFXIL operation. We
1954  // can share the ImmR and ImmS values from the already-computed UBFM.
1955  } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
1956  DstLSB, Width)) {
1957  ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
1958  ImmS = Width - 1;
1959  } else
1960  continue;
1961 
1962  // Check the second part of the pattern
1963  EVT VT = OrOpd1->getValueType(0);
1964  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
1965 
1966  // Compute the Known Zero for the candidate of the first operand.
1967  // This allows us to catch more general cases than just looking for
1968  // AND with imm. Indeed, simplify-demanded-bits may have removed
1969  // the AND instruction because it proved it was useless.
1970  APInt KnownZero, KnownOne;
1971  CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
1972 
1973  // Check if there is enough room for the second operand to appear
1974  // in the first one
1975  APInt BitsToBeInserted =
1976  APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
1977 
1978  if ((BitsToBeInserted & ~KnownZero) != 0)
1979  continue;
1980 
1981  // Set the first operand
1982  uint64_t Imm;
1983  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
1984  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
1985  // In that case, we can eliminate the AND
1986  Dst = OrOpd1->getOperand(0);
1987  else
1988  // Maybe the AND has been removed by simplify-demanded-bits
1989  // or is useful because it discards more bits
1990  Dst = OrOpd1Val;
1991 
1992  // both parts match
1993  return true;
1994  }
1995 
1996  return false;
1997 }
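// Illustrative example (i32): for
//   (or (and (srl x, 16), 0xff), (and y, 0xffffff00))
// the first operand matches as UBFMWri with ImmR = 16 and ImmS = 23, the mask
// on y zeroes exactly the 8 bits to be inserted, so Dst = y and the OR can be
// replaced by BFMWri, roughly "bfxil w_y, w_x, #16, #8".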
1998 
1999 SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
2000  if (N->getOpcode() != ISD::OR)
2001  return nullptr;
2002 
2003  unsigned Opc;
2004  unsigned LSB, MSB;
2005  SDValue Opd0, Opd1;
2006 
2007  if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
2008  return nullptr;
2009 
2010  EVT VT = N->getValueType(0);
2011  SDLoc dl(N);
2012  SDValue Ops[] = { Opd0,
2013  Opd1,
2014  CurDAG->getTargetConstant(LSB, dl, VT),
2015  CurDAG->getTargetConstant(MSB, dl, VT) };
2016  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2017 }
2018 
2019 SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
2020  EVT VT = N->getValueType(0);
2021  unsigned Variant;
2022  unsigned Opc;
2023  unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
2024 
2025  if (VT == MVT::f32) {
2026  Variant = 0;
2027  } else if (VT == MVT::f64) {
2028  Variant = 1;
2029  } else
2030  return nullptr; // Unrecognized argument type. Fall back on default codegen.
2031 
2032  // Pick the FRINTX variant needed to set the flags.
2033  unsigned FRINTXOpc = FRINTXOpcs[Variant];
2034 
2035  switch (N->getOpcode()) {
2036  default:
2037  return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
2038  case ISD::FCEIL: {
2039  unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
2040  Opc = FRINTPOpcs[Variant];
2041  break;
2042  }
2043  case ISD::FFLOOR: {
2044  unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
2045  Opc = FRINTMOpcs[Variant];
2046  break;
2047  }
2048  case ISD::FTRUNC: {
2049  unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
2050  Opc = FRINTZOpcs[Variant];
2051  break;
2052  }
2053  case ISD::FROUND: {
2054  unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
2055  Opc = FRINTAOpcs[Variant];
2056  break;
2057  }
2058  }
2059 
2060  SDLoc dl(N);
2061  SDValue In = N->getOperand(0);
2062  SmallVector<SDValue, 2> Ops;
2063  Ops.push_back(In);
2064 
2065  if (!TM.Options.UnsafeFPMath) {
2066  SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
2067  Ops.push_back(SDValue(FRINTX, 1));
2068  }
2069 
2070  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2071 }
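// Illustrative example: with UnsafeFPMath off, (f32 (fceil x)) is selected as
// FRINTXSr x (kept only for its FPSR flag-setting side effect, via the glue
// result) glued to FRINTPSr, which produces the rounded-up value.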
2072 
2073 bool
2074 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2075  unsigned RegWidth) {
2076  APFloat FVal(0.0);
2077  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2078  FVal = CN->getValueAPF();
2079  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2080  // Some otherwise illegal constants are allowed in this case.
2081  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2082  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2083  return false;
2084 
2085  ConstantPoolSDNode *CN =
2086  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2087  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2088  } else
2089  return false;
2090 
2091  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2092  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2093  // x-register.
2094  //
2095  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2096  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2097  // integers.
2098  bool IsExact;
2099 
2100  // fbits is between 1 and 64 in the worst-case, which means the fmul
2101  // could have 2^64 as an actual operand. Need 65 bits of precision.
2102  APSInt IntVal(65, true);
2103  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2104 
2105  // N.b. isPowerOf2 also checks for > 0.
2106  if (!IsExact || !IntVal.isPowerOf2()) return false;
2107  unsigned FBits = IntVal.logBase2();
2108 
2109  // Checks above should have guaranteed that we haven't lost information in
2110  // finding FBits, but it must still be in range.
2111  if (FBits == 0 || FBits > RegWidth) return false;
2112 
2113  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2114  return true;
2115 }
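// Illustrative example: for (fp_to_sint (fmul f32:x, 65536.0)) with
// RegWidth = 32, the constant is 2^16, so FBits = 16 and the caller can use a
// fixed-point conversion, roughly "fcvtzs w0, s0, #16".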
2116 
2117 // Inspects a register string of the form op0:op1:CRn:CRm:op2, gets the fields
2118 // of the string, obtains the integer values from them, and combines these
2119 // into a single value to be used in the MRS/MSR instruction.
2120 static int getIntOperandFromRegisterString(StringRef RegString) {
2121  SmallVector<StringRef, 5> Fields;
2122  RegString.split(Fields, ":");
2123 
2124  if (Fields.size() == 1)
2125  return -1;
2126 
2127  assert(Fields.size() == 5
2128  && "Invalid number of fields in read register string");
2129 
2130  SmallVector<int, 5> Ops;
2131  bool AllIntFields = true;
2132 
2133  for (StringRef Field : Fields) {
2134  unsigned IntField;
2135  AllIntFields &= !Field.getAsInteger(10, IntField);
2136  Ops.push_back(IntField);
2137  }
2138 
2139  assert(AllIntFields &&
2140  "Unexpected non-integer value in special register string.");
2141 
2142  // Need to combine the integer fields of the string into a single value
2143  // based on the bit encoding of MRS/MSR instruction.
2144  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2145  (Ops[3] << 3) | (Ops[4]);
2146 }
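// Illustrative example: "3:3:13:0:2" (op0:op1:CRn:CRm:op2, i.e. TPIDR_EL0)
// yields (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 == 0xde82, the
// immediate operand expected by the MRS/MSR machine instructions.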
2147 
2148 // Lower the read_register intrinsic to an MRS instruction node if the special
2149 // register string argument is either of the form detailed in the ACLE (the
2150 // form described in getIntOperandFromRegisterString) or is a named register
2151 // known by the MRS SysReg mapper.
2152 SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) {
2153  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2154  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2155  SDLoc DL(N);
2156 
2157  int Reg = getIntOperandFromRegisterString(RegString->getString());
2158  if (Reg != -1)
2159  return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
2160  MVT::Other,
2161  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2162  N->getOperand(0));
2163 
2164  // Use the sysreg mapper to map the remaining possible strings to the
2165  // value for the register to be used for the instruction operand.
2166  AArch64SysReg::MRSMapper mapper;
2167  bool IsValidSpecialReg;
2168  Reg = mapper.fromString(RegString->getString(),
2169  Subtarget->getFeatureBits(),
2170  IsValidSpecialReg);
2171  if (IsValidSpecialReg)
2172  return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
2173  MVT::Other,
2174  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2175  N->getOperand(0));
2176 
2177  return nullptr;
2178 }
2179 
2180 // Lower the write_register intrinsic to an MSR instruction node if the special
2181 // register string argument is either of the form detailed in the ACLE (the
2182 // form described in getIntOperandFromRegisterString) or is a named register
2183 // known by the MSR SysReg mapper.
2184 SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
2185  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2186  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2187  SDLoc DL(N);
2188 
2189  int Reg = getIntOperandFromRegisterString(RegString->getString());
2190  if (Reg != -1)
2191  return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2192  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2193  N->getOperand(2), N->getOperand(0));
2194 
2195  // Check if the register was one of those allowed as the pstatefield value in
2196  // the MSR (immediate) instruction. To accept the values allowed in the
2197  // pstatefield for the MSR (immediate) instruction, we also require that an
2198  // immediate value has been provided as an argument; we know that this is
2199  // the case, as it has been ensured by semantic checking.
2200  AArch64PState::PStateMapper PMapper;
2201  bool IsValidSpecialReg;
2202  Reg = PMapper.fromString(RegString->getString(),
2203  Subtarget->getFeatureBits(),
2204  IsValidSpecialReg);
2205  if (IsValidSpecialReg) {
2206  assert (isa<ConstantSDNode>(N->getOperand(2))
2207  && "Expected a constant integer expression.");
2208  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2209  return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other,
2210  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2211  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2212  N->getOperand(0));
2213  }
2214 
2215  // Use the sysreg mapper to attempt to map the remaining possible strings
2216  // to the value for the register to be used for the MSR (register)
2217  // instruction operand.
2218  AArch64SysReg::MSRMapper Mapper;
2219  Reg = Mapper.fromString(RegString->getString(),
2220  Subtarget->getFeatureBits(),
2221  IsValidSpecialReg);
2222 
2223  if (IsValidSpecialReg)
2224  return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2225  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2226  N->getOperand(2), N->getOperand(0));
2227 
2228  return nullptr;
2229 }
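// Illustrative example: a write_register call whose string names a pstatefield
// (e.g. a name such as "spsel") with a constant argument is selected to
// MSRpstate with the mapped value and the immediate; other recognised names go
// through the MSR (register) form above.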
2230 
2231 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
2232  // Dump information about the Node being selected
2233  DEBUG(errs() << "Selecting: ");
2234  DEBUG(Node->dump(CurDAG));
2235  DEBUG(errs() << "\n");
2236 
2237  // If we have a custom node, we already have selected!
2238  if (Node->isMachineOpcode()) {
2239  DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2240  Node->setNodeId(-1);
2241  return nullptr;
2242  }
2243 
2244  // A few pieces of custom selection.
2245  SDNode *ResNode = nullptr;
2246  EVT VT = Node->getValueType(0);
2247 
2248  switch (Node->getOpcode()) {
2249  default:
2250  break;
2251 
2252  case ISD::READ_REGISTER:
2253  if (SDNode *Res = SelectReadRegister(Node))
2254  return Res;
2255  break;
2256 
2257  case ISD::WRITE_REGISTER:
2258  if (SDNode *Res = SelectWriteRegister(Node))
2259  return Res;
2260  break;
2261 
2262  case ISD::ADD:
2263  if (SDNode *I = SelectMLAV64LaneV128(Node))
2264  return I;
2265  break;
2266 
2267  case ISD::LOAD: {
2268  // Try to select as an indexed load. Fall through to normal processing
2269  // if we can't.
2270  bool Done = false;
2271  SDNode *I = SelectIndexedLoad(Node, Done);
2272  if (Done)
2273  return I;
2274  break;
2275  }
2276 
2277  case ISD::SRL:
2278  case ISD::AND:
2279  case ISD::SRA:
2280  if (SDNode *I = SelectBitfieldExtractOp(Node))
2281  return I;
2282  break;
2283 
2284  case ISD::OR:
2285  if (SDNode *I = SelectBitfieldInsertOp(Node))
2286  return I;
2287  break;
2288 
2289  case ISD::EXTRACT_VECTOR_ELT: {
2290  // Extracting lane zero is a special case where we can just use a plain
2291  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2292  // the rest of the compiler, especially the register allocator and copy
2293  // propagation, to reason about, so is preferred when it's possible to
2294  // use it.
2295  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2296  // Bail and use the default Select() for non-zero lanes.
2297  if (LaneNode->getZExtValue() != 0)
2298  break;
2299  // If the element type is not the same as the result type, likewise
2300  // bail and use the default Select(), as there's more to do than just
2301  // a cross-class COPY. This catches extracts of i8 and i16 elements
2302  // since they will need an explicit zext.
2303  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2304  break;
2305  unsigned SubReg;
2306  switch (Node->getOperand(0)
2307  .getValueType()
2308  .getVectorElementType()
2309  .getSizeInBits()) {
2310  default:
2311  llvm_unreachable("Unexpected vector element type!");
2312  case 64:
2313  SubReg = AArch64::dsub;
2314  break;
2315  case 32:
2316  SubReg = AArch64::ssub;
2317  break;
2318  case 16:
2319  SubReg = AArch64::hsub;
2320  break;
2321  case 8:
2322  llvm_unreachable("unexpected zext-requiring extract element!");
2323  }
2324  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2325  Node->getOperand(0));
2326  DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2327  DEBUG(Extract->dumpr(CurDAG));
2328  DEBUG(dbgs() << "\n");
2329  return Extract.getNode();
2330  }
2331  case ISD::Constant: {
2332  // Materialize zero constants as copies from WZR/XZR. This allows
2333  // the coalescer to propagate these into other instructions.
2334  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2335  if (ConstNode->isNullValue()) {
2336  if (VT == MVT::i32)
2337  return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2338  AArch64::WZR, MVT::i32).getNode();
2339  else if (VT == MVT::i64)
2340  return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2341  AArch64::XZR, MVT::i64).getNode();
2342  }
2343  break;
2344  }
2345 
2346  case ISD::FrameIndex: {
2347  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2348  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2349  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2350  const TargetLowering *TLI = getTargetLowering();
2351  SDValue TFI = CurDAG->getTargetFrameIndex(
2352  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2353  SDLoc DL(Node);
2354  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2355  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2356  return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2357  }
2358  case ISD::INTRINSIC_W_CHAIN: {
2359  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2360  switch (IntNo) {
2361  default:
2362  break;
2363  case Intrinsic::aarch64_ldaxp:
2364  case Intrinsic::aarch64_ldxp: {
2365  unsigned Op =
2366  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2367  SDValue MemAddr = Node->getOperand(2);
2368  SDLoc DL(Node);
2369  SDValue Chain = Node->getOperand(0);
2370 
2371  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2372  MVT::Other, MemAddr, Chain);
2373 
2374  // Transfer memoperands.
2375  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2376  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2377  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2378  return Ld;
2379  }
2380  case Intrinsic::aarch64_stlxp:
2381  case Intrinsic::aarch64_stxp: {
2382  unsigned Op =
2383  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2384  SDLoc DL(Node);
2385  SDValue Chain = Node->getOperand(0);
2386  SDValue ValLo = Node->getOperand(2);
2387  SDValue ValHi = Node->getOperand(3);
2388  SDValue MemAddr = Node->getOperand(4);
2389 
2390  // Place arguments in the right order.
2391  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2392 
2393  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2394  // Transfer memoperands.
2395  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2396  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2397  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2398 
2399  return St;
2400  }
2401  case Intrinsic::aarch64_neon_ld1x2:
2402  if (VT == MVT::v8i8)
2403  return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2404  else if (VT == MVT::v16i8)
2405  return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2406  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2407  return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2408  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2409  return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2410  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2411  return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2412  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2413  return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2414  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2415  return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2416  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2417  return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2418  break;
2419  case Intrinsic::aarch64_neon_ld1x3:
2420  if (VT == MVT::v8i8)
2421  return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2422  else if (VT == MVT::v16i8)
2423  return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2424  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2425  return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2426  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2427  return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2428  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2429  return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2430  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2431  return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2432  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2433  return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2434  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2435  return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2436  break;
2437  case Intrinsic::aarch64_neon_ld1x4:
2438  if (VT == MVT::v8i8)
2439  return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2440  else if (VT == MVT::v16i8)
2441  return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2442  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2443  return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2444  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2445  return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2446  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2447  return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2448  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2449  return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2450  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2451  return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2452  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2453  return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2454  break;
2455  case Intrinsic::aarch64_neon_ld2:
2456  if (VT == MVT::v8i8)
2457  return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2458  else if (VT == MVT::v16i8)
2459  return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2460  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2461  return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2462  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2463  return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2464  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2465  return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2466  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2467  return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2468  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2469  return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2470  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2471  return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2472  break;
2473  case Intrinsic::aarch64_neon_ld3:
2474  if (VT == MVT::v8i8)
2475  return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2476  else if (VT == MVT::v16i8)
2477  return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2478  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2479  return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2480  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2481  return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2482  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2483  return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2484  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2485  return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2486  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2487  return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2488  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2489  return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2490  break;
2491  case Intrinsic::aarch64_neon_ld4:
2492  if (VT == MVT::v8i8)
2493  return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
2494  else if (VT == MVT::v16i8)
2495  return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
2496  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2497  return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
2498  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2499  return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
2500  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2501  return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
2502  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2503  return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
2504  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2505  return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2506  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2507  return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
2508  break;
2509  case Intrinsic::aarch64_neon_ld2r:
2510  if (VT == MVT::v8i8)
2511  return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
2512  else if (VT == MVT::v16i8)
2513  return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
2514  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2515  return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
2516  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2517  return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
2518  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2519  return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
2520  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2521  return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
2522  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2523  return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
2524  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2525  return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
2526  break;
2527  case Intrinsic::aarch64_neon_ld3r:
2528  if (VT == MVT::v8i8)
2529  return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
2530  else if (VT == MVT::v16i8)
2531  return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
2532  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2533  return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
2534  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2535  return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
2536  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2537  return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
2538  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2539  return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
2540  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2541  return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
2542  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2543  return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
2544  break;
2545  case Intrinsic::aarch64_neon_ld4r:
2546  if (VT == MVT::v8i8)
2547  return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
2548  else if (VT == MVT::v16i8)
2549  return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
2550  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2551  return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
2552  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2553  return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
2554  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2555  return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
2556  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2557  return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
2558  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2559  return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
2560  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2561  return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
2562  break;
2563  case Intrinsic::aarch64_neon_ld2lane:
2564  if (VT == MVT::v16i8 || VT == MVT::v8i8)
2565  return SelectLoadLane(Node, 2, AArch64::LD2i8);
2566  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2567  VT == MVT::v8f16)
2568  return SelectLoadLane(Node, 2, AArch64::LD2i16);
2569  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2570  VT == MVT::v2f32)
2571  return SelectLoadLane(Node, 2, AArch64::LD2i32);
2572  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2573  VT == MVT::v1f64)
2574  return SelectLoadLane(Node, 2, AArch64::LD2i64);
2575  break;
2576  case Intrinsic::aarch64_neon_ld3lane:
2577  if (VT == MVT::v16i8 || VT == MVT::v8i8)
2578  return SelectLoadLane(Node, 3, AArch64::LD3i8);
2579  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2580  VT == MVT::v8f16)
2581  return SelectLoadLane(Node, 3, AArch64::LD3i16);
2582  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2583  VT == MVT::v2f32)
2584  return SelectLoadLane(Node, 3, AArch64::LD3i32);
2585  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2586  VT == MVT::v1f64)
2587  return SelectLoadLane(Node, 3, AArch64::LD3i64);
2588  break;
2589  case Intrinsic::aarch64_neon_ld4lane:
2590  if (VT == MVT::v16i8 || VT == MVT::v8i8)
2591  return SelectLoadLane(Node, 4, AArch64::LD4i8);
2592  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2593  VT == MVT::v8f16)
2594  return SelectLoadLane(Node, 4, AArch64::LD4i16);
2595  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2596  VT == MVT::v2f32)
2597  return SelectLoadLane(Node, 4, AArch64::LD4i32);
2598  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2599  VT == MVT::v1f64)
2600  return SelectLoadLane(Node, 4, AArch64::LD4i64);
2601  break;
2602  }
2603  } break;
2604  case ISD::INTRINSIC_WO_CHAIN: {
2605  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
2606  switch (IntNo) {
2607  default:
2608  break;
2609  case Intrinsic::aarch64_neon_tbl2:
2610  return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
2611  : AArch64::TBLv16i8Two,
2612  false);
2613  case Intrinsic::aarch64_neon_tbl3:
2614  return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
2615  : AArch64::TBLv16i8Three,
2616  false);
2617  case Intrinsic::aarch64_neon_tbl4:
2618  return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
2619  : AArch64::TBLv16i8Four,
2620  false);
2621  case Intrinsic::aarch64_neon_tbx2:
2622  return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
2623  : AArch64::TBXv16i8Two,
2624  true);
2625  case Intrinsic::aarch64_neon_tbx3:
2626  return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
2627  : AArch64::TBXv16i8Three,
2628  true);
2629  case Intrinsic::aarch64_neon_tbx4:
2630  return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
2631  : AArch64::TBXv16i8Four,
2632  true);
2633  case Intrinsic::aarch64_neon_smull:
2634  case Intrinsic::aarch64_neon_umull:
2635  if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
2636  return N;
2637  break;
2638  }
2639  break;
2640  }
2641  case ISD::INTRINSIC_VOID: {
2642  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2643  if (Node->getNumOperands() >= 3)
2644  VT = Node->getOperand(2)->getValueType(0);
2645  switch (IntNo) {
2646  default:
2647  break;
2648  case Intrinsic::aarch64_neon_st1x2: {
2649  if (VT == MVT::v8i8)
2650  return SelectStore(Node, 2, AArch64::ST1Twov8b);
2651  else if (VT == MVT::v16i8)
2652  return SelectStore(Node, 2, AArch64::ST1Twov16b);
2653  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2654  return SelectStore(Node, 2, AArch64::ST1Twov4h);
2655  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2656  return SelectStore(Node, 2, AArch64::ST1Twov8h);
2657  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2658  return SelectStore(Node, 2, AArch64::ST1Twov2s);
2659  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2660  return SelectStore(Node, 2, AArch64::ST1Twov4s);
2661  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2662  return SelectStore(Node, 2, AArch64::ST1Twov2d);
2663  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2664  return SelectStore(Node, 2, AArch64::ST1Twov1d);
2665  break;
2666  }
2667  case Intrinsic::aarch64_neon_st1x3: {
2668  if (VT == MVT::v8i8)
2669  return SelectStore(Node, 3, AArch64::ST1Threev8b);
2670  else if (VT == MVT::v16i8)
2671  return SelectStore(Node, 3, AArch64::ST1Threev16b);
2672  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2673  return SelectStore(Node, 3, AArch64::ST1Threev4h);
2674  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2675  return SelectStore(Node, 3, AArch64::ST1Threev8h);
2676  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2677  return SelectStore(Node, 3, AArch64::ST1Threev2s);
2678  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2679  return SelectStore(Node, 3, AArch64::ST1Threev4s);
2680  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2681  return SelectStore(Node, 3, AArch64::ST1Threev2d);
2682  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2683  return SelectStore(Node, 3, AArch64::ST1Threev1d);
2684  break;
2685  }
2686  case Intrinsic::aarch64_neon_st1x4: {
2687  if (VT == MVT::v8i8)
2688  return SelectStore(Node, 4, AArch64::ST1Fourv8b);
2689  else if (VT == MVT::v16i8)
2690  return SelectStore(Node, 4, AArch64::ST1Fourv16b);
2691  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2692  return SelectStore(Node, 4, AArch64::ST1Fourv4h);
2693  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2694  return SelectStore(Node, 4, AArch64::ST1Fourv8h);
2695  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2696  return SelectStore(Node, 4, AArch64::ST1Fourv2s);
2697  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2698  return SelectStore(Node, 4, AArch64::ST1Fourv4s);
2699  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2700  return SelectStore(Node, 4, AArch64::ST1Fourv2d);
2701  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2702  return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2703  break;
2704  }
2705  case Intrinsic::aarch64_neon_st2: {
2706  if (VT == MVT::v8i8)
2707  return SelectStore(Node, 2, AArch64::ST2Twov8b);
2708  else if (VT == MVT::v16i8)
2709  return SelectStore(Node, 2, AArch64::ST2Twov16b);
2710  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2711  return SelectStore(Node, 2, AArch64::ST2Twov4h);
2712  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2713  return SelectStore(Node, 2, AArch64::ST2Twov8h);
2714  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2715  return SelectStore(Node, 2, AArch64::ST2Twov2s);
2716  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2717  return SelectStore(Node, 2, AArch64::ST2Twov4s);
2718  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2719  return SelectStore(Node, 2, AArch64::ST2Twov2d);
2720  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2721  return SelectStore(Node, 2, AArch64::ST1Twov1d);
2722  break;
2723  }
2724  case Intrinsic::aarch64_neon_st3: {
2725  if (VT == MVT::v8i8)
2726  return SelectStore(Node, 3, AArch64::ST3Threev8b);
2727  else if (VT == MVT::v16i8)
2728  return SelectStore(Node, 3, AArch64::ST3Threev16b);
2729  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2730  return SelectStore(Node, 3, AArch64::ST3Threev4h);
2731  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2732  return SelectStore(Node, 3, AArch64::ST3Threev8h);
2733  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2734  return SelectStore(Node, 3, AArch64::ST3Threev2s);
2735  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2736  return SelectStore(Node, 3, AArch64::ST3Threev4s);
2737  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2738  return SelectStore(Node, 3, AArch64::ST3Threev2d);
2739  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2740  return SelectStore(Node, 3, AArch64::ST1Threev1d);
2741  break;
2742  }
2743  case Intrinsic::aarch64_neon_st4: {
2744  if (VT == MVT::v8i8)
2745  return SelectStore(Node, 4, AArch64::ST4Fourv8b);
2746  else if (VT == MVT::v16i8)
2747  return SelectStore(Node, 4, AArch64::ST4Fourv16b);
2748  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2749  return SelectStore(Node, 4, AArch64::ST4Fourv4h);
2750  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2751  return SelectStore(Node, 4, AArch64::ST4Fourv8h);
2752  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2753  return SelectStore(Node, 4, AArch64::ST4Fourv2s);
2754  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2755  return SelectStore(Node, 4, AArch64::ST4Fourv4s);
2756  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2757  return SelectStore(Node, 4, AArch64::ST4Fourv2d);
2758  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2759  return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2760  break;
2761  }
2762  case Intrinsic::aarch64_neon_st2lane: {
2763  if (VT == MVT::v16i8 || VT == MVT::v8i8)
2764  return SelectStoreLane(Node, 2, AArch64::ST2i8);
2765  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2766  VT == MVT::v8f16)
2767  return SelectStoreLane(Node, 2, AArch64::ST2i16);
2768  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2769  VT == MVT::v2f32)
2770  return SelectStoreLane(Node, 2, AArch64::ST2i32);
2771  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2772  VT == MVT::v1f64)
2773  return SelectStoreLane(Node, 2, AArch64::ST2i64);
2774  break;
2775  }
2776  case Intrinsic::aarch64_neon_st3lane: {
2777  if (VT == MVT::v16i8 || VT == MVT::v8i8)
2778  return SelectStoreLane(Node, 3, AArch64::ST3i8);
2779  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2780  VT == MVT::v8f16)
2781  return SelectStoreLane(Node, 3, AArch64::ST3i16);
2782  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2783  VT == MVT::v2f32)
2784  return SelectStoreLane(Node, 3, AArch64::ST3i32);
2785  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2786  VT == MVT::v1f64)
2787  return SelectStoreLane(Node, 3, AArch64::ST3i64);
2788  break;
2789  }
2790  case Intrinsic::aarch64_neon_st4lane: {
2791  if (VT == MVT::v16i8 || VT == MVT::v8i8)
2792  return SelectStoreLane(Node, 4, AArch64::ST4i8);
2793  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
2794  VT == MVT::v8f16)
2795  return SelectStoreLane(Node, 4, AArch64::ST4i16);
2796  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2797  VT == MVT::v2f32)
2798  return SelectStoreLane(Node, 4, AArch64::ST4i32);
2799  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2800  VT == MVT::v1f64)
2801  return SelectStoreLane(Node, 4, AArch64::ST4i64);
2802  break;
2803  }
2804  }
2805  }
2806  case AArch64ISD::LD2post: {
2807  if (VT == MVT::v8i8)
2808  return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
2809  else if (VT == MVT::v16i8)
2810  return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
2811  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2812  return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
2813  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2814  return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
2815  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2816  return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
2817  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2818  return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
2819  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2820  return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2821  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2822  return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
2823  break;
2824  }
2825  case AArch64ISD::LD3post: {
2826  if (VT == MVT::v8i8)
2827  return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
2828  else if (VT == MVT::v16i8)
2829  return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
2830  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2831  return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
2832  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2833  return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
2834  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2835  return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
2836  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2837  return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
2838  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2839  return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2840  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2841  return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
2842  break;
2843  }
2844  case AArch64ISD::LD4post: {
2845  if (VT == MVT::v8i8)
2846  return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
2847  else if (VT == MVT::v16i8)
2848  return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
2849  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2850  return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
2851  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2852  return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
2853  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2854  return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
2855  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2856  return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
2857  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2858  return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2859  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2860  return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
2861  break;
2862  }
2863  case AArch64ISD::LD1x2post: {
2864  if (VT == MVT::v8i8)
2865  return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
2866  else if (VT == MVT::v16i8)
2867  return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
2868  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2869  return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
2870  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2871  return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
2872  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2873  return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
2874  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2875  return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
2876  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2877  return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2878  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2879  return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
2880  break;
2881  }
2882  case AArch64ISD::LD1x3post: {
2883  if (VT == MVT::v8i8)
2884  return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
2885  else if (VT == MVT::v16i8)
2886  return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
2887  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2888  return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
2889  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2890  return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
2891  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2892  return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
2893  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2894  return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
2895  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2896  return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2897  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2898  return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
2899  break;
2900  }
2901  case AArch64ISD::LD1x4post: {
2902  if (VT == MVT::v8i8)
2903  return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
2904  else if (VT == MVT::v16i8)
2905  return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
2906  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2907  return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
2908  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2909  return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
2910  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2911  return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
2912  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2913  return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
2914  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2915  return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2916  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2917  return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
2918  break;
2919  }
2920  case AArch64ISD::LD1DUPpost: {
2921  if (VT == MVT::v8i8)
2922  return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
2923  else if (VT == MVT::v16i8)
2924  return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
2925  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2926  return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
2927  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2928  return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
2929  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2930  return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
2931  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2932  return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
2933  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2934  return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
2935  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2936  return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
2937  break;
2938  }
2939  case AArch64ISD::LD2DUPpost: {
2940  if (VT == MVT::v8i8)
2941  return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
2942  else if (VT == MVT::v16i8)
2943  return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
2944  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2945  return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
2946  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2947  return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
2948  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2949  return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
2950  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2951  return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
2952  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2953  return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
2954  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2955  return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
2956  break;
2957  }
2958  case AArch64ISD::LD3DUPpost: {
2959  if (VT == MVT::v8i8)
2960  return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
2961  else if (VT == MVT::v16i8)
2962  return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
2963  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2964  return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
2965  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2966  return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
2967  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2968  return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
2969  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2970  return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
2971  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2972  return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
2973  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2974  return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
2975  break;
2976  }
2977  case AArch64ISD::LD4DUPpost: {
2978  if (VT == MVT::v8i8)
2979  return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
2980  else if (VT == MVT::v16i8)
2981  return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
2982  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
2983  return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
2984  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
2985  return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
2986  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2987  return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
2988  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2989  return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
2990  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2991  return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
2992  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2993  return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
2994  break;
2995  }
2996  case AArch64ISD::LD1LANEpost: {
2997  if (VT == MVT::v16i8 || VT == MVT::v8i8)
2998  return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
2999  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3000  VT == MVT::v8f16)
3001  return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3002  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3003  VT == MVT::v2f32)
3004  return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3005  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3006  VT == MVT::v1f64)
3007  return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3008  break;
3009  }
3010  case AArch64ISD::LD2LANEpost: {
3011  if (VT == MVT::v16i8 || VT == MVT::v8i8)
3012  return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3013  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3014  VT == MVT::v8f16)
3015  return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3016  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3017  VT == MVT::v2f32)
3018  return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3019  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3020  VT == MVT::v1f64)
3021  return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3022  break;
3023  }
3024  case AArch64ISD::LD3LANEpost: {
3025  if (VT == MVT::v16i8 || VT == MVT::v8i8)
3026  return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3027  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3028  VT == MVT::v8f16)
3029  return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3030  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3031  VT == MVT::v2f32)
3032  return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3033  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3034  VT == MVT::v1f64)
3035  return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3036  break;
3037  }
3038  case AArch64ISD::LD4LANEpost: {
3039  if (VT == MVT::v16i8 || VT == MVT::v8i8)
3040  return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3041  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3042  VT == MVT::v8f16)
3043  return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3044  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3045  VT == MVT::v2f32)
3046  return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3047  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3048  VT == MVT::v1f64)
3049  return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3050  break;
3051  }
3052  case AArch64ISD::ST2post: {
3053  VT = Node->getOperand(1).getValueType();
3054  if (VT == MVT::v8i8)
3055  return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3056  else if (VT == MVT::v16i8)
3057  return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3058  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3059  return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3060  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3061  return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3062  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3063  return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3064  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3065  return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3066  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3067  return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3068  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3069  return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3070  break;
3071  }
3072  case AArch64ISD::ST3post: {
3073  VT = Node->getOperand(1).getValueType();
3074  if (VT == MVT::v8i8)
3075  return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3076  else if (VT == MVT::v16i8)
3077  return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3078  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3079  return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3080  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3081  return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3082  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3083  return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3084  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3085  return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3086  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3087  return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3088  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3089  return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3090  break;
3091  }
3092  case AArch64ISD::ST4post: {
3093  VT = Node->getOperand(1).getValueType();
3094  if (VT == MVT::v8i8)
3095  return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3096  else if (VT == MVT::v16i8)
3097  return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3098  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3099  return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3100  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3101  return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3102  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3103  return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3104  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3105  return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3106  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3107  return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3108  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3109  return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3110  break;
3111  }
3112  case AArch64ISD::ST1x2post: {
3113  VT = Node->getOperand(1).getValueType();
3114  if (VT == MVT::v8i8)
3115  return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3116  else if (VT == MVT::v16i8)
3117  return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3118  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3119  return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3120  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3121  return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3122  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3123  return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3124  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3125  return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3126  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3127  return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3128  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3129  return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3130  break;
3131  }
3132  case AArch64ISD::ST1x3post: {
3133  VT = Node->getOperand(1).getValueType();
3134  if (VT == MVT::v8i8)
3135  return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
3136  else if (VT == MVT::v16i8)
3137  return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
3138  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3139  return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
3140  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3141  return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
3142  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3143  return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
3144  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3145  return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
3146  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3147  return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3148  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3149  return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
3150  break;
3151  }
3152  case AArch64ISD::ST1x4post: {
3153  VT = Node->getOperand(1).getValueType();
3154  if (VT == MVT::v8i8)
3155  return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
3156  else if (VT == MVT::v16i8)
3157  return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
3158  else if (VT == MVT::v4i16 || VT == MVT::v4f16)
3159  return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
3160  else if (VT == MVT::v8i16 || VT == MVT::v8f16)
3161  return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
3162  else if (VT == MVT::v2i32 || VT == MVT::v2f32)
3163  return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
3164  else if (VT == MVT::v4i32 || VT == MVT::v4f32)
3165  return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
3166  else if (VT == MVT::v1i64 || VT == MVT::v1f64)
3167  return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3168  else if (VT == MVT::v2i64 || VT == MVT::v2f64)
3169  return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
3170  break;
3171  }
3172  case AArch64ISD::ST2LANEpost: {
3173  VT = Node->getOperand(1).getValueType();
3174  if (VT == MVT::v16i8 || VT == MVT::v8i8)
3175  return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
3176  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3177  VT == MVT::v8f16)
3178  return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
3179  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3180  VT == MVT::v2f32)
3181  return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
3182  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3183  VT == MVT::v1f64)
3184  return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
3185  break;
3186  }
3187  case AArch64ISD::ST3LANEpost: {
3188  VT = Node->getOperand(1).getValueType();
3189  if (VT == MVT::v16i8 || VT == MVT::v8i8)
3190  return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
3191  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3192  VT == MVT::v8f16)
3193  return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
3194  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3195  VT == MVT::v2f32)
3196  return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
3197  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3198  VT == MVT::v1f64)
3199  return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
3200  break;
3201  }
3202  case AArch64ISD::ST4LANEpost: {
3203  VT = Node->getOperand(1).getValueType();
3204  if (VT == MVT::v16i8 || VT == MVT::v8i8)
3205  return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
3206  else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3207  VT == MVT::v8f16)
3208  return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
3209  else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3210  VT == MVT::v2f32)
3211  return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
3212  else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3213  VT == MVT::v1f64)
3214  return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
3215  break;
3216  }
3217 
3218  case ISD::FCEIL:
3219  case ISD::FFLOOR:
3220  case ISD::FTRUNC:
3221  case ISD::FROUND:
3222  if (SDNode *I = SelectLIBM(Node))
3223  return I;
3224  break;
3225  }
3226 
3227  // Select the default instruction
3228  ResNode = SelectCode(Node);
3229 
3230  DEBUG(errs() << "=> ");
3231  if (ResNode == nullptr || ResNode == Node)
3232  DEBUG(Node->dump(CurDAG));
3233  else
3234  DEBUG(ResNode->dump(CurDAG));
3235  DEBUG(errs() << "\n");
3236 
3237  return ResNode;
3238 }
3239 
3240 /// createAArch64ISelDag - This pass converts a legalized DAG into an
3241 /// AArch64-specific DAG, ready for instruction scheduling.
3242 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
3243                                          CodeGenOpt::Level OptLevel) {
3244  return new AArch64DAGToDAGISel(TM, OptLevel);
3245 }
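
For orientation, the factory above is the hook through which the AArch64 backend plugs this selector into the codegen pipeline. Below is a minimal sketch, assuming the usual TargetPassConfig pattern; the class name AArch64PassConfig and the surrounding member functions are illustrative, not quoted from the AArch64 target sources.

#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/Passes.h"

using namespace llvm;

namespace {
// Hypothetical pass-configuration class following the TargetPassConfig
// conventions; real targets define something similar in their
// <Target>TargetMachine.cpp.
class AArch64PassConfig : public TargetPassConfig {
public:
  AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

  // Instruction selection: instantiate the DAG-to-DAG selector defined in
  // this file and hand it to the pass manager.
  bool addInstSelector() override {
    addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
    return false;
  }
};
} // end anonymous namespace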