LLVM 6.0.0svn
AArch64ISelDAGToDAG.cpp
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the AArch64 target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/KnownBits.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "aarch64-isel"
30 
31 //===--------------------------------------------------------------------===//
32 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
33 /// instructions for SelectionDAG operations.
34 ///
35 namespace {
36 
37 class AArch64DAGToDAGISel : public SelectionDAGISel {
38 
39  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40  /// make the right decision when generating code for different targets.
41  const AArch64Subtarget *Subtarget;
42 
43  bool ForCodeSize;
44 
45 public:
46  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47  CodeGenOpt::Level OptLevel)
48  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
49  ForCodeSize(false) {}
50 
51  StringRef getPassName() const override {
52  return "AArch64 Instruction Selection";
53  }
54 
55  bool runOnMachineFunction(MachineFunction &MF) override {
56  ForCodeSize = MF.getFunction()->optForSize();
57  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
58  return SelectionDAGISel::runOnMachineFunction(MF);
59  }
60 
61  void Select(SDNode *Node) override;
62 
63  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
64  /// inline asm expressions.
65  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
66  unsigned ConstraintID,
67  std::vector<SDValue> &OutOps) override;
68 
69  bool tryMLAV64LaneV128(SDNode *N);
70  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75  return SelectShiftedRegister(N, false, Reg, Shift);
76  }
77  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78  return SelectShiftedRegister(N, true, Reg, Shift);
79  }
80  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82  }
83  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85  }
86  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88  }
89  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91  }
92  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94  }
95  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
96  return SelectAddrModeIndexed(N, 1, Base, OffImm);
97  }
98  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
99  return SelectAddrModeIndexed(N, 2, Base, OffImm);
100  }
101  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
102  return SelectAddrModeIndexed(N, 4, Base, OffImm);
103  }
104  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
105  return SelectAddrModeIndexed(N, 8, Base, OffImm);
106  }
107  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
108  return SelectAddrModeIndexed(N, 16, Base, OffImm);
109  }
110  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
111  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
112  }
113  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
114  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
115  }
116  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
117  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
118  }
119  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
120  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
121  }
122  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
123  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
124  }
125 
126  template<int Width>
127  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
128  SDValue &SignExtend, SDValue &DoShift) {
129  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
130  }
131 
132  template<int Width>
133  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
134  SDValue &SignExtend, SDValue &DoShift) {
135  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
136  }
137 
138 
139  /// Form sequences of consecutive 64/128-bit registers for use in NEON
140  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
141  /// between 1 and 4 elements. If it contains a single element that is returned
142  /// unchanged; otherwise a REG_SEQUENCE value is returned.
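 /// For example, a 3-element Q tuple becomes a single REG_SEQUENCE in the
 /// QQQ register class, with the inputs placed in qsub0..qsub2.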
143  SDValue createDTuple(ArrayRef<SDValue> Vecs);
144  SDValue createQTuple(ArrayRef<SDValue> Vecs);
145 
146  /// Generic helper for the createDTuple/createQTuple
147  /// functions. Those should almost always be called instead.
148  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
149  const unsigned SubRegs[]);
150 
151  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
152 
153  bool tryIndexedLoad(SDNode *N);
154 
155  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
156  unsigned SubRegIdx);
157  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
158  unsigned SubRegIdx);
159  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
160  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
161 
162  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
163  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
164  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
166 
167  bool tryBitfieldExtractOp(SDNode *N);
168  bool tryBitfieldExtractOpFromSExt(SDNode *N);
169  bool tryBitfieldInsertOp(SDNode *N);
170  bool tryBitfieldInsertInZeroOp(SDNode *N);
171 
172  bool tryReadRegister(SDNode *N);
173  bool tryWriteRegister(SDNode *N);
174 
175 // Include the pieces autogenerated from the target description.
176 #include "AArch64GenDAGISel.inc"
177 
178 private:
179  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
180  SDValue &Shift);
181  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
182  SDValue &OffImm);
183  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
184  SDValue &OffImm);
185  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
186  SDValue &OffImm);
187  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
188  SDValue &Offset, SDValue &SignExtend,
189  SDValue &DoShift);
190  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
191  SDValue &Offset, SDValue &SignExtend,
192  SDValue &DoShift);
193  bool isWorthFolding(SDValue V) const;
194  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
195  SDValue &Offset, SDValue &SignExtend);
196 
197  template<unsigned RegWidth>
198  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
199  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
200  }
201 
202  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
203 
204  bool SelectCMP_SWAP(SDNode *N);
205 
206 };
207 } // end anonymous namespace
208 
209 /// isIntImmediate - This method tests to see if the node is a constant
210 /// operand. If so Imm will receive the 32-bit value.
211 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
212  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
213  Imm = C->getZExtValue();
214  return true;
215  }
216  return false;
217 }
218 
219 // isIntImmediate - This method tests to see if the operand is a constant.
220 // If so, Imm will receive the value.
221 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
222  return isIntImmediate(N.getNode(), Imm);
223 }
224 
225 // isOpcWithIntImmediate - This method tests to see if the node is a specific
226 // opcode and that it has an immediate integer right operand.
227 // If so, Imm will receive the 32-bit value.
228 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
229  uint64_t &Imm) {
230  return N->getOpcode() == Opc &&
231  isIntImmediate(N->getOperand(1).getNode(), Imm);
232 }
233 
234 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
235  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
236  switch(ConstraintID) {
237  default:
238  llvm_unreachable("Unexpected asm memory constraint");
239  case InlineAsm::Constraint_i:
240  case InlineAsm::Constraint_m:
241  case InlineAsm::Constraint_Q:
242  // We need to make sure that this one operand does not end up in XZR, thus
243  // require the address to be in a PointerRegClass register.
244  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
245  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
246  SDLoc dl(Op);
247  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
248  SDValue NewOp =
249  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
250  dl, Op.getValueType(),
251  Op, RC), 0);
252  OutOps.push_back(NewOp);
253  return false;
254  }
255  return true;
256 }
257 
258 /// SelectArithImmed - Select an immediate value that can be represented as
259 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
260 /// Val set to the 12-bit value and Shift set to the shifter operand.
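/// For example, #0xabc is selected as (0xabc, LSL #0) and #0xabc000 as
/// (0xabc, LSL #12), while #0xabc123 cannot be matched by this form.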
261 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
262  SDValue &Shift) {
263  // This function is called from the addsub_shifted_imm ComplexPattern,
264  // which lists [imm] as the list of opcodes it is interested in; however,
265  // we still need to check whether the operand is actually an immediate
266  // here because the ComplexPattern opcode list is only used in
267  // root-level opcode matching.
268  if (!isa<ConstantSDNode>(N.getNode()))
269  return false;
270 
271  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
272  unsigned ShiftAmt;
273 
274  if (Immed >> 12 == 0) {
275  ShiftAmt = 0;
276  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
277  ShiftAmt = 12;
278  Immed = Immed >> 12;
279  } else
280  return false;
281 
282  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
283  SDLoc dl(N);
284  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
285  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
286  return true;
287 }
288 
289 /// SelectNegArithImmed - As above, but negates the value before trying to
290 /// select it.
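/// For example, an add of #-32 can instead be encoded as the corresponding
/// SUB with #32; the negated value is fed back into SelectArithImmed.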
291 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
292  SDValue &Shift) {
293  // This function is called from the addsub_shifted_imm ComplexPattern,
294  // which lists [imm] as the list of opcodes it is interested in; however,
295  // we still need to check whether the operand is actually an immediate
296  // here because the ComplexPattern opcode list is only used in
297  // root-level opcode matching.
298  if (!isa<ConstantSDNode>(N.getNode()))
299  return false;
300 
301  // The immediate operand must be a 24-bit zero-extended immediate.
302  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
303 
304  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
305  // have the opposite effect on the C flag, so this pattern mustn't match under
306  // those circumstances.
307  if (Immed == 0)
308  return false;
309 
310  if (N.getValueType() == MVT::i32)
311  Immed = ~((uint32_t)Immed) + 1;
312  else
313  Immed = ~Immed + 1ULL;
314  if (Immed & 0xFFFFFFFFFF000000ULL)
315  return false;
316 
317  Immed &= 0xFFFFFFULL;
318  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
319  Shift);
320 }
321 
322 /// getShiftTypeForNode - Translate a shift node to the corresponding
323 /// ShiftType value.
324 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
325  switch (N.getOpcode()) {
326  default:
327  return AArch64_AM::InvalidShiftExtend;
328  case ISD::SHL:
329  return AArch64_AM::LSL;
330  case ISD::SRL:
331  return AArch64_AM::LSR;
332  case ISD::SRA:
333  return AArch64_AM::ASR;
334  case ISD::ROTR:
335  return AArch64_AM::ROR;
336  }
337 }
338 
339 /// \brief Determine whether it is worth it to fold SHL into the addressing
340 /// mode.
341 static bool isWorthFoldingSHL(SDValue V) {
342  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
343  // It is worth folding logical shift of up to three places.
344  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
345  if (!CSD)
346  return false;
347  unsigned ShiftVal = CSD->getZExtValue();
348  if (ShiftVal > 3)
349  return false;
350 
351  // Check if this particular node is reused in any non-memory related
352  // operation. If yes, do not try to fold this node into the address
353  // computation, since the computation will be kept.
354  const SDNode *Node = V.getNode();
355  for (SDNode *UI : Node->uses())
356  if (!isa<MemSDNode>(*UI))
357  for (SDNode *UII : UI->uses())
358  if (!isa<MemSDNode>(*UII))
359  return false;
360  return true;
361 }
362 
363 /// \brief Determine whether it is worth to fold V into an extended register.
364 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
365  // Trivial if we are optimizing for code size or if there is only
366  // one use of the value.
367  if (ForCodeSize || V.hasOneUse())
368  return true;
369  // If a subtarget has a fastpath LSL we can fold a logical shift into
370  // the addressing mode and save a cycle.
371  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
372  isWorthFoldingSHL(V))
373  return true;
374  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
375  const SDValue LHS = V.getOperand(0);
376  const SDValue RHS = V.getOperand(1);
377  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
378  return true;
379  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
380  return true;
381  }
382 
383  // It hurts otherwise, since the value will be reused.
384  return false;
385 }
386 
387 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
388 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
389 /// instructions allow the shifted register to be rotated, but the arithmetic
390 /// instructions do not. The AllowROR parameter specifies whether ROR is
391 /// supported.
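/// For example, the DAG (add x0, (shl x1, 3)) can fold into
/// "add xd, x0, x1, lsl #3"; a ROR amount is only accepted for the
/// logical instructions.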
392 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
393  SDValue &Reg, SDValue &Shift) {
394  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
395  if (ShType == AArch64_AM::InvalidShiftExtend)
396  return false;
397  if (!AllowROR && ShType == AArch64_AM::ROR)
398  return false;
399 
400  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
401  unsigned BitSize = N.getValueSizeInBits();
402  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
403  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
404 
405  Reg = N.getOperand(0);
406  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
407  return isWorthFolding(N);
408  }
409 
410  return false;
411 }
412 
413 /// getExtendTypeForNode - Translate an extend node to the corresponding
414 /// ExtendType value.
415 static AArch64_AM::ShiftExtendType
416 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
417  if (N.getOpcode() == ISD::SIGN_EXTEND ||
418  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
419  EVT SrcVT;
420  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
421  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
422  else
423  SrcVT = N.getOperand(0).getValueType();
424 
425  if (!IsLoadStore && SrcVT == MVT::i8)
426  return AArch64_AM::SXTB;
427  else if (!IsLoadStore && SrcVT == MVT::i16)
428  return AArch64_AM::SXTH;
429  else if (SrcVT == MVT::i32)
430  return AArch64_AM::SXTW;
431  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
432 
433  return AArch64_AM::InvalidShiftExtend;
434  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
435  N.getOpcode() == ISD::ANY_EXTEND) {
436  EVT SrcVT = N.getOperand(0).getValueType();
437  if (!IsLoadStore && SrcVT == MVT::i8)
438  return AArch64_AM::UXTB;
439  else if (!IsLoadStore && SrcVT == MVT::i16)
440  return AArch64_AM::UXTH;
441  else if (SrcVT == MVT::i32)
442  return AArch64_AM::UXTW;
443  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
444 
445  return AArch64_AM::InvalidShiftExtend;
446  } else if (N.getOpcode() == ISD::AND) {
447  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
448  if (!CSD)
449  return AArch64_AM::InvalidShiftExtend;
450  uint64_t AndMask = CSD->getZExtValue();
451 
452  switch (AndMask) {
453  default:
454  return AArch64_AM::InvalidShiftExtend;
455  case 0xFF:
456  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
457  case 0xFFFF:
458  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
459  case 0xFFFFFFFF:
460  return AArch64_AM::UXTW;
461  }
462  }
463 
464  return AArch64_AM::InvalidShiftExtend;
465 }
466 
467 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
468 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
469  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
470  DL->getOpcode() != AArch64ISD::DUPLANE32)
471  return false;
472 
473  SDValue SV = DL->getOperand(0);
474  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
475  return false;
476 
477  SDValue EV = SV.getOperand(1);
478  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
479  return false;
480 
481  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
482  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
483  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
484  LaneOp = EV.getOperand(0);
485 
486  return true;
487 }
488 
489 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
490 // high lane extract.
491 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
492  SDValue &LaneOp, int &LaneIdx) {
493 
494  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
495  std::swap(Op0, Op1);
496  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
497  return false;
498  }
499  StdOp = Op1;
500  return true;
501 }
502 
503 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
504 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
505 /// so that we don't emit unnecessary lane extracts.
506 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
507  SDLoc dl(N);
508  SDValue Op0 = N->getOperand(0);
509  SDValue Op1 = N->getOperand(1);
510  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
511  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
512  int LaneIdx = -1; // Will hold the lane index.
513 
514  if (Op1.getOpcode() != ISD::MUL ||
515  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
516  LaneIdx)) {
517  std::swap(Op0, Op1);
518  if (Op1.getOpcode() != ISD::MUL ||
519  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
520  LaneIdx))
521  return false;
522  }
523 
524  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
525 
526  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
527 
528  unsigned MLAOpc = ~0U;
529 
530  switch (N->getSimpleValueType(0).SimpleTy) {
531  default:
532  llvm_unreachable("Unrecognized MLA.");
533  case MVT::v4i16:
534  MLAOpc = AArch64::MLAv4i16_indexed;
535  break;
536  case MVT::v8i16:
537  MLAOpc = AArch64::MLAv8i16_indexed;
538  break;
539  case MVT::v2i32:
540  MLAOpc = AArch64::MLAv2i32_indexed;
541  break;
542  case MVT::v4i32:
543  MLAOpc = AArch64::MLAv4i32_indexed;
544  break;
545  }
546 
547  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
548  return true;
549 }
550 
551 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
552  SDLoc dl(N);
553  SDValue SMULLOp0;
554  SDValue SMULLOp1;
555  int LaneIdx;
556 
557  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
558  LaneIdx))
559  return false;
560 
561  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
562 
563  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
564 
565  unsigned SMULLOpc = ~0U;
566 
567  if (IntNo == Intrinsic::aarch64_neon_smull) {
568  switch (N->getSimpleValueType(0).SimpleTy) {
569  default:
570  llvm_unreachable("Unrecognized SMULL.");
571  case MVT::v4i32:
572  SMULLOpc = AArch64::SMULLv4i16_indexed;
573  break;
574  case MVT::v2i64:
575  SMULLOpc = AArch64::SMULLv2i32_indexed;
576  break;
577  }
578  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
579  switch (N->getSimpleValueType(0).SimpleTy) {
580  default:
581  llvm_unreachable("Unrecognized SMULL.");
582  case MVT::v4i32:
583  SMULLOpc = AArch64::UMULLv4i16_indexed;
584  break;
585  case MVT::v2i64:
586  SMULLOpc = AArch64::UMULLv2i32_indexed;
587  break;
588  }
589  } else
590  llvm_unreachable("Unrecognized intrinsic.");
591 
592  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
593  return true;
594 }
595 
596 /// Instructions that accept extend modifiers like UXTW expect the register
597 /// being extended to be a GPR32, but the incoming DAG might be acting on a
598 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
599 /// this is the case.
600 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
601  if (N.getValueType() == MVT::i32)
602  return N;
603 
604  SDLoc dl(N);
605  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
606  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
607  dl, MVT::i32, N, SubReg);
608  return SDValue(Node, 0);
609 }
610 
611 
612 /// SelectArithExtendedRegister - Select a "extended register" operand. This
613 /// operand folds in an extend followed by an optional left shift.
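/// For example, (add x0, (shl (sext_inreg x1, i8), 2)) can be selected as
/// "add xd, x0, w1, sxtb #2"; the left-shift amount is limited to 0-4.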
614 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
615  SDValue &Shift) {
616  unsigned ShiftVal = 0;
617  AArch64_AM::ShiftExtendType Ext;
618 
619  if (N.getOpcode() == ISD::SHL) {
620  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
621  if (!CSD)
622  return false;
623  ShiftVal = CSD->getZExtValue();
624  if (ShiftVal > 4)
625  return false;
626 
627  Ext = getExtendTypeForNode(N.getOperand(0));
628  if (Ext == AArch64_AM::InvalidShiftExtend)
629  return false;
630 
631  Reg = N.getOperand(0).getOperand(0);
632  } else {
633  Ext = getExtendTypeForNode(N);
634  if (Ext == AArch64_AM::InvalidShiftExtend)
635  return false;
636 
637  Reg = N.getOperand(0);
638 
639  // Don't match if free 32-bit -> 64-bit zext can be used instead.
640  if (Ext == AArch64_AM::UXTW &&
641  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
642  return false;
643  }
644 
645  // AArch64 mandates that the RHS of the operation must use the smallest
646  // register class that could contain the size being extended from. Thus,
647  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
648  // there might not be an actual 32-bit value in the program. We can
649  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
650  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
651  Reg = narrowIfNeeded(CurDAG, Reg);
652  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
653  MVT::i32);
654  return isWorthFolding(N);
655 }
656 
657 /// If there's a use of this ADDlow that's not itself a load/store then we'll
658 /// need to create a real ADD instruction from it anyway and there's no point in
659 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
660 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
661 /// leads to duplicated ADRP instructions.
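// For example, "ldr x0, [x1, :lo12:sym]" keeps the ADDlow folded into the
// load, whereas a non-memory user would force a separate
// "add x1, x1, :lo12:sym" to be materialized anyway.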
662 static bool isWorthFoldingADDlow(SDValue N) {
663  for (auto Use : N->uses()) {
664  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
665  Use->getOpcode() != ISD::ATOMIC_LOAD &&
666  Use->getOpcode() != ISD::ATOMIC_STORE)
667  return false;
668 
669  // ldar and stlr have much more restrictive addressing modes (just a
670  // register).
671  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
672  return false;
673  }
674 
675  return true;
676 }
677 
678 /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
679 /// immediate" address. The "Size" argument is the size in bytes of the memory
680 /// reference, which determines the scale.
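/// For example, with Size == 8 (e.g. LDP/STP of X registers) the byte offset
/// must be a multiple of 8 in the range [-512, 504], encoded here as offset/8.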
681 bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
682  SDValue &Base,
683  SDValue &OffImm) {
684  SDLoc dl(N);
685  const DataLayout &DL = CurDAG->getDataLayout();
686  const TargetLowering *TLI = getTargetLowering();
687  if (N.getOpcode() == ISD::FrameIndex) {
688  int FI = cast<FrameIndexSDNode>(N)->getIndex();
689  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
690  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
691  return true;
692  }
693 
694  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
695  // addressing mode selected here doesn't support labels/immediates, only base+offset.
696 
697  if (CurDAG->isBaseWithConstantOffset(N)) {
698  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
699  int64_t RHSC = RHS->getSExtValue();
700  unsigned Scale = Log2_32(Size);
701  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
702  RHSC < (0x40 << Scale)) {
703  Base = N.getOperand(0);
704  if (Base.getOpcode() == ISD::FrameIndex) {
705  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
706  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
707  }
708  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
709  return true;
710  }
711  }
712  }
713 
714  // Base only. The address will be materialized into a register before
715  // the memory is accessed.
716  // add x0, Xbase, #offset
717  // stp x1, x2, [x0]
718  Base = N;
719  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
720  return true;
721 }
722 
723 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
724 /// immediate" address. The "Size" argument is the size in bytes of the memory
725 /// reference, which determines the scale.
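/// For example, with Size == 8 an offset of #32 yields OffImm = 4 and maps to
/// "ldr x0, [x1, #32]"; valid offsets are multiples of 8 in [0, 32760].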
726 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
727  SDValue &Base, SDValue &OffImm) {
728  SDLoc dl(N);
729  const DataLayout &DL = CurDAG->getDataLayout();
730  const TargetLowering *TLI = getTargetLowering();
731  if (N.getOpcode() == ISD::FrameIndex) {
732  int FI = cast<FrameIndexSDNode>(N)->getIndex();
733  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
734  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
735  return true;
736  }
737 
738  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
739  GlobalAddressSDNode *GAN =
740  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
741  Base = N.getOperand(0);
742  OffImm = N.getOperand(1);
743  if (!GAN)
744  return true;
745 
746  const GlobalValue *GV = GAN->getGlobal();
747  unsigned Alignment = GV->getAlignment();
748  Type *Ty = GV->getValueType();
749  if (Alignment == 0 && Ty->isSized())
750  Alignment = DL.getABITypeAlignment(Ty);
751 
752  if (Alignment >= Size)
753  return true;
754  }
755 
756  if (CurDAG->isBaseWithConstantOffset(N)) {
757  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
758  int64_t RHSC = (int64_t)RHS->getZExtValue();
759  unsigned Scale = Log2_32(Size);
760  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
761  Base = N.getOperand(0);
762  if (Base.getOpcode() == ISD::FrameIndex) {
763  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
764  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
765  }
766  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
767  return true;
768  }
769  }
770  }
771 
772  // Before falling back to our general case, check if the unscaled
773  // instructions can handle this. If so, that's preferable.
774  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
775  return false;
776 
777  // Base only. The address will be materialized into a register before
778  // the memory is accessed.
779  // add x0, Xbase, #offset
780  // ldr x0, [x0]
781  Base = N;
782  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
783  return true;
784 }
785 
786 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
787 /// immediate" address. This should only match when there is an offset that
788 /// is not valid for a scaled immediate addressing mode. The "Size" argument
789 /// is the size in bytes of the memory reference, which is needed here to know
790 /// what is valid for a scaled immediate.
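/// For example, "ldur x0, [x1, #-3]" uses this form: the 9-bit signed offset
/// covers [-256, 255] and is not scaled by the access size.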
791 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
792  SDValue &Base,
793  SDValue &OffImm) {
794  if (!CurDAG->isBaseWithConstantOffset(N))
795  return false;
796  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
797  int64_t RHSC = RHS->getSExtValue();
798  // If the offset is valid as a scaled immediate, don't match here.
799  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
800  RHSC < (0x1000 << Log2_32(Size)))
801  return false;
802  if (RHSC >= -256 && RHSC < 256) {
803  Base = N.getOperand(0);
804  if (Base.getOpcode() == ISD::FrameIndex) {
805  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
806  const TargetLowering *TLI = getTargetLowering();
807  Base = CurDAG->getTargetFrameIndex(
808  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
809  }
810  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
811  return true;
812  }
813  }
814  return false;
815 }
816 
817 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
818  SDLoc dl(N);
819  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
820  SDValue ImpDef = SDValue(
821  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
822  MachineSDNode *Node = CurDAG->getMachineNode(
823  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
824  return SDValue(Node, 0);
825 }
826 
827 /// \brief Check if the given SHL node (\p N), can be used to form an
828 /// extended register for an addressing mode.
829 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
830  bool WantExtend, SDValue &Offset,
831  SDValue &SignExtend) {
832  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
833  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
834  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
835  return false;
836 
837  SDLoc dl(N);
838  if (WantExtend) {
839  AArch64_AM::ShiftExtendType Ext =
840  getExtendTypeForNode(N.getOperand(0), true);
841  if (Ext == AArch64_AM::InvalidShiftExtend)
842  return false;
843 
844  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
845  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
846  MVT::i32);
847  } else {
848  Offset = N.getOperand(0);
849  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
850  }
851 
852  unsigned LegalShiftVal = Log2_32(Size);
853  unsigned ShiftVal = CSD->getZExtValue();
854 
855  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
856  return false;
857 
858  return isWorthFolding(N);
859 }
860 
861 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
862  SDValue &Base, SDValue &Offset,
863  SDValue &SignExtend,
864  SDValue &DoShift) {
865  if (N.getOpcode() != ISD::ADD)
866  return false;
867  SDValue LHS = N.getOperand(0);
868  SDValue RHS = N.getOperand(1);
869  SDLoc dl(N);
870 
871  // We don't want to match immediate adds here, because they are better lowered
872  // to the register-immediate addressing modes.
873  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
874  return false;
875 
876  // Check if this particular node is reused in any non-memory related
877  // operation. If yes, do not try to fold this node into the address
878  // computation, since the computation will be kept.
879  const SDNode *Node = N.getNode();
880  for (SDNode *UI : Node->uses()) {
881  if (!isa<MemSDNode>(*UI))
882  return false;
883  }
884 
885  // Remember if it is worth folding N when it produces extended register.
886  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
887 
888  // Try to match a shifted extend on the RHS.
889  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
890  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
891  Base = LHS;
892  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
893  return true;
894  }
895 
896  // Try to match a shifted extend on the LHS.
897  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
898  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
899  Base = RHS;
900  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
901  return true;
902  }
903 
904  // There was no shift, whatever else we find.
905  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
906 
907  AArch64_AM::ShiftExtendType Ext;
908  // Try to match an unshifted extend on the LHS.
909  if (IsExtendedRegisterWorthFolding &&
910  (Ext = getExtendTypeForNode(LHS, true)) !=
911  AArch64_AM::InvalidShiftExtend) {
912  Base = RHS;
913  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
914  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
915  MVT::i32);
916  if (isWorthFolding(LHS))
917  return true;
918  }
919 
920  // Try to match an unshifted extend on the RHS.
921  if (IsExtendedRegisterWorthFolding &&
922  (Ext = getExtendTypeForNode(RHS, true)) !=
923  AArch64_AM::InvalidShiftExtend) {
924  Base = LHS;
925  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
926  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
927  MVT::i32);
928  if (isWorthFolding(RHS))
929  return true;
930  }
931 
932  return false;
933 }
934 
935 // Check if the given immediate is preferred by ADD. If an immediate can be
936 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
937 // encoded by one MOVZ, return true.
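// For example, 0xfff fits a plain ADD, and 0x123000 fits "ADD ... LSL #12"
// but needs more than one MOVZ/MOVK, so both are preferred; 0x770000 is a
// single "MOVZ #0x77, LSL #16", so it is not.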
938 static bool isPreferredADD(int64_t ImmOff) {
939  // Constant in [0x0, 0xfff] can be encoded in ADD.
940  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
941  return true;
942  // Check if it can be encoded in an "ADD LSL #12".
943  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
944  // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
945  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
946  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
947  return false;
948 }
949 
950 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
951  SDValue &Base, SDValue &Offset,
952  SDValue &SignExtend,
953  SDValue &DoShift) {
954  if (N.getOpcode() != ISD::ADD)
955  return false;
956  SDValue LHS = N.getOperand(0);
957  SDValue RHS = N.getOperand(1);
958  SDLoc DL(N);
959 
960  // Check if this particular node is reused in any non-memory related
961  // operation. If yes, do not try to fold this node into the address
962  // computation, since the computation will be kept.
963  const SDNode *Node = N.getNode();
964  for (SDNode *UI : Node->uses()) {
965  if (!isa<MemSDNode>(*UI))
966  return false;
967  }
968 
969  // Watch out if RHS is a wide immediate: it cannot be selected into the
970  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
971  // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
972  // instructions like:
973  // MOV X0, WideImmediate
974  // ADD X1, BaseReg, X0
975  // LDR X2, [X1, 0]
976  // For such situation, using [BaseReg, XReg] addressing mode can save one
977  // ADD/SUB:
978  // MOV X0, WideImmediate
979  // LDR X2, [BaseReg, X0]
980  if (isa<ConstantSDNode>(RHS)) {
981  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
982  unsigned Scale = Log2_32(Size);
983  // Skip if the immediate can be selected by the load/store addressing mode.
984  // Also skip if the immediate can be encoded by a single ADD (SUB is also
985  // checked by using -ImmOff).
986  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
987  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
988  return false;
989 
990  SDValue Ops[] = { RHS };
991  SDNode *MOVI =
992  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
993  SDValue MOVIV = SDValue(MOVI, 0);
994  // This ADD of two X register will be selected into [Reg+Reg] mode.
995  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
996  }
997 
998  // Remember if it is worth folding N when it produces extended register.
999  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1000 
1001  // Try to match a shifted extend on the RHS.
1002  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1003  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1004  Base = LHS;
1005  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1006  return true;
1007  }
1008 
1009  // Try to match a shifted extend on the LHS.
1010  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1011  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1012  Base = RHS;
1013  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1014  return true;
1015  }
1016 
1017  // Match any non-shifted, non-extend, non-immediate add expression.
1018  Base = LHS;
1019  Offset = RHS;
1020  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1021  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1022  // Reg1 + Reg2 is free: no check needed.
1023  return true;
1024 }
1025 
1026 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1027  static const unsigned RegClassIDs[] = {
1028  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1029  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1030  AArch64::dsub2, AArch64::dsub3};
1031 
1032  return createTuple(Regs, RegClassIDs, SubRegs);
1033 }
1034 
1035 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1036  static const unsigned RegClassIDs[] = {
1037  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1038  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1039  AArch64::qsub2, AArch64::qsub3};
1040 
1041  return createTuple(Regs, RegClassIDs, SubRegs);
1042 }
1043 
1044 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1045  const unsigned RegClassIDs[],
1046  const unsigned SubRegs[]) {
1047  // There's no special register-class for a vector-list of 1 element: it's just
1048  // a vector.
1049  if (Regs.size() == 1)
1050  return Regs[0];
1051 
1052  assert(Regs.size() >= 2 && Regs.size() <= 4);
1053 
1054  SDLoc DL(Regs[0]);
1055 
1056  SmallVector<SDValue, 4> Ops;
1057 
1058  // First operand of REG_SEQUENCE is the desired RegClass.
1059  Ops.push_back(
1060  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1061 
1062  // Then we get pairs of source & subregister-position for the components.
1063  for (unsigned i = 0; i < Regs.size(); ++i) {
1064  Ops.push_back(Regs[i]);
1065  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1066  }
1067 
1068  SDNode *N =
1069  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1070  return SDValue(N, 0);
1071 }
1072 
1073 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1074  bool isExt) {
1075  SDLoc dl(N);
1076  EVT VT = N->getValueType(0);
1077 
1078  unsigned ExtOff = isExt;
1079 
1080  // Form a REG_SEQUENCE to force register allocation.
1081  unsigned Vec0Off = ExtOff + 1;
1082  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1083  N->op_begin() + Vec0Off + NumVecs);
1084  SDValue RegSeq = createQTuple(Regs);
1085 
1086  SmallVector<SDValue, 6> Ops;
1087  if (isExt)
1088  Ops.push_back(N->getOperand(1));
1089  Ops.push_back(RegSeq);
1090  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1091  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1092 }
1093 
1094 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1095  LoadSDNode *LD = cast<LoadSDNode>(N);
1096  if (LD->isUnindexed())
1097  return false;
1098  EVT VT = LD->getMemoryVT();
1099  EVT DstVT = N->getValueType(0);
1100  ISD::MemIndexedMode AM = LD->getAddressingMode();
1101  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1102 
1103  // We're not doing validity checking here. That was done when checking
1104  // if we should mark the load as indexed or not. We're just selecting
1105  // the right instruction.
1106  unsigned Opcode = 0;
1107 
1108  ISD::LoadExtType ExtType = LD->getExtensionType();
1109  bool InsertTo64 = false;
1110  if (VT == MVT::i64)
1111  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1112  else if (VT == MVT::i32) {
1113  if (ExtType == ISD::NON_EXTLOAD)
1114  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1115  else if (ExtType == ISD::SEXTLOAD)
1116  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1117  else {
1118  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1119  InsertTo64 = true;
1120  // The result of the load is only i32. It's the subreg_to_reg that makes
1121  // it into an i64.
1122  DstVT = MVT::i32;
1123  }
1124  } else if (VT == MVT::i16) {
1125  if (ExtType == ISD::SEXTLOAD) {
1126  if (DstVT == MVT::i64)
1127  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1128  else
1129  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1130  } else {
1131  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1132  InsertTo64 = DstVT == MVT::i64;
1133  // The result of the load is only i32. It's the subreg_to_reg that makes
1134  // it into an i64.
1135  DstVT = MVT::i32;
1136  }
1137  } else if (VT == MVT::i8) {
1138  if (ExtType == ISD::SEXTLOAD) {
1139  if (DstVT == MVT::i64)
1140  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1141  else
1142  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1143  } else {
1144  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1145  InsertTo64 = DstVT == MVT::i64;
1146  // The result of the load is only i32. It's the subreg_to_reg that makes
1147  // it into an i64.
1148  DstVT = MVT::i32;
1149  }
1150  } else if (VT == MVT::f16) {
1151  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1152  } else if (VT == MVT::f32) {
1153  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1154  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1155  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1156  } else if (VT.is128BitVector()) {
1157  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1158  } else
1159  return false;
1160  SDValue Chain = LD->getChain();
1161  SDValue Base = LD->getBasePtr();
1162  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1163  int OffsetVal = (int)OffsetOp->getZExtValue();
1164  SDLoc dl(N);
1165  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1166  SDValue Ops[] = { Base, Offset, Chain };
1167  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1168  MVT::Other, Ops);
1169  // Either way, we're replacing the node, so tell the caller that.
1170  SDValue LoadedVal = SDValue(Res, 1);
1171  if (InsertTo64) {
1172  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1173  LoadedVal =
1174  SDValue(CurDAG->getMachineNode(
1175  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1176  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1177  SubReg),
1178  0);
1179  }
1180 
1181  ReplaceUses(SDValue(N, 0), LoadedVal);
1182  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1183  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1184  CurDAG->RemoveDeadNode(N);
1185  return true;
1186 }
1187 
1188 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1189  unsigned SubRegIdx) {
1190  SDLoc dl(N);
1191  EVT VT = N->getValueType(0);
1192  SDValue Chain = N->getOperand(0);
1193 
1194  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1195  Chain};
1196 
1197  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1198 
1199  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1200  SDValue SuperReg = SDValue(Ld, 0);
1201  for (unsigned i = 0; i < NumVecs; ++i)
1202  ReplaceUses(SDValue(N, i),
1203  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1204 
1205  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1206 
1207  // Transfer memoperands.
1208  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1209  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1210  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
1211 
1212  CurDAG->RemoveDeadNode(N);
1213 }
1214 
1215 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1216  unsigned Opc, unsigned SubRegIdx) {
1217  SDLoc dl(N);
1218  EVT VT = N->getValueType(0);
1219  SDValue Chain = N->getOperand(0);
1220 
1221  SDValue Ops[] = {N->getOperand(1), // Mem operand
1222  N->getOperand(2), // Incremental
1223  Chain};
1224 
1225  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1226  MVT::Untyped, MVT::Other};
1227 
1228  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1229 
1230  // Update uses of write back register
1231  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1232 
1233  // Update uses of vector list
1234  SDValue SuperReg = SDValue(Ld, 1);
1235  if (NumVecs == 1)
1236  ReplaceUses(SDValue(N, 0), SuperReg);
1237  else
1238  for (unsigned i = 0; i < NumVecs; ++i)
1239  ReplaceUses(SDValue(N, i),
1240  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1241 
1242  // Update the chain
1243  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1244  CurDAG->RemoveDeadNode(N);
1245 }
1246 
1247 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1248  unsigned Opc) {
1249  SDLoc dl(N);
1250  EVT VT = N->getOperand(2)->getValueType(0);
1251 
1252  // Form a REG_SEQUENCE to force register allocation.
1253  bool Is128Bit = VT.getSizeInBits() == 128;
1254  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1255  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1256 
1257  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1258  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1259 
1260  // Transfer memoperands.
1261  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1262  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1263  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1264 
1265  ReplaceNode(N, St);
1266 }
1267 
1268 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1269  unsigned Opc) {
1270  SDLoc dl(N);
1271  EVT VT = N->getOperand(2)->getValueType(0);
1272  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1273  MVT::Other}; // Type for the Chain
1274 
1275  // Form a REG_SEQUENCE to force register allocation.
1276  bool Is128Bit = VT.getSizeInBits() == 128;
1277  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1278  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1279 
1280  SDValue Ops[] = {RegSeq,
1281  N->getOperand(NumVecs + 1), // base register
1282  N->getOperand(NumVecs + 2), // Incremental
1283  N->getOperand(0)}; // Chain
1284  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1285 
1286  ReplaceNode(N, St);
1287 }
1288 
1289 namespace {
1290 /// WidenVector - Given a value in the V64 register class, produce the
1291 /// equivalent value in the V128 register class.
1292 class WidenVector {
1293  SelectionDAG &DAG;
1294 
1295 public:
1296  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1297 
1298  SDValue operator()(SDValue V64Reg) {
1299  EVT VT = V64Reg.getValueType();
1300  unsigned NarrowSize = VT.getVectorNumElements();
1301  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1302  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1303  SDLoc DL(V64Reg);
1304 
1305  SDValue Undef =
1306  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1307  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1308  }
1309 };
1310 } // namespace
1311 
1312 /// NarrowVector - Given a value in the V128 register class, produce the
1313 /// equivalent value in the V64 register class.
1314 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1315  EVT VT = V128Reg.getValueType();
1316  unsigned WideSize = VT.getVectorNumElements();
1317  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1318  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1319 
1320  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1321  V128Reg);
1322 }
1323 
1324 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1325  unsigned Opc) {
1326  SDLoc dl(N);
1327  EVT VT = N->getValueType(0);
1328  bool Narrow = VT.getSizeInBits() == 64;
1329 
1330  // Form a REG_SEQUENCE to force register allocation.
1331  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1332 
1333  if (Narrow)
1334  transform(Regs, Regs.begin(),
1335  WidenVector(*CurDAG));
1336 
1337  SDValue RegSeq = createQTuple(Regs);
1338 
1339  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1340 
1341  unsigned LaneNo =
1342  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1343 
1344  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1345  N->getOperand(NumVecs + 3), N->getOperand(0)};
1346  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1347  SDValue SuperReg = SDValue(Ld, 0);
1348 
1349  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1350  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1351  AArch64::qsub2, AArch64::qsub3 };
1352  for (unsigned i = 0; i < NumVecs; ++i) {
1353  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1354  if (Narrow)
1355  NV = NarrowVector(NV, *CurDAG);
1356  ReplaceUses(SDValue(N, i), NV);
1357  }
1358 
1359  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1360  CurDAG->RemoveDeadNode(N);
1361 }
1362 
1363 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1364  unsigned Opc) {
1365  SDLoc dl(N);
1366  EVT VT = N->getValueType(0);
1367  bool Narrow = VT.getSizeInBits() == 64;
1368 
1369  // Form a REG_SEQUENCE to force register allocation.
1370  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1371 
1372  if (Narrow)
1373  transform(Regs, Regs.begin(),
1374  WidenVector(*CurDAG));
1375 
1376  SDValue RegSeq = createQTuple(Regs);
1377 
1378  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1379  RegSeq->getValueType(0), MVT::Other};
1380 
1381  unsigned LaneNo =
1382  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1383 
1384  SDValue Ops[] = {RegSeq,
1385  CurDAG->getTargetConstant(LaneNo, dl,
1386  MVT::i64), // Lane Number
1387  N->getOperand(NumVecs + 2), // Base register
1388  N->getOperand(NumVecs + 3), // Incremental
1389  N->getOperand(0)};
1390  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1391 
1392  // Update uses of the write back register
1393  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1394 
1395  // Update uses of the vector list
1396  SDValue SuperReg = SDValue(Ld, 1);
1397  if (NumVecs == 1) {
1398  ReplaceUses(SDValue(N, 0),
1399  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1400  } else {
1401  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1402  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1403  AArch64::qsub2, AArch64::qsub3 };
1404  for (unsigned i = 0; i < NumVecs; ++i) {
1405  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1406  SuperReg);
1407  if (Narrow)
1408  NV = NarrowVector(NV, *CurDAG);
1409  ReplaceUses(SDValue(N, i), NV);
1410  }
1411  }
1412 
1413  // Update the Chain
1414  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1415  CurDAG->RemoveDeadNode(N);
1416 }
1417 
1418 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1419  unsigned Opc) {
1420  SDLoc dl(N);
1421  EVT VT = N->getOperand(2)->getValueType(0);
1422  bool Narrow = VT.getSizeInBits() == 64;
1423 
1424  // Form a REG_SEQUENCE to force register allocation.
1425  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1426 
1427  if (Narrow)
1428  transform(Regs, Regs.begin(),
1429  WidenVector(*CurDAG));
1430 
1431  SDValue RegSeq = createQTuple(Regs);
1432 
1433  unsigned LaneNo =
1434  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1435 
1436  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1437  N->getOperand(NumVecs + 3), N->getOperand(0)};
1438  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1439 
1440  // Transfer memoperands.
1441  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1442  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1443  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1444 
1445  ReplaceNode(N, St);
1446 }
1447 
1448 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1449  unsigned Opc) {
1450  SDLoc dl(N);
1451  EVT VT = N->getOperand(2)->getValueType(0);
1452  bool Narrow = VT.getSizeInBits() == 64;
1453 
1454  // Form a REG_SEQUENCE to force register allocation.
1455  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1456 
1457  if (Narrow)
1458  transform(Regs, Regs.begin(),
1459  WidenVector(*CurDAG));
1460 
1461  SDValue RegSeq = createQTuple(Regs);
1462 
1463  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1464  MVT::Other};
1465 
1466  unsigned LaneNo =
1467  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1468 
1469  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1470  N->getOperand(NumVecs + 2), // Base Register
1471  N->getOperand(NumVecs + 3), // Incremental
1472  N->getOperand(0)};
1473  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1474 
1475  // Transfer memoperands.
1476  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1477  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1478  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1479 
1480  ReplaceNode(N, St);
1481 }
1482 
1483 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1484  unsigned &Opc, SDValue &Opd0,
1485  unsigned &LSB, unsigned &MSB,
1486  unsigned NumberOfIgnoredLowBits,
1487  bool BiggerPattern) {
1488  assert(N->getOpcode() == ISD::AND &&
1489  "N must be a AND operation to call this function");
1490 
1491  EVT VT = N->getValueType(0);
1492 
1493  // Here we can test the type of VT and return false when the type does not
1494  // match, but since it is done prior to that call in the current context
1495  // we turned that into an assert to avoid redundant code.
1496  assert((VT == MVT::i32 || VT == MVT::i64) &&
1497  "Type checking must have been done before calling this function");
1498 
1499  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1500  // changed the AND node to a 32-bit mask operation. We'll have to
1501  // undo that as part of the transform here if we want to catch all
1502  // the opportunities.
1503  // Currently the NumberOfIgnoredLowBits argument helps to recover
1504  // from these situations when matching a bigger pattern (bitfield insert).
1505 
1506  // For unsigned extracts, check for a shift right and mask
1507  uint64_t AndImm = 0;
1508  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1509  return false;
1510 
1511  const SDNode *Op0 = N->getOperand(0).getNode();
1512 
1513  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1514  // simplified. Try to undo that
1515  AndImm |= (1 << NumberOfIgnoredLowBits) - 1;
1516 
1517  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
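 // e.g. 0xff is such a mask (0xff & 0x100 == 0), while 0xf0 is not
 // (0xf0 & 0xf1 != 0).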
1518  if (AndImm & (AndImm + 1))
1519  return false;
1520 
1521  bool ClampMSB = false;
1522  uint64_t SrlImm = 0;
1523  // Handle the SRL + ANY_EXTEND case.
1524  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1525  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1526  // Extend the incoming operand of the SRL to 64-bit.
1527  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1528  // Make sure to clamp the MSB so that we preserve the semantics of the
1529  // original operations.
1530  ClampMSB = true;
1531  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1532  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1533  SrlImm)) {
1534  // If the shift result was truncated, we can still combine them.
1535  Opd0 = Op0->getOperand(0).getOperand(0);
1536 
1537  // Use the type of SRL node.
1538  VT = Opd0->getValueType(0);
1539  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1540  Opd0 = Op0->getOperand(0);
1541  } else if (BiggerPattern) {
1542  // Let's pretend a 0 shift right has been performed.
1543  // The resulting code will be at least as good as the original one
1544  // plus it may expose more opportunities for bitfield insert pattern.
1545  // FIXME: Currently we limit this to the bigger pattern, because
1546  // some optimizations expect AND and not UBFM.
1547  Opd0 = N->getOperand(0);
1548  } else
1549  return false;
1550 
1551  // Bail out on large immediates. This happens when no proper
1552  // combining/constant folding was performed.
1553  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1554  DEBUG((dbgs() << N
1555  << ": Found large shift immediate, this should not happen\n"));
1556  return false;
1557  }
1558 
1559  LSB = SrlImm;
1560  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1561  : countTrailingOnes<uint64_t>(AndImm)) -
1562  1;
1563  if (ClampMSB)
1564  // Since we're moving the extend before the right shift operation, we need
1565  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1566  // the zeros which would get shifted in with the original right shift
1567  // operation.
1568  MSB = MSB > 31 ? 31 : MSB;
1569 
1570  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1571  return true;
1572 }
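// Illustrative sketch (not from the original source): a concrete instance of
// the AND + SRL match above, assuming no other combines have touched the DAG.
//
//   t0 = srl i32 %x, 3
//   t1 = and i32 t0, 0x1f        ; AndImm = 0x1f, SrlImm = 3
//
// countTrailingOnes(0x1f) = 5, so LSB = 3 and MSB = 3 + 5 - 1 = 7, and the
// node is selected as UBFMWri %x, 3, 7, i.e. "ubfx w0, w0, #3, #5".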
1573 
1574 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1575  SDValue &Opd0, unsigned &Immr,
1576  unsigned &Imms) {
1577  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1578 
1579  EVT VT = N->getValueType(0);
1580  unsigned BitWidth = VT.getSizeInBits();
1581  assert((VT == MVT::i32 || VT == MVT::i64) &&
1582  "Type checking must have been done before calling this function");
1583 
1584  SDValue Op = N->getOperand(0);
1585  if (Op->getOpcode() == ISD::TRUNCATE) {
1586  Op = Op->getOperand(0);
1587  VT = Op->getValueType(0);
1588  BitWidth = VT.getSizeInBits();
1589  }
1590 
1591  uint64_t ShiftImm;
1592  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1593  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1594  return false;
1595 
1596  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1597  if (ShiftImm + Width > BitWidth)
1598  return false;
1599 
1600  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1601  Opd0 = Op.getOperand(0);
1602  Immr = ShiftImm;
1603  Imms = ShiftImm + Width - 1;
1604  return true;
1605 }
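// Illustrative sketch (not from the original source), assuming i32 types:
//
//   t0 = srl i32 %x, 8
//   t1 = sign_extend_inreg t0, i8   ; ShiftImm = 8, Width = 8
//
// yields Immr = 8 and Imms = 8 + 8 - 1 = 15, selected as SBFMWri %x, 8, 15,
// i.e. "sbfx w0, w0, #8, #8".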
1606 
1607 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1608  SDValue &Opd0, unsigned &LSB,
1609  unsigned &MSB) {
1610  // We are looking for the following pattern which basically extracts several
1611  // contiguous bits from the source value and places them at the LSB of the
1612  // destination value; all other bits of the destination value are set to zero:
1613  //
1614  // Value2 = AND Value, MaskImm
1615  // SRL Value2, ShiftImm
1616  //
1617  // where MaskImm >> ShiftImm is used to determine the bit width.
1618  //
1619  // This gets selected into a single UBFM:
1620  //
1621  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1622  //
1623 
1624  if (N->getOpcode() != ISD::SRL)
1625  return false;
1626 
1627  uint64_t AndMask = 0;
1628  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1629  return false;
1630 
1631  Opd0 = N->getOperand(0).getOperand(0);
1632 
1633  uint64_t SrlImm = 0;
1634  if (!isIntImmediate(N->getOperand(1), SrlImm))
1635  return false;
1636 
1637  // Check whether we really have several bits extract here.
1638  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1639  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1640  if (N->getValueType(0) == MVT::i32)
1641  Opc = AArch64::UBFMWri;
1642  else
1643  Opc = AArch64::UBFMXri;
1644 
1645  LSB = SrlImm;
1646  MSB = BitWide + SrlImm - 1;
1647  return true;
1648  }
1649 
1650  return false;
1651 }
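// Illustrative sketch (not from the original source) of the AND + SRL shape
// matched above:
//
//   t0 = and i64 %x, 0xff0
//   t1 = srl i64 t0, 4            ; AndMask = 0xff0, SrlImm = 4
//
// AndMask >> SrlImm = 0xff is a mask of width 8, so LSB = 4 and
// MSB = 8 + 4 - 1 = 11, selected as UBFMXri %x, 4, 11 ("ubfx x0, x0, #4, #8").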
1652 
1653 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1654  unsigned &Immr, unsigned &Imms,
1655  bool BiggerPattern) {
1656  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1657  "N must be a SHR/SRA operation to call this function");
1658 
1659  EVT VT = N->getValueType(0);
1660 
1661  // We could test the type of VT here and return false when it does not
1662  // match, but since that check is already done prior to this call in the
1663  // current context, we turn it into an assert to avoid redundant code.
1664  assert((VT == MVT::i32 || VT == MVT::i64) &&
1665  "Type checking must have been done before calling this function");
1666 
1667  // Check for AND + SRL doing several bits extract.
1668  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1669  return true;
1670 
1671  // We're looking for a shift of a shift.
1672  uint64_t ShlImm = 0;
1673  uint64_t TruncBits = 0;
1674  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1675  Opd0 = N->getOperand(0).getOperand(0);
1676  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1677  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1678  // We are looking for a shift of truncate. Truncate from i64 to i32 could
1679  // be considered as setting high 32 bits as zero. Our strategy here is to
1680  // always generate 64bit UBFM. This consistency will help the CSE pass
1681  // later find more redundancy.
1682  Opd0 = N->getOperand(0).getOperand(0);
1683  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1684  VT = Opd0->getValueType(0);
1685  assert(VT == MVT::i64 && "the promoted type should be i64");
1686  } else if (BiggerPattern) {
1687  // Let's pretend a 0 shift left has been performed.
1688  // FIXME: Currently we limit this to the bigger pattern case,
1689  // because some optimizations expect AND and not UBFM
1690  Opd0 = N->getOperand(0);
1691  } else
1692  return false;
1693 
1694  // Missing combines/constant folding may have left us with strange
1695  // constants.
1696  if (ShlImm >= VT.getSizeInBits()) {
1697  DEBUG((dbgs() << N
1698  << ": Found large shift immediate, this should not happen\n"));
1699  return false;
1700  }
1701 
1702  uint64_t SrlImm = 0;
1703  if (!isIntImmediate(N->getOperand(1), SrlImm))
1704  return false;
1705 
1706  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1707  "bad amount in shift node!");
1708  int immr = SrlImm - ShlImm;
1709  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1710  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1711  // SRA requires a signed extraction
1712  if (VT == MVT::i32)
1713  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1714  else
1715  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1716  return true;
1717 }
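// Illustrative sketch (not from the original source): the classic
// shift-of-shift sign extension on i32,
//
//   t0 = shl i32 %x, 24
//   t1 = sra i32 t0, 24           ; ShlImm = 24, SrlImm = 24
//
// gives Immr = 24 - 24 = 0 and Imms = 32 - 24 - 0 - 1 = 7, selected as
// SBFMWri %x, 0, 7, i.e. "sxtb w0, w0". The SRL variant would pick UBFMWri.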
1718 
1719 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1720  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1721 
1722  EVT VT = N->getValueType(0);
1723  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1724  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1725  return false;
1726 
1727  uint64_t ShiftImm;
1728  SDValue Op = N->getOperand(0);
1729  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1730  return false;
1731 
1732  SDLoc dl(N);
1733  // Extend the incoming operand of the shift to 64-bits.
1734  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1735  unsigned Immr = ShiftImm;
1736  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1737  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1738  CurDAG->getTargetConstant(Imms, dl, VT)};
1739  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1740  return true;
1741 }
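// Illustrative sketch (not from the original source), assuming %x is i32:
//
//   t0 = sra i32 %x, 16
//   t1 = sign_extend t0 to i64
//
// The i32 operand is first widened to x-register width (see Widen earlier in
// this file), then Immr = 16 and Imms = 32 - 1 = 31 give SBFMXri,
// i.e. "sbfx x0, x0, #16, #16".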
1742 
1743 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1744  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1745  unsigned NumberOfIgnoredLowBits = 0,
1746  bool BiggerPattern = false) {
1747  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1748  return false;
1749 
1750  switch (N->getOpcode()) {
1751  default:
1752  if (!N->isMachineOpcode())
1753  return false;
1754  break;
1755  case ISD::AND:
1756  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1757  NumberOfIgnoredLowBits, BiggerPattern);
1758  case ISD::SRL:
1759  case ISD::SRA:
1760  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1761 
1762  case ISD::SIGN_EXTEND_INREG:
1763  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1764  }
1765 
1766  unsigned NOpc = N->getMachineOpcode();
1767  switch (NOpc) {
1768  default:
1769  return false;
1770  case AArch64::SBFMWri:
1771  case AArch64::UBFMWri:
1772  case AArch64::SBFMXri:
1773  case AArch64::UBFMXri:
1774  Opc = NOpc;
1775  Opd0 = N->getOperand(0);
1776  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1777  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1778  return true;
1779  }
1780  // Unreachable
1781  return false;
1782 }
1783 
1784 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1785  unsigned Opc, Immr, Imms;
1786  SDValue Opd0;
1787  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1788  return false;
1789 
1790  EVT VT = N->getValueType(0);
1791  SDLoc dl(N);
1792 
1793  // If the bit extract operation is 64bit but the original type is 32bit, we
1794  // need to add one EXTRACT_SUBREG.
1795  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1796  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1797  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1798 
1799  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1800  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1801  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1802  MVT::i32, SDValue(BFM, 0), SubReg));
1803  return true;
1804  }
1805 
1806  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1807  CurDAG->getTargetConstant(Imms, dl, VT)};
1808  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1809  return true;
1810 }
1811 
1812 /// Does DstMask form a complementary pair with the mask provided by
1813 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1814 /// this asks whether DstMask zeroes precisely those bits that will be set by
1815 /// the other half.
1816 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1817  unsigned NumberOfIgnoredHighBits, EVT VT) {
1818  assert((VT == MVT::i32 || VT == MVT::i64) &&
1819  "i32 or i64 mask type expected!");
1820  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1821 
1822  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1823  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1824 
1825  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1826  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1827 }
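// Illustrative sketch (not from the original source): with VT = i32,
// NumberOfIgnoredHighBits = 0, DstMask = 0xffff0000 and BitsToBeInserted
// covering bits [15:0], the two masks neither overlap nor leave a gap
// (their AND is 0 and their OR is all ones), so the destination-side AND can
// be folded away and the insertion done directly by the BFM.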
1828 
1829 // Look for bits that will be useful for later uses.
1830 // A bit is considered useless as soon as it is dropped and never used
1831 // before it has been dropped.
1832 // E.g., looking for the useful bits of x:
1833 // 1. y = x & 0x7
1834 // 2. z = y >> 2
1835 // After #1, the useful bits of x are 0x7, and they live on through
1836 // y.
1837 // After #2, the useful bits of x are 0x4.
1838 // However, if x is used by an unpredictable instruction, then all its bits
1839 // are useful.
1840 // E.g.
1841 // 1. y = x & 0x7
1842 // 2. z = y >> 2
1843 // 3. str x, [@x]
1844 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1845 
1846 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1847  unsigned Depth) {
1848  uint64_t Imm =
1849  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1850  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1851  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1852  getUsefulBits(Op, UsefulBits, Depth + 1);
1853 }
1854 
1855 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1856  uint64_t Imm, uint64_t MSB,
1857  unsigned Depth) {
1858  // inherit the bitwidth value
1859  APInt OpUsefulBits(UsefulBits);
1860  OpUsefulBits = 1;
1861 
1862  if (MSB >= Imm) {
1863  OpUsefulBits <<= MSB - Imm + 1;
1864  --OpUsefulBits;
1865  // The interesting part will be in the lower part of the result
1866  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1867  // The interesting part was starting at Imm in the argument
1868  OpUsefulBits <<= Imm;
1869  } else {
1870  OpUsefulBits <<= MSB + 1;
1871  --OpUsefulBits;
1872  // The interesting part will be shifted in the result
1873  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1874  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1875  // The interesting part was at zero in the argument
1876  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1877  }
1878 
1879  UsefulBits &= OpUsefulBits;
1880 }
1881 
1882 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1883  unsigned Depth) {
1884  uint64_t Imm =
1885  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1886  uint64_t MSB =
1887  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1888 
1889  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1890 }
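// Illustrative sketch (not from the original source), assuming every bit of
// the UBFM result is used downstream: for "UBFMWri %x, 4, 11" (ubfx #4, #8)
// the result's low 8 bits come from bits [11:4] of %x, so after shifting the
// recursively computed mask back up by Imm = 4 the useful bits of %x become
// 0xff0 and everything else can be treated as dead.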
1891 
1892 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1893  unsigned Depth) {
1894  uint64_t ShiftTypeAndValue =
1895  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1896  APInt Mask(UsefulBits);
1897  Mask.clearAllBits();
1898  Mask.flipAllBits();
1899 
1900  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1901  // Shift Left
1902  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1903  Mask <<= ShiftAmt;
1904  getUsefulBits(Op, Mask, Depth + 1);
1905  Mask.lshrInPlace(ShiftAmt);
1906  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1907  // Shift Right
1908  // We do not handle AArch64_AM::ASR, because the sign will change the
1909  // number of useful bits
1910  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1911  Mask.lshrInPlace(ShiftAmt);
1912  getUsefulBits(Op, Mask, Depth + 1);
1913  Mask <<= ShiftAmt;
1914  } else
1915  return;
1916 
1917  UsefulBits &= Mask;
1918 }
1919 
1920 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1921  unsigned Depth) {
1922  uint64_t Imm =
1923  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1924  uint64_t MSB =
1925  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1926 
1927  APInt OpUsefulBits(UsefulBits);
1928  OpUsefulBits = 1;
1929 
1930  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1931  ResultUsefulBits.flipAllBits();
1932  APInt Mask(UsefulBits.getBitWidth(), 0);
1933 
1934  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1935 
1936  if (MSB >= Imm) {
1937  // The instruction is a BFXIL.
1938  uint64_t Width = MSB - Imm + 1;
1939  uint64_t LSB = Imm;
1940 
1941  OpUsefulBits <<= Width;
1942  --OpUsefulBits;
1943 
1944  if (Op.getOperand(1) == Orig) {
1945  // Copy the low bits from the result to bits starting from LSB.
1946  Mask = ResultUsefulBits & OpUsefulBits;
1947  Mask <<= LSB;
1948  }
1949 
1950  if (Op.getOperand(0) == Orig)
1951  // Bits starting from LSB in the input contribute to the result.
1952  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1953  } else {
1954  // The instruction is a BFI.
1955  uint64_t Width = MSB + 1;
1956  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1957 
1958  OpUsefulBits <<= Width;
1959  --OpUsefulBits;
1960  OpUsefulBits <<= LSB;
1961 
1962  if (Op.getOperand(1) == Orig) {
1963  // Copy the bits from the result to the zero bits.
1964  Mask = ResultUsefulBits & OpUsefulBits;
1965  Mask.lshrInPlace(LSB);
1966  }
1967 
1968  if (Op.getOperand(0) == Orig)
1969  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1970  }
1971 
1972  UsefulBits &= Mask;
1973 }
1974 
1975 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1976  SDValue Orig, unsigned Depth) {
1977 
1978  // Users of this node should have already been instruction selected
1979  // FIXME: Can we turn that into an assert?
1980  if (!UserNode->isMachineOpcode())
1981  return;
1982 
1983  switch (UserNode->getMachineOpcode()) {
1984  default:
1985  return;
1986  case AArch64::ANDSWri:
1987  case AArch64::ANDSXri:
1988  case AArch64::ANDWri:
1989  case AArch64::ANDXri:
1990  // We increment Depth only when we call the getUsefulBits
1991  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1992  Depth);
1993  case AArch64::UBFMWri:
1994  case AArch64::UBFMXri:
1995  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1996 
1997  case AArch64::ORRWrs:
1998  case AArch64::ORRXrs:
1999  if (UserNode->getOperand(1) != Orig)
2000  return;
2001  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2002  Depth);
2003  case AArch64::BFMWri:
2004  case AArch64::BFMXri:
2005  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2006 
2007  case AArch64::STRBBui:
2008  case AArch64::STURBBi:
2009  if (UserNode->getOperand(0) != Orig)
2010  return;
2011  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2012  return;
2013 
2014  case AArch64::STRHHui:
2015  case AArch64::STURHHi:
2016  if (UserNode->getOperand(0) != Orig)
2017  return;
2018  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2019  return;
2020  }
2021 }
2022 
2023 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2024  if (Depth >= 6)
2025  return;
2026  // Initialize UsefulBits
2027  if (!Depth) {
2028  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2029  // At the beginning, assume every produced bit is useful
2030  UsefulBits = APInt(Bitwidth, 0);
2031  UsefulBits.flipAllBits();
2032  }
2033  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2034 
2035  for (SDNode *Node : Op.getNode()->uses()) {
2036  // A use cannot produce useful bits
2037  APInt UsefulBitsForUse = APInt(UsefulBits);
2038  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2039  UsersUsefulBits |= UsefulBitsForUse;
2040  }
2041  // UsefulBits contains the produced bits that are meaningful for the
2042  // current definition, thus a user cannot make a bit meaningful at
2043  // this point
2044  UsefulBits &= UsersUsefulBits;
2045 }
2046 
2047 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2048 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2049 /// 0, return Op unchanged.
2050 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2051  if (ShlAmount == 0)
2052  return Op;
2053 
2054  EVT VT = Op.getValueType();
2055  SDLoc dl(Op);
2056  unsigned BitWidth = VT.getSizeInBits();
2057  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2058 
2059  SDNode *ShiftNode;
2060  if (ShlAmount > 0) {
2061  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2062  ShiftNode = CurDAG->getMachineNode(
2063  UBFMOpc, dl, VT, Op,
2064  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2065  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2066  } else {
2067  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2068  assert(ShlAmount < 0 && "expected right shift");
2069  int ShrAmount = -ShlAmount;
2070  ShiftNode = CurDAG->getMachineNode(
2071  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2072  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2073  }
2074 
2075  return SDValue(ShiftNode, 0);
2076 }
2077 
2078 /// Does this tree qualify as an attempt to move a bitfield into position,
2079 /// essentially "(and (shl VAL, N), Mask)".
2080 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2081  bool BiggerPattern,
2082  SDValue &Src, int &ShiftAmount,
2083  int &MaskWidth) {
2084  EVT VT = Op.getValueType();
2085  unsigned BitWidth = VT.getSizeInBits();
2086  (void)BitWidth;
2087  assert(BitWidth == 32 || BitWidth == 64);
2088 
2089  KnownBits Known;
2090  CurDAG->computeKnownBits(Op, Known);
2091 
2092  // Non-zero in the sense that they're not provably zero, which is the key
2093  // point if we want to use this value
2094  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2095 
2096  // Discard a constant AND mask if present. It's safe because the node will
2097  // already have been factored into the computeKnownBits calculation above.
2098  uint64_t AndImm;
2099  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2100  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2101  Op = Op.getOperand(0);
2102  }
2103 
2104  // Don't match if the SHL has more than one use, since then we'll end up
2105  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2106  if (!BiggerPattern && !Op.hasOneUse())
2107  return false;
2108 
2109  uint64_t ShlImm;
2110  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2111  return false;
2112  Op = Op.getOperand(0);
2113 
2114  if (!isShiftedMask_64(NonZeroBits))
2115  return false;
2116 
2117  ShiftAmount = countTrailingZeros(NonZeroBits);
2118  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2119 
2120  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2121  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2122  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2123  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2124  // which case it is not profitable to insert an extra shift.
2125  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2126  return false;
2127  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2128 
2129  return true;
2130 }
2131 
2132 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2133  assert(VT == MVT::i32 || VT == MVT::i64);
2134  if (VT == MVT::i32)
2135  return isShiftedMask_32(Mask);
2136  return isShiftedMask_64(Mask);
2137 }
2138 
2139 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2140 // inserted only sets known zero bits.
2141 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2142  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2143 
2144  EVT VT = N->getValueType(0);
2145  if (VT != MVT::i32 && VT != MVT::i64)
2146  return false;
2147 
2148  unsigned BitWidth = VT.getSizeInBits();
2149 
2150  uint64_t OrImm;
2151  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2152  return false;
2153 
2154  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2155  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2156  // performance neutral.
2157  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2158  return false;
2159 
2160  uint64_t MaskImm;
2161  SDValue And = N->getOperand(0);
2162  // Must be a single use AND with an immediate operand.
2163  if (!And.hasOneUse() ||
2164  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2165  return false;
2166 
2167  // Compute the Known Zero for the AND as this allows us to catch more general
2168  // cases than just looking for AND with imm.
2169  KnownBits Known;
2170  CurDAG->computeKnownBits(And, Known);
2171 
2172  // Non-zero in the sense that they're not provably zero, which is the key
2173  // point if we want to use this value.
2174  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2175 
2176  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2177  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2178  return false;
2179 
2180  // The bits being inserted must only set those bits that are known to be zero.
2181  if ((OrImm & NotKnownZero) != 0) {
2182  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2183  // currently handle this case.
2184  return false;
2185  }
2186 
2187  // BFI/BFXIL dst, src, #lsb, #width.
2188  int LSB = countTrailingOnes(NotKnownZero);
2189  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2190 
2191  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2192  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2193  unsigned ImmS = Width - 1;
2194 
2195  // If we're creating a BFI instruction avoid cases where we need more
2196  // instructions to materialize the BFI constant as compared to the original
2197  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2198  // should be no worse in this case.
2199  bool IsBFI = LSB != 0;
2200  uint64_t BFIImm = OrImm >> LSB;
2201  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2202  // We have a BFI instruction and we know the constant can't be materialized
2203  // with a ORR-immediate with the zero register.
2204  unsigned OrChunks = 0, BFIChunks = 0;
2205  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2206  if (((OrImm >> Shift) & 0xFFFF) != 0)
2207  ++OrChunks;
2208  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2209  ++BFIChunks;
2210  }
2211  if (BFIChunks > OrChunks)
2212  return false;
2213  }
2214 
2215  // Materialize the constant to be inserted.
2216  SDLoc DL(N);
2217  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2218  SDNode *MOVI = CurDAG->getMachineNode(
2219  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2220 
2221  // Create the BFI/BFXIL instruction.
2222  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2223  CurDAG->getTargetConstant(ImmR, DL, VT),
2224  CurDAG->getTargetConstant(ImmS, DL, VT)};
2225  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2226  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2227  return true;
2228 }
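// Illustrative sketch (not from the original source), assuming the AND has a
// single use and nothing else is known about %x:
//
//   t0 = and i32 %x, 0xffffff00
//   t1 = or  i32 t0, 0x5a         ; 0x5a is not a valid logical immediate
//
// KnownZero(t0) = 0x000000ff, so LSB = 0 and Width = 8. The constant 0x5a is
// materialized with MOVi32imm and inserted with BFMWri (ImmR = 0, ImmS = 7),
// i.e. roughly "mov w8, #0x5a; bfxil w0, w8, #0, #8".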
2229 
2230 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2231  SelectionDAG *CurDAG) {
2232  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2233 
2234  EVT VT = N->getValueType(0);
2235  if (VT != MVT::i32 && VT != MVT::i64)
2236  return false;
2237 
2238  unsigned BitWidth = VT.getSizeInBits();
2239 
2240  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2241  // have the expected shape. Try to undo that.
2242 
2243  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2244  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2245 
2246  // Given an OR operation, check if we have the following pattern
2247  // ubfm c, b, imm, imm2 (or something that does the same job, see
2248  // isBitfieldExtractOp)
2249  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2250  // countTrailingZeros(mask2) == imm2 - imm + 1
2251  // f = d | c
2252  // if yes, replace the OR instruction with:
2253  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2254 
2255  // OR is commutative, check all combinations of operand order and values of
2256  // BiggerPattern, i.e.
2257  // Opd0, Opd1, BiggerPattern=false
2258  // Opd1, Opd0, BiggerPattern=false
2259  // Opd0, Opd1, BiggerPattern=true
2260  // Opd1, Opd0, BiggerPattern=true
2261  // Several of these combinations may match, so check with BiggerPattern=false
2262  // first since that will produce better results by matching more instructions
2263  // and/or inserting fewer extra instructions.
2264  for (int I = 0; I < 4; ++I) {
2265 
2266  SDValue Dst, Src;
2267  unsigned ImmR, ImmS;
2268  bool BiggerPattern = I / 2;
2269  SDValue OrOpd0Val = N->getOperand(I % 2);
2270  SDNode *OrOpd0 = OrOpd0Val.getNode();
2271  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2272  SDNode *OrOpd1 = OrOpd1Val.getNode();
2273 
2274  unsigned BFXOpc;
2275  int DstLSB, Width;
2276  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2277  NumberOfIgnoredLowBits, BiggerPattern)) {
2278  // Check that the returned opcode is compatible with the pattern,
2279  // i.e., same type and zero extended (U and not S)
2280  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2281  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2282  continue;
2283 
2284  // Compute the width of the bitfield insertion
2285  DstLSB = 0;
2286  Width = ImmS - ImmR + 1;
2287  // FIXME: This constraint is to catch bitfield insertion; we may
2288  // want to widen the pattern if we want to grab the general bitfield
2289  // move case.
2290  if (Width <= 0)
2291  continue;
2292 
2293  // If the mask on the insertee is correct, we have a BFXIL operation. We
2294  // can share the ImmR and ImmS values from the already-computed UBFM.
2295  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2296  BiggerPattern,
2297  Src, DstLSB, Width)) {
2298  ImmR = (BitWidth - DstLSB) % BitWidth;
2299  ImmS = Width - 1;
2300  } else
2301  continue;
2302 
2303  // Check the second part of the pattern
2304  EVT VT = OrOpd1->getValueType(0);
2305  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2306 
2307  // Compute the Known Zero for the candidate of the first operand.
2308  // This allows us to catch more general cases than just looking for
2309  // AND with imm. Indeed, simplify-demanded-bits may have removed
2310  // the AND instruction because it proved it was useless.
2311  KnownBits Known;
2312  CurDAG->computeKnownBits(OrOpd1Val, Known);
2313 
2314  // Check if there is enough room for the second operand to appear
2315  // in the first one
2316  APInt BitsToBeInserted =
2317  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2318 
2319  if ((BitsToBeInserted & ~Known.Zero) != 0)
2320  continue;
2321 
2322  // Set the first operand
2323  uint64_t Imm;
2324  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2325  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2326  // In that case, we can eliminate the AND
2327  Dst = OrOpd1->getOperand(0);
2328  else
2329  // Maybe the AND has been removed by simplify-demanded-bits
2330  // or is useful because it discards more bits
2331  Dst = OrOpd1Val;
2332 
2333  // both parts match
2334  SDLoc DL(N);
2335  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2336  CurDAG->getTargetConstant(ImmS, DL, VT)};
2337  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2338  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2339  return true;
2340  }
2341 
2342  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2343  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2344  // mask (e.g., 0x000ffff0).
2345  uint64_t Mask0Imm, Mask1Imm;
2346  SDValue And0 = N->getOperand(0);
2347  SDValue And1 = N->getOperand(1);
2348  if (And0.hasOneUse() && And1.hasOneUse() &&
2349  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2350  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2351  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2352  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2353 
2354  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2355  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2356  // bits to be inserted.
2357  if (isShiftedMask(Mask0Imm, VT)) {
2358  std::swap(And0, And1);
2359  std::swap(Mask0Imm, Mask1Imm);
2360  }
2361 
2362  SDValue Src = And1->getOperand(0);
2363  SDValue Dst = And0->getOperand(0);
2364  unsigned LSB = countTrailingZeros(Mask1Imm);
2365  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2366 
2367  // The BFXIL inserts the low-order bits from a source register, so right
2368  // shift the needed bits into place.
2369  SDLoc DL(N);
2370  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2371  SDNode *LSR = CurDAG->getMachineNode(
2372  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2373  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2374 
2375  // BFXIL is an alias of BFM, so translate to BFM operands.
2376  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2377  unsigned ImmS = Width - 1;
2378 
2379  // Create the BFXIL instruction.
2380  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2381  CurDAG->getTargetConstant(ImmR, DL, VT),
2382  CurDAG->getTargetConstant(ImmS, DL, VT)};
2383  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2384  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2385  return true;
2386  }
2387 
2388  return false;
2389 }
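// Illustrative sketch (not from the original source) of the last pattern
// above, assuming both ANDs are single-use:
//
//   t0 = and i32 %x, 0xfff0000f   ; Mask0Imm
//   t1 = and i32 %y, 0x000ffff0   ; Mask1Imm = ~Mask0Imm, a shifted mask
//   t2 = or  i32 t0, t1
//
// LSB = 4 and Width = 16, so %y is first shifted right by 4 with a UBFM and
// the OR is then replaced by a single BFM that copies those 16 bits into
// bits [19:4] of %x.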
2390 
2391 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2392  if (N->getOpcode() != ISD::OR)
2393  return false;
2394 
2395  APInt NUsefulBits;
2396  getUsefulBits(SDValue(N, 0), NUsefulBits);
2397 
2398  // If none of the bits are useful, just return UNDEF.
2399  if (!NUsefulBits) {
2400  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2401  return true;
2402  }
2403 
2404  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2405  return true;
2406 
2407  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2408 }
2409 
2410 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2411 /// equivalent of a left shift by a constant amount followed by an and masking
2412 /// out a contiguous set of bits.
2413 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2414  if (N->getOpcode() != ISD::AND)
2415  return false;
2416 
2417  EVT VT = N->getValueType(0);
2418  if (VT != MVT::i32 && VT != MVT::i64)
2419  return false;
2420 
2421  SDValue Op0;
2422  int DstLSB, Width;
2423  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2424  Op0, DstLSB, Width))
2425  return false;
2426 
2427  // ImmR is the rotate right amount.
2428  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2429  // ImmS is the most significant bit of the source to be moved.
2430  unsigned ImmS = Width - 1;
2431 
2432  SDLoc DL(N);
2433  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2434  CurDAG->getTargetConstant(ImmS, DL, VT)};
2435  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2436  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2437  return true;
2438 }
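// Illustrative sketch (not from the original source), assuming the SHL has a
// single use:
//
//   t0 = shl i32 %x, 4
//   t1 = and i32 t0, 0xff0        ; shifted mask: DstLSB = 4, Width = 8
//
// ImmR = (32 - 4) % 32 = 28 and ImmS = 7, selected as UBFMWri %x, 28, 7,
// i.e. "ubfiz w0, w0, #4, #8".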
2439 
2440 bool
2441 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2442  unsigned RegWidth) {
2443  APFloat FVal(0.0);
2444  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2445  FVal = CN->getValueAPF();
2446  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2447  // Some otherwise illegal constants are allowed in this case.
2448  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2449  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2450  return false;
2451 
2452  ConstantPoolSDNode *CN =
2453  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2454  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2455  } else
2456  return false;
2457 
2458  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2459  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2460  // x-register.
2461  //
2462  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2463  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2464  // integers.
2465  bool IsExact;
2466 
2467  // fbits is between 1 and 64 in the worst-case, which means the fmul
2468  // could have 2^64 as an actual operand. Need 65 bits of precision.
2469  APSInt IntVal(65, true);
2470  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2471 
2472  // N.b. isPowerOf2 also checks for > 0.
2473  if (!IsExact || !IntVal.isPowerOf2()) return false;
2474  unsigned FBits = IntVal.logBase2();
2475 
2476  // Checks above should have guaranteed that we haven't lost information in
2477  // finding FBits, but it must still be in range.
2478  if (FBits == 0 || FBits > RegWidth) return false;
2479 
2480  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2481  return true;
2482 }
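// Illustrative sketch (not from the original source): for
// (fp_to_sint (fmul %v, 16.0)) with a 32-bit destination, the constant 16.0
// converts exactly to the integer 16, a power of two, so FBits = 4 and the
// operand is accepted, eventually yielding something like
// "fcvtzs w0, s0, #4".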
2483 
2484 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
2485 // of the string, obtains the integer values from them, and combines these
2486 // into a single value to be used in the MRS/MSR instruction.
2487 static int getIntOperandFromRegisterString(StringRef RegString) {
2488  SmallVector<StringRef, 5> Fields;
2489  RegString.split(Fields, ':');
2490 
2491  if (Fields.size() == 1)
2492  return -1;
2493 
2494  assert(Fields.size() == 5
2495  && "Invalid number of fields in read register string");
2496 
2497  SmallVector<int, 5> Ops;
2498  bool AllIntFields = true;
2499 
2500  for (StringRef Field : Fields) {
2501  unsigned IntField;
2502  AllIntFields &= !Field.getAsInteger(10, IntField);
2503  Ops.push_back(IntField);
2504  }
2505 
2506  assert(AllIntFields &&
2507  "Unexpected non-integer value in special register string.");
2508 
2509  // Need to combine the integer fields of the string into a single value
2510  // based on the bit encoding of MRS/MSR instruction.
2511  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2512  (Ops[3] << 3) | (Ops[4]);
2513 }
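// Illustrative sketch (not from the original source): the string
// "3:3:13:0:2" splits into five fields and packs to
// (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xde82, which matches
// the sysreg encoding the mapper would return for "TPIDR_EL0".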
2514 
2515 // Lower the read_register intrinsic to an MRS instruction node if the special
2516 // register string argument is either of the form detailed in the ACLE (the
2517 // form described in getIntOperandFromRegisterString) or is a named register
2518 // known by the MRS SysReg mapper.
2519 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2520  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2521  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2522  SDLoc DL(N);
2523 
2524  int Reg = getIntOperandFromRegisterString(RegString->getString());
2525  if (Reg != -1) {
2526  ReplaceNode(N, CurDAG->getMachineNode(
2527  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2528  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2529  N->getOperand(0)));
2530  return true;
2531  }
2532 
2533  // Use the sysreg mapper to map the remaining possible strings to the
2534  // value for the register to be used for the instruction operand.
2535  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2536  if (TheReg && TheReg->Readable &&
2537  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2538  Reg = TheReg->Encoding;
2539  else
2540  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2541 
2542  if (Reg != -1) {
2543  ReplaceNode(N, CurDAG->getMachineNode(
2544  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2545  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2546  N->getOperand(0)));
2547  return true;
2548  }
2549 
2550  return false;
2551 }
2552 
2553 // Lower the write_register intrinsic to an MSR instruction node if the special
2554 // register string argument is either of the form detailed in the ACLE (the
2555 // form described in getIntOperandFromRegisterString) or is a named register
2556 // known by the MSR SysReg mapper.
2557 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2558  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2559  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2560  SDLoc DL(N);
2561 
2562  int Reg = getIntOperandFromRegisterString(RegString->getString());
2563  if (Reg != -1) {
2564  ReplaceNode(
2565  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2566  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2567  N->getOperand(2), N->getOperand(0)));
2568  return true;
2569  }
2570 
2571  // Check if the register was one of those allowed as the pstatefield value in
2572  // the MSR (immediate) instruction. To accept the values allowed in the
2573  // pstatefield for the MSR (immediate) instruction, we also require that an
2574  // immediate value has been provided as an argument; we know that this is
2575  // the case as it has been ensured by semantic checking.
2576  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2577  if (PMapper) {
2578  assert (isa<ConstantSDNode>(N->getOperand(2))
2579  && "Expected a constant integer expression.");
2580  unsigned Reg = PMapper->Encoding;
2581  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2582  unsigned State;
2583  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
2584  assert(Immed < 2 && "Bad imm");
2585  State = AArch64::MSRpstateImm1;
2586  } else {
2587  assert(Immed < 16 && "Bad imm");
2588  State = AArch64::MSRpstateImm4;
2589  }
2590  ReplaceNode(N, CurDAG->getMachineNode(
2591  State, DL, MVT::Other,
2592  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2593  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2594  N->getOperand(0)));
2595  return true;
2596  }
2597 
2598  // Use the sysreg mapper to attempt to map the remaining possible strings
2599  // to the value for the register to be used for the MSR (register)
2600  // instruction operand.
2601  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2602  if (TheReg && TheReg->Writeable &&
2603  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2604  Reg = TheReg->Encoding;
2605  else
2606  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2607  if (Reg != -1) {
2608  ReplaceNode(N, CurDAG->getMachineNode(
2609  AArch64::MSR, DL, MVT::Other,
2610  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2611  N->getOperand(2), N->getOperand(0)));
2612  return true;
2613  }
2614 
2615  return false;
2616 }
2617 
2618 /// We've got special pseudo-instructions for these
2619 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2620  unsigned Opcode;
2621  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2622 
2623  // Leave IR for LSE if subtarget supports it.
2624  if (Subtarget->hasLSE()) return false;
2625 
2626  if (MemTy == MVT::i8)
2627  Opcode = AArch64::CMP_SWAP_8;
2628  else if (MemTy == MVT::i16)
2629  Opcode = AArch64::CMP_SWAP_16;
2630  else if (MemTy == MVT::i32)
2631  Opcode = AArch64::CMP_SWAP_32;
2632  else if (MemTy == MVT::i64)
2633  Opcode = AArch64::CMP_SWAP_64;
2634  else
2635  llvm_unreachable("Unknown AtomicCmpSwap type");
2636 
2637  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2638  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2639  N->getOperand(0)};
2640  SDNode *CmpSwap = CurDAG->getMachineNode(
2641  Opcode, SDLoc(N),
2642  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2643 
2644  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2645  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2646  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2647 
2648  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2649  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2650  CurDAG->RemoveDeadNode(N);
2651 
2652  return true;
2653 }
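// Illustrative sketch (not from the original source): on a subtarget without
// LSE, an i32 "cmpxchg" that reaches instruction selection as ATOMIC_CMP_SWAP
// is turned into the CMP_SWAP_32 pseudo here; the pseudo is expanded after
// selection into an exclusive load/store retry loop. With LSE available the
// function bails out so the atomic can be handled with the native CAS
// instructions instead.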
2654 
2655 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2656  // Dump information about the Node being selected
2657  DEBUG(errs() << "Selecting: ");
2658  DEBUG(Node->dump(CurDAG));
2659  DEBUG(errs() << "\n");
2660 
2661  // If we have a custom node, we already have selected!
2662  if (Node->isMachineOpcode()) {
2663  DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2664  Node->setNodeId(-1);
2665  return;
2666  }
2667 
2668  // A few custom selection cases.
2669  EVT VT = Node->getValueType(0);
2670 
2671  switch (Node->getOpcode()) {
2672  default:
2673  break;
2674 
2675  case ISD::ATOMIC_CMP_SWAP:
2676  if (SelectCMP_SWAP(Node))
2677  return;
2678  break;
2679 
2680  case ISD::READ_REGISTER:
2681  if (tryReadRegister(Node))
2682  return;
2683  break;
2684 
2685  case ISD::WRITE_REGISTER:
2686  if (tryWriteRegister(Node))
2687  return;
2688  break;
2689 
2690  case ISD::ADD:
2691  if (tryMLAV64LaneV128(Node))
2692  return;
2693  break;
2694 
2695  case ISD::LOAD: {
2696  // Try to select as an indexed load. Fall through to normal processing
2697  // if we can't.
2698  if (tryIndexedLoad(Node))
2699  return;
2700  break;
2701  }
2702 
2703  case ISD::SRL:
2704  case ISD::AND:
2705  case ISD::SRA:
2706  case ISD::SIGN_EXTEND_INREG:
2707  if (tryBitfieldExtractOp(Node))
2708  return;
2709  if (tryBitfieldInsertInZeroOp(Node))
2710  return;
2711  break;
2712 
2713  case ISD::SIGN_EXTEND:
2714  if (tryBitfieldExtractOpFromSExt(Node))
2715  return;
2716  break;
2717 
2718  case ISD::OR:
2719  if (tryBitfieldInsertOp(Node))
2720  return;
2721  break;
2722 
2723  case ISD::EXTRACT_VECTOR_ELT: {
2724  // Extracting lane zero is a special case where we can just use a plain
2725  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2726  // the rest of the compiler, especially the register allocator and copy
2727  // propagation, to reason about, so is preferred when it's possible to
2728  // use it.
2729  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2730  // Bail and use the default Select() for non-zero lanes.
2731  if (LaneNode->getZExtValue() != 0)
2732  break;
2733  // If the element type is not the same as the result type, likewise
2734  // bail and use the default Select(), as there's more to do than just
2735  // a cross-class COPY. This catches extracts of i8 and i16 elements
2736  // since they will need an explicit zext.
2737  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2738  break;
2739  unsigned SubReg;
2740  switch (Node->getOperand(0)
2741  .getValueType()
2742  .getVectorElementType()
2743  .getSizeInBits()) {
2744  default:
2745  llvm_unreachable("Unexpected vector element type!");
2746  case 64:
2747  SubReg = AArch64::dsub;
2748  break;
2749  case 32:
2750  SubReg = AArch64::ssub;
2751  break;
2752  case 16:
2753  SubReg = AArch64::hsub;
2754  break;
2755  case 8:
2756  llvm_unreachable("unexpected zext-requiring extract element!");
2757  }
2758  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2759  Node->getOperand(0));
2760  DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2761  DEBUG(Extract->dumpr(CurDAG));
2762  DEBUG(dbgs() << "\n");
2763  ReplaceNode(Node, Extract.getNode());
2764  return;
2765  }
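// Illustrative sketch (not from the original source): for
// "extractelement <4 x float> %v, i64 0" the element type matches the f32
// result, the element size is 32, and the node becomes an EXTRACT_SUBREG of
// the ssub sub-register, which usually ends up as a plain register copy (or
// an FMOV) rather than a lane move.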
2766  case ISD::Constant: {
2767  // Materialize zero constants as copies from WZR/XZR. This allows
2768  // the coalescer to propagate these into other instructions.
2769  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2770  if (ConstNode->isNullValue()) {
2771  if (VT == MVT::i32) {
2772  SDValue New = CurDAG->getCopyFromReg(
2773  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2774  ReplaceNode(Node, New.getNode());
2775  return;
2776  } else if (VT == MVT::i64) {
2777  SDValue New = CurDAG->getCopyFromReg(
2778  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2779  ReplaceNode(Node, New.getNode());
2780  return;
2781  }
2782  }
2783  break;
2784  }
2785 
2786  case ISD::FrameIndex: {
2787  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2788  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2789  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2790  const TargetLowering *TLI = getTargetLowering();
2791  SDValue TFI = CurDAG->getTargetFrameIndex(
2792  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2793  SDLoc DL(Node);
2794  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2795  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2796  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2797  return;
2798  }
2799  case ISD::INTRINSIC_W_CHAIN: {
2800  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2801  switch (IntNo) {
2802  default:
2803  break;
2804  case Intrinsic::aarch64_ldaxp:
2805  case Intrinsic::aarch64_ldxp: {
2806  unsigned Op =
2807  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2808  SDValue MemAddr = Node->getOperand(2);
2809  SDLoc DL(Node);
2810  SDValue Chain = Node->getOperand(0);
2811 
2812  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2813  MVT::Other, MemAddr, Chain);
2814 
2815  // Transfer memoperands.
2816  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2817  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2818  cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2819  ReplaceNode(Node, Ld);
2820  return;
2821  }
2822  case Intrinsic::aarch64_stlxp:
2823  case Intrinsic::aarch64_stxp: {
2824  unsigned Op =
2825  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2826  SDLoc DL(Node);
2827  SDValue Chain = Node->getOperand(0);
2828  SDValue ValLo = Node->getOperand(2);
2829  SDValue ValHi = Node->getOperand(3);
2830  SDValue MemAddr = Node->getOperand(4);
2831 
2832  // Place arguments in the right order.
2833  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2834 
2835  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2836  // Transfer memoperands.
2837  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2838  MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2839  cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2840 
2841  ReplaceNode(Node, St);
2842  return;
2843  }
2844  case Intrinsic::aarch64_neon_ld1x2:
2845  if (VT == MVT::v8i8) {
2846  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2847  return;
2848  } else if (VT == MVT::v16i8) {
2849  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2850  return;
2851  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2852  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2853  return;
2854  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2855  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2856  return;
2857  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2858  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2859  return;
2860  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2861  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2862  return;
2863  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2864  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2865  return;
2866  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2867  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2868  return;
2869  }
2870  break;
2871  case Intrinsic::aarch64_neon_ld1x3:
2872  if (VT == MVT::v8i8) {
2873  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2874  return;
2875  } else if (VT == MVT::v16i8) {
2876  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2877  return;
2878  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2879  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2880  return;
2881  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2882  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2883  return;
2884  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2885  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2886  return;
2887  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2888  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2889  return;
2890  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2891  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2892  return;
2893  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2894  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2895  return;
2896  }
2897  break;
2898  case Intrinsic::aarch64_neon_ld1x4:
2899  if (VT == MVT::v8i8) {
2900  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2901  return;
2902  } else if (VT == MVT::v16i8) {
2903  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2904  return;
2905  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2906  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2907  return;
2908  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2909  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2910  return;
2911  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2912  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2913  return;
2914  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2915  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2916  return;
2917  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2918  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2919  return;
2920  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2921  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2922  return;
2923  }
2924  break;
2925  case Intrinsic::aarch64_neon_ld2:
2926  if (VT == MVT::v8i8) {
2927  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2928  return;
2929  } else if (VT == MVT::v16i8) {
2930  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2931  return;
2932  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2933  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2934  return;
2935  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2936  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2937  return;
2938  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2939  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2940  return;
2941  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2942  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2943  return;
2944  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2945  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2946  return;
2947  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2948  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2949  return;
2950  }
2951  break;
2952  case Intrinsic::aarch64_neon_ld3:
2953  if (VT == MVT::v8i8) {
2954  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2955  return;
2956  } else if (VT == MVT::v16i8) {
2957  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2958  return;
2959  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2960  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2961  return;
2962  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2963  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2964  return;
2965  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2966  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2967  return;
2968  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2969  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2970  return;
2971  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2972  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2973  return;
2974  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2975  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2976  return;
2977  }
2978  break;
2979  case Intrinsic::aarch64_neon_ld4:
2980  if (VT == MVT::v8i8) {
2981  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
2982  return;
2983  } else if (VT == MVT::v16i8) {
2984  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
2985  return;
2986  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2987  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
2988  return;
2989  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2990  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
2991  return;
2992  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2993  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
2994  return;
2995  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2996  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
2997  return;
2998  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2999  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3000  return;
3001  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3002  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3003  return;
3004  }
3005  break;
3006  case Intrinsic::aarch64_neon_ld2r:
3007  if (VT == MVT::v8i8) {
3008  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3009  return;
3010  } else if (VT == MVT::v16i8) {
3011  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3012  return;
3013  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3014  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3015  return;
3016  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3017  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3018  return;
3019  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3020  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3021  return;
3022  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3023  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3024  return;
3025  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3026  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3027  return;
3028  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3029  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3030  return;
3031  }
3032  break;
3033  case Intrinsic::aarch64_neon_ld3r:
3034  if (VT == MVT::v8i8) {
3035  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3036  return;
3037  } else if (VT == MVT::v16i8) {
3038  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3039  return;
3040  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3041  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3042  return;
3043  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3044  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3045  return;
3046  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3047  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3048  return;
3049  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3050  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3051  return;
3052  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3053  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3054  return;
3055  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3056  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3057  return;
3058  }
3059  break;
3060  case Intrinsic::aarch64_neon_ld4r:
3061  if (VT == MVT::v8i8) {
3062  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3063  return;
3064  } else if (VT == MVT::v16i8) {
3065  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3066  return;
3067  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3068  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3069  return;
3070  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3071  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3072  return;
3073  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3074  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3075  return;
3076  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3077  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3078  return;
3079  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3080  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3081  return;
3082  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3083  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3084  return;
3085  }
3086  break;
3087  case Intrinsic::aarch64_neon_ld2lane:
3088  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3089  SelectLoadLane(Node, 2, AArch64::LD2i8);
3090  return;
3091  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3092  VT == MVT::v8f16) {
3093  SelectLoadLane(Node, 2, AArch64::LD2i16);
3094  return;
3095  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3096  VT == MVT::v2f32) {
3097  SelectLoadLane(Node, 2, AArch64::LD2i32);
3098  return;
3099  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3100  VT == MVT::v1f64) {
3101  SelectLoadLane(Node, 2, AArch64::LD2i64);
3102  return;
3103  }
3104  break;
3105  case Intrinsic::aarch64_neon_ld3lane:
3106  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3107  SelectLoadLane(Node, 3, AArch64::LD3i8);
3108  return;
3109  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3110  VT == MVT::v8f16) {
3111  SelectLoadLane(Node, 3, AArch64::LD3i16);
3112  return;
3113  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3114  VT == MVT::v2f32) {
3115  SelectLoadLane(Node, 3, AArch64::LD3i32);
3116  return;
3117  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3118  VT == MVT::v1f64) {
3119  SelectLoadLane(Node, 3, AArch64::LD3i64);
3120  return;
3121  }
3122  break;
3123  case Intrinsic::aarch64_neon_ld4lane:
3124  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3125  SelectLoadLane(Node, 4, AArch64::LD4i8);
3126  return;
3127  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3128  VT == MVT::v8f16) {
3129  SelectLoadLane(Node, 4, AArch64::LD4i16);
3130  return;
3131  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3132  VT == MVT::v2f32) {
3133  SelectLoadLane(Node, 4, AArch64::LD4i32);
3134  return;
3135  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3136  VT == MVT::v1f64) {
3137  SelectLoadLane(Node, 4, AArch64::LD4i64);
3138  return;
3139  }
3140  break;
3141  }
3142  } break;
3143  case ISD::INTRINSIC_WO_CHAIN: {
3144  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3145  switch (IntNo) {
3146  default:
3147  break;
3148  case Intrinsic::aarch64_neon_tbl2:
3149  SelectTable(Node, 2,
3150  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3151  false);
3152  return;
3153  case Intrinsic::aarch64_neon_tbl3:
3154  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3155  : AArch64::TBLv16i8Three,
3156  false);
3157  return;
3158  case Intrinsic::aarch64_neon_tbl4:
3159  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3160  : AArch64::TBLv16i8Four,
3161  false);
3162  return;
3163  case Intrinsic::aarch64_neon_tbx2:
3164  SelectTable(Node, 2,
3165  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3166  true);
3167  return;
3168  case Intrinsic::aarch64_neon_tbx3:
3169  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3170  : AArch64::TBXv16i8Three,
3171  true);
3172  return;
3173  case Intrinsic::aarch64_neon_tbx4:
3174  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3175  : AArch64::TBXv16i8Four,
3176  true);
3177  return;
3178  case Intrinsic::aarch64_neon_smull:
3179  case Intrinsic::aarch64_neon_umull:
3180  if (tryMULLV64LaneV128(IntNo, Node))
3181  return;
3182  break;
3183  }
3184  break;
3185  }
3186  case ISD::INTRINSIC_VOID: {
3187  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3188  if (Node->getNumOperands() >= 3)
3189  VT = Node->getOperand(2)->getValueType(0);
3190  switch (IntNo) {
3191  default:
3192  break;
3193  case Intrinsic::aarch64_neon_st1x2: {
3194  if (VT == MVT::v8i8) {
3195  SelectStore(Node, 2, AArch64::ST1Twov8b);
3196  return;
3197  } else if (VT == MVT::v16i8) {
3198  SelectStore(Node, 2, AArch64::ST1Twov16b);
3199  return;
3200  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3201  SelectStore(Node, 2, AArch64::ST1Twov4h);
3202  return;
3203  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3204  SelectStore(Node, 2, AArch64::ST1Twov8h);
3205  return;
3206  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3207  SelectStore(Node, 2, AArch64::ST1Twov2s);
3208  return;
3209  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3210  SelectStore(Node, 2, AArch64::ST1Twov4s);
3211  return;
3212  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3213  SelectStore(Node, 2, AArch64::ST1Twov2d);
3214  return;
3215  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3216  SelectStore(Node, 2, AArch64::ST1Twov1d);
3217  return;
3218  }
3219  break;
3220  }
3221  case Intrinsic::aarch64_neon_st1x3: {
3222  if (VT == MVT::v8i8) {
3223  SelectStore(Node, 3, AArch64::ST1Threev8b);
3224  return;
3225  } else if (VT == MVT::v16i8) {
3226  SelectStore(Node, 3, AArch64::ST1Threev16b);
3227  return;
3228  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3229  SelectStore(Node, 3, AArch64::ST1Threev4h);
3230  return;
3231  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3232  SelectStore(Node, 3, AArch64::ST1Threev8h);
3233  return;
3234  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3235  SelectStore(Node, 3, AArch64::ST1Threev2s);
3236  return;
3237  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3238  SelectStore(Node, 3, AArch64::ST1Threev4s);
3239  return;
3240  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3241  SelectStore(Node, 3, AArch64::ST1Threev2d);
3242  return;
3243  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3244  SelectStore(Node, 3, AArch64::ST1Threev1d);
3245  return;
3246  }
3247  break;
3248  }
3249  case Intrinsic::aarch64_neon_st1x4: {
3250  if (VT == MVT::v8i8) {
3251  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3252  return;
3253  } else if (VT == MVT::v16i8) {
3254  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3255  return;
3256  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3257  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3258  return;
3259  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3260  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3261  return;
3262  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3263  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3264  return;
3265  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3266  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3267  return;
3268  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3269  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3270  return;
3271  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3272  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3273  return;
3274  }
3275  break;
3276  }
3277  case Intrinsic::aarch64_neon_st2: {
3278  if (VT == MVT::v8i8) {
3279  SelectStore(Node, 2, AArch64::ST2Twov8b);
3280  return;
3281  } else if (VT == MVT::v16i8) {
3282  SelectStore(Node, 2, AArch64::ST2Twov16b);
3283  return;
3284  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3285  SelectStore(Node, 2, AArch64::ST2Twov4h);
3286  return;
3287  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3288  SelectStore(Node, 2, AArch64::ST2Twov8h);
3289  return;
3290  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3291  SelectStore(Node, 2, AArch64::ST2Twov2s);
3292  return;
3293  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3294  SelectStore(Node, 2, AArch64::ST2Twov4s);
3295  return;
3296  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3297  SelectStore(Node, 2, AArch64::ST2Twov2d);
3298  return;
3299  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3300  SelectStore(Node, 2, AArch64::ST1Twov1d);
3301  return;
3302  }
3303  break;
3304  }
3305  case Intrinsic::aarch64_neon_st3: {
3306  if (VT == MVT::v8i8) {
3307  SelectStore(Node, 3, AArch64::ST3Threev8b);
3308  return;
3309  } else if (VT == MVT::v16i8) {
3310  SelectStore(Node, 3, AArch64::ST3Threev16b);
3311  return;
3312  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3313  SelectStore(Node, 3, AArch64::ST3Threev4h);
3314  return;
3315  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3316  SelectStore(Node, 3, AArch64::ST3Threev8h);
3317  return;
3318  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3319  SelectStore(Node, 3, AArch64::ST3Threev2s);
3320  return;
3321  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3322  SelectStore(Node, 3, AArch64::ST3Threev4s);
3323  return;
3324  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3325  SelectStore(Node, 3, AArch64::ST3Threev2d);
3326  return;
3327  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3328  SelectStore(Node, 3, AArch64::ST1Threev1d);
3329  return;
3330  }
3331  break;
3332  }
3333  case Intrinsic::aarch64_neon_st4: {
3334  if (VT == MVT::v8i8) {
3335  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3336  return;
3337  } else if (VT == MVT::v16i8) {
3338  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3339  return;
3340  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3341  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3342  return;
3343  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3344  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3345  return;
3346  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3347  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3348  return;
3349  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3350  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3351  return;
3352  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3353  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3354  return;
3355  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3356  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3357  return;
3358  }
3359  break;
3360  }
3361  case Intrinsic::aarch64_neon_st2lane: {
3362  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3363  SelectStoreLane(Node, 2, AArch64::ST2i8);
3364  return;
3365  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3366  VT == MVT::v8f16) {
3367  SelectStoreLane(Node, 2, AArch64::ST2i16);
3368  return;
3369  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3370  VT == MVT::v2f32) {
3371  SelectStoreLane(Node, 2, AArch64::ST2i32);
3372  return;
3373  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3374  VT == MVT::v1f64) {
3375  SelectStoreLane(Node, 2, AArch64::ST2i64);
3376  return;
3377  }
3378  break;
3379  }
3380  case Intrinsic::aarch64_neon_st3lane: {
3381  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3382  SelectStoreLane(Node, 3, AArch64::ST3i8);
3383  return;
3384  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3385  VT == MVT::v8f16) {
3386  SelectStoreLane(Node, 3, AArch64::ST3i16);
3387  return;
3388  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3389  VT == MVT::v2f32) {
3390  SelectStoreLane(Node, 3, AArch64::ST3i32);
3391  return;
3392  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3393  VT == MVT::v1f64) {
3394  SelectStoreLane(Node, 3, AArch64::ST3i64);
3395  return;
3396  }
3397  break;
3398  }
3399  case Intrinsic::aarch64_neon_st4lane: {
3400  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3401  SelectStoreLane(Node, 4, AArch64::ST4i8);
3402  return;
3403  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3404  VT == MVT::v8f16) {
3405  SelectStoreLane(Node, 4, AArch64::ST4i16);
3406  return;
3407  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3408  VT == MVT::v2f32) {
3409  SelectStoreLane(Node, 4, AArch64::ST4i32);
3410  return;
3411  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3412  VT == MVT::v1f64) {
3413  SelectStoreLane(Node, 4, AArch64::ST4i64);
3414  return;
3415  }
3416  break;
3417  }
3418  }
3419  break;
3420  }
3421  case AArch64ISD::LD2post: {
3422  if (VT == MVT::v8i8) {
3423  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3424  return;
3425  } else if (VT == MVT::v16i8) {
3426  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3427  return;
3428  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3429  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3430  return;
3431  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3432  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3433  return;
3434  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3435  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3436  return;
3437  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3438  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3439  return;
3440  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3441  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3442  return;
3443  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3444  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3445  return;
3446  }
3447  break;
3448  }
3449  case AArch64ISD::LD3post: {
3450  if (VT == MVT::v8i8) {
3451  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3452  return;
3453  } else if (VT == MVT::v16i8) {
3454  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3455  return;
3456  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3457  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3458  return;
3459  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3460  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3461  return;
3462  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3463  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3464  return;
3465  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3466  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3467  return;
3468  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3469  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3470  return;
3471  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3472  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3473  return;
3474  }
3475  break;
3476  }
3477  case AArch64ISD::LD4post: {
3478  if (VT == MVT::v8i8) {
3479  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3480  return;
3481  } else if (VT == MVT::v16i8) {
3482  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3483  return;
3484  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3485  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3486  return;
3487  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3488  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3489  return;
3490  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3491  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3492  return;
3493  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3494  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3495  return;
3496  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3497  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3498  return;
3499  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3500  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3501  return;
3502  }
3503  break;
3504  }
3505  case AArch64ISD::LD1x2post: {
3506  if (VT == MVT::v8i8) {
3507  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3508  return;
3509  } else if (VT == MVT::v16i8) {
3510  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3511  return;
3512  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3513  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3514  return;
3515  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3516  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3517  return;
3518  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3519  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3520  return;
3521  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3522  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3523  return;
3524  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3525  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3526  return;
3527  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3528  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3529  return;
3530  }
3531  break;
3532  }
3533  case AArch64ISD::LD1x3post: {
3534  if (VT == MVT::v8i8) {
3535  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3536  return;
3537  } else if (VT == MVT::v16i8) {
3538  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3539  return;
3540  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3541  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3542  return;
3543  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3544  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3545  return;
3546  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3547  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3548  return;
3549  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3550  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3551  return;
3552  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3553  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3554  return;
3555  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3556  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3557  return;
3558  }
3559  break;
3560  }
3561  case AArch64ISD::LD1x4post: {
3562  if (VT == MVT::v8i8) {
3563  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3564  return;
3565  } else if (VT == MVT::v16i8) {
3566  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3567  return;
3568  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3569  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3570  return;
3571  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3572  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3573  return;
3574  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3575  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3576  return;
3577  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3578  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3579  return;
3580  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3581  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3582  return;
3583  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3584  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3585  return;
3586  }
3587  break;
3588  }
3589  case AArch64ISD::LD1DUPpost: {
3590  if (VT == MVT::v8i8) {
3591  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3592  return;
3593  } else if (VT == MVT::v16i8) {
3594  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3595  return;
3596  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3597  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3598  return;
3599  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3600  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3601  return;
3602  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3603  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3604  return;
3605  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3606  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3607  return;
3608  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3609  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3610  return;
3611  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3612  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3613  return;
3614  }
3615  break;
3616  }
3617  case AArch64ISD::LD2DUPpost: {
3618  if (VT == MVT::v8i8) {
3619  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3620  return;
3621  } else if (VT == MVT::v16i8) {
3622  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3623  return;
3624  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3625  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3626  return;
3627  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3628  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3629  return;
3630  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3631  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3632  return;
3633  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3634  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3635  return;
3636  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3637  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3638  return;
3639  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3640  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3641  return;
3642  }
3643  break;
3644  }
3645  case AArch64ISD::LD3DUPpost: {
3646  if (VT == MVT::v8i8) {
3647  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3648  return;
3649  } else if (VT == MVT::v16i8) {
3650  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3651  return;
3652  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3653  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3654  return;
3655  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3656  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3657  return;
3658  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3659  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3660  return;
3661  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3662  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3663  return;
3664  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3665  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3666  return;
3667  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3668  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3669  return;
3670  }
3671  break;
3672  }
3673  case AArch64ISD::LD4DUPpost: {
3674  if (VT == MVT::v8i8) {
3675  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3676  return;
3677  } else if (VT == MVT::v16i8) {
3678  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3679  return;
3680  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3681  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3682  return;
3683  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3684  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3685  return;
3686  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3687  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3688  return;
3689  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3690  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3691  return;
3692  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3693  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3694  return;
3695  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3696  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3697  return;
3698  }
3699  break;
3700  }
3701  case AArch64ISD::LD1LANEpost: {
3702  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3703  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3704  return;
3705  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3706  VT == MVT::v8f16) {
3707  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3708  return;
3709  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3710  VT == MVT::v2f32) {
3711  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3712  return;
3713  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3714  VT == MVT::v1f64) {
3715  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3716  return;
3717  }
3718  break;
3719  }
3720  case AArch64ISD::LD2LANEpost: {
3721  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3722  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3723  return;
3724  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3725  VT == MVT::v8f16) {
3726  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3727  return;
3728  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3729  VT == MVT::v2f32) {
3730  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3731  return;
3732  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3733  VT == MVT::v1f64) {
3734  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3735  return;
3736  }
3737  break;
3738  }
3739  case AArch64ISD::LD3LANEpost: {
3740  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3741  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3742  return;
3743  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3744  VT == MVT::v8f16) {
3745  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3746  return;
3747  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3748  VT == MVT::v2f32) {
3749  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3750  return;
3751  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3752  VT == MVT::v1f64) {
3753  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3754  return;
3755  }
3756  break;
3757  }
3758  case AArch64ISD::LD4LANEpost: {
3759  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3760  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3761  return;
3762  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3763  VT == MVT::v8f16) {
3764  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3765  return;
3766  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3767  VT == MVT::v2f32) {
3768  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3769  return;
3770  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3771  VT == MVT::v1f64) {
3772  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3773  return;
3774  }
3775  break;
3776  }
3777  case AArch64ISD::ST2post: {
3778  VT = Node->getOperand(1).getValueType();
3779  if (VT == MVT::v8i8) {
3780  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3781  return;
3782  } else if (VT == MVT::v16i8) {
3783  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3784  return;
3785  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3786  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3787  return;
3788  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3789  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3790  return;
3791  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3792  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3793  return;
3794  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3795  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3796  return;
3797  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3798  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3799  return;
3800  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3801  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3802  return;
3803  }
3804  break;
3805  }
3806  case AArch64ISD::ST3post: {
3807  VT = Node->getOperand(1).getValueType();
3808  if (VT == MVT::v8i8) {
3809  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3810  return;
3811  } else if (VT == MVT::v16i8) {
3812  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3813  return;
3814  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3815  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3816  return;
3817  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3818  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3819  return;
3820  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3821  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3822  return;
3823  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3824  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3825  return;
3826  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3827  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3828  return;
3829  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3830  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3831  return;
3832  }
3833  break;
3834  }
3835  case AArch64ISD::ST4post: {
3836  VT = Node->getOperand(1).getValueType();
3837  if (VT == MVT::v8i8) {
3838  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3839  return;
3840  } else if (VT == MVT::v16i8) {
3841  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3842  return;
3843  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3844  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3845  return;
3846  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3847  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3848  return;
3849  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3850  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3851  return;
3852  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3853  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3854  return;
3855  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3856  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3857  return;
3858  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3859  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3860  return;
3861  }
3862  break;
3863  }
3864  case AArch64ISD::ST1x2post: {
3865  VT = Node->getOperand(1).getValueType();
3866  if (VT == MVT::v8i8) {
3867  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3868  return;
3869  } else if (VT == MVT::v16i8) {
3870  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
3871  return;
3872  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3873  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
3874  return;
3875  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3876  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
3877  return;
3878  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3879  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
3880  return;
3881  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3882  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
3883  return;
3884  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3885  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3886  return;
3887  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3888  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
3889  return;
3890  }
3891  break;
3892  }
3893  case AArch64ISD::ST1x3post: {
3894  VT = Node->getOperand(1).getValueType();
3895  if (VT == MVT::v8i8) {
3896  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
3897  return;
3898  } else if (VT == MVT::v16i8) {
3899  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
3900  return;
3901  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3902  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
3903  return;
3904  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3905  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
3906  return;
3907  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3908  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
3909  return;
3910  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3911  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
3912  return;
3913  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3914  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3915  return;
3916  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3917  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
3918  return;
3919  }
3920  break;
3921  }
3922  case AArch64ISD::ST1x4post: {
3923  VT = Node->getOperand(1).getValueType();
3924  if (VT == MVT::v8i8) {
3925  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
3926  return;
3927  } else if (VT == MVT::v16i8) {
3928  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
3929  return;
3930  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3931  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
3932  return;
3933  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3934  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
3935  return;
3936  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3937  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
3938  return;
3939  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3940  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
3941  return;
3942  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3943  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3944  return;
3945  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3946  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
3947  return;
3948  }
3949  break;
3950  }
3951  case AArch64ISD::ST2LANEpost: {
3952  VT = Node->getOperand(1).getValueType();
3953  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3954  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
3955  return;
3956  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3957  VT == MVT::v8f16) {
3958  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
3959  return;
3960  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3961  VT == MVT::v2f32) {
3962  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
3963  return;
3964  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3965  VT == MVT::v1f64) {
3966  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
3967  return;
3968  }
3969  break;
3970  }
3971  case AArch64ISD::ST3LANEpost: {
3972  VT = Node->getOperand(1).getValueType();
3973  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3974  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
3975  return;
3976  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3977  VT == MVT::v8f16) {
3978  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
3979  return;
3980  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3981  VT == MVT::v2f32) {
3982  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
3983  return;
3984  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3985  VT == MVT::v1f64) {
3986  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
3987  return;
3988  }
3989  break;
3990  }
3991  case AArch64ISD::ST4LANEpost: {
3992  VT = Node->getOperand(1).getValueType();
3993  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3994  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
3995  return;
3996  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3997  VT == MVT::v8f16) {
3998  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
3999  return;
4000  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4001  VT == MVT::v2f32) {
4002  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4003  return;
4004  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4005  VT == MVT::v1f64) {
4006  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4007  return;
4008  }
4009  break;
4010  }
4011  }
4012 
4013  // Select the default instruction via the TableGen-generated matcher.
4014  SelectCode(Node);
4015 }
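To connect this dispatch to user code, here is an illustrative sketch (an assumption for exposition, not part of this file): Clang is expected to lower the NEON intrinsic below to @llvm.aarch64.neon.ld2lane, which the ld2lane case above then selects to AArch64::LD2i32.

// Illustrative sketch (assumption): vld2q_lane_s32 from <arm_neon.h> should
// reach this selector as @llvm.aarch64.neon.ld2lane.v4i32 and be matched to
// AArch64::LD2i32, i.e. a de-interleaving "ld2 { vA.s, vB.s }[1], [x0]".
#include <arm_neon.h>

int32x4x2_t load_second_lane(const int32_t *p, int32x4x2_t acc) {
  return vld2q_lane_s32(p, acc, 1); // load lane 1 of both result registers from *p
}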
4016 
4017 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4018 /// AArch64-specific DAG, ready for instruction scheduling.
4019 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4020  CodeGenOpt::Level OptLevel) {
4021  return new AArch64DAGToDAGISel(TM, OptLevel);
4022 }
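For context, the sketch below shows how this factory is conventionally registered with the codegen pipeline, assuming the TargetPassConfig::addInstSelector() hook that AArch64TargetMachine.cpp overrides; the surrounding AArch64PassConfig class definition and any additional cleanup passes are elided, so treat the exact body as an approximation rather than the file's definitive contents.

// Sketch (assumption): wiring createAArch64ISelDag() into the pipeline from
// the target's TargetPassConfig subclass.
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/TargetPassConfig.h"
using namespace llvm;

bool AArch64PassConfig::addInstSelector() {
  // Install the SelectionDAG-based instruction selector defined in this file.
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
  return false; // By convention, returning false means selection was added without error.
}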