//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (such as those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check.
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check.
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }
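
  // Editor's illustration (not in the original source): an ARM-mode "so_imm"
  // is an 8-bit value rotated right by an even amount, so 0xFF, 0x3FC
  // (0xFF ror 30) and 0xFF000000 are encodable while 0x1FE (which would need
  // an odd rotation) is not. A Thumb-2 "t2_so_imm" also accepts, among other
  // forms, the splat patterns 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY, so
  // 0x00FF00FF is a valid t2_so_imm but not a valid so_imm.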

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands,
  /// arm_mve_vmlldava_[predicated].
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands,
  /// int_arm_mve_vrmlldavha[_predicated].
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if the node forms an integer ABS pattern.
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace

/// isInt32Immediate - This method tests to see if the node is a 32-bit
/// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
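
// Editor's worked example (not in the original source): with Scale == 4,
// RangeMin == 0 and RangeMax == 256, a constant node of 1020 succeeds
// (1020 % 4 == 0 and 1020 / 4 == 255 < 256) and ScaledConstant is set to 255,
// while 1021 (not a multiple of 4) and 1024 (scaled value 256, out of range)
// both fail.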

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is a constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit-field
    // extraction node (UBFX).
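    // For example (editor's illustration), with c1 == 14 and c2 == 1020
    // (0b1111111100, tz == 2):
    //   (add X1, (and (srl X2, 14), 1020))
    // becomes
    //   (add X1, (shl (and (srl X2, 16), 255), 2))
    // where the srl/and pair then selects to UBFX and the shl folds into the
    // add as a shifter operand.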

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, a left shift of 1 or 2 as a shifter operand is free, but others
      // are not. e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure the first operand is not a shifter operand, which would
    // prevent folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
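
// Editor's illustration (not in the original source): on an A9-like or Swift
// core, "ldr r0, [r1, r2, lsl #2]" keeps the "r2, lsl #2" shifter operand
// even when the shift has other users, because an LSL of 2 (or 1 on Swift)
// is free there, whereas a multi-use "r2, lsl #3" would not be folded.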

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant.
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better.
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
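
// Editor's illustration (not in the original source): for (mul X, 0xFF00)
// with MaxShift == 31, PowerOfTwo becomes 8 and NewMulConst becomes 0xFF. On
// a target where 0xFF00 is not a single-instruction immediate but 0xFF is
// (e.g. Thumb1, where 0xFF00 may need a MOV plus a shift), the multiply can
// then be selected as (mul X, 0xFF) with an "lsl #8" shifter operand.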

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand, do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match the base-register-only case; that is matched by a separate,
  // lower-complexity pattern with an explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match the base-register-only case; that is matched by a separate,
  // lower-complexity pattern with an explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
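
// Editor's illustration (not in the original source): in
// (or (shl X, 8), 0xFF) the two operands can share no set bits, so the OR is
// equivalent to an ADD and can reuse ARM's add-with-shifter-operand
// encodings; haveNoCommonBitsSet proves exactly that disjointness.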

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12.
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant; if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand, do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
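
// Editor's illustration (not in the original source): for a Thumb1 load of
// (add r1, #-8), immediate load offsets are unsigned, so rather than trying
// the unencodable "ldr r0, [r1, #-8]" we let the add be selected on its own
// (becoming a subtract of 8 from r1) followed by a zero-offset load.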

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isZero())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7-bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}
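
// Editor's illustration (not in the original source): MVE pre/post-indexed
// accesses encode a 7-bit offset scaled by the access size. With Shift == 2,
// a constant of 60 matches (60 == 15 << 2 and 15 < 0x80) and OffImm becomes
// +60 for an increment or -60 for a decrement.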

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand, do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}
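
// Editor's illustration (not in the original source): LDREX/STREX encode the
// offset as imm8 * 4, so (add r1, #1020) yields Base == r1 and OffImm == 255,
// while (add r1, #1021) falls back to Base == the whole add and OffImm == 0,
// leaving a separate ADD to materialize the address.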

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1581 
1582 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1583  LoadSDNode *LD = cast<LoadSDNode>(N);
1584  ISD::MemIndexedMode AM = LD->getAddressingMode();
1585  if (AM == ISD::UNINDEXED)
1586  return false;
1587 
1588  EVT LoadedVT = LD->getMemoryVT();
1589  SDValue Offset, AMOpc;
1590  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1591  unsigned Opcode = 0;
1592  bool Match = false;
1593  if (LoadedVT == MVT::i32 && isPre &&
1594  SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1595  Opcode = ARM::LDR_PRE_IMM;
1596  Match = true;
1597  } else if (LoadedVT == MVT::i32 && !isPre &&
1598  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1599  Opcode = ARM::LDR_POST_IMM;
1600  Match = true;
1601  } else if (LoadedVT == MVT::i32 &&
1602  SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1603  Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1604  Match = true;
1605 
1606  } else if (LoadedVT == MVT::i16 &&
1607  SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1608  Match = true;
1609  Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1610  ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1611  : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1612  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1613  if (LD->getExtensionType() == ISD::SEXTLOAD) {
1614  if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1615  Match = true;
1616  Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1617  }
1618  } else {
1619  if (isPre &&
1620  SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1621  Match = true;
1622  Opcode = ARM::LDRB_PRE_IMM;
1623  } else if (!isPre &&
1624  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1625  Match = true;
1626  Opcode = ARM::LDRB_POST_IMM;
1627  } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1628  Match = true;
1629  Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1630  }
1631  }
1632  }
1633 
1634  if (Match) {
1635  if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1636  SDValue Chain = LD->getChain();
1637  SDValue Base = LD->getBasePtr();
1638  SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1639  CurDAG->getRegister(0, MVT::i32), Chain };
1640  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1641  MVT::Other, Ops);
1642  transferMemOperands(N, New);
1643  ReplaceNode(N, New);
1644  return true;
1645  } else {
1646  SDValue Chain = LD->getChain();
1647  SDValue Base = LD->getBasePtr();
1648  SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1649  CurDAG->getRegister(0, MVT::i32), Chain };
1650  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1651  MVT::Other, Ops);
1652  transferMemOperands(N, New);
1653  ReplaceNode(N, New);
1654  return true;
1655  }
1656  }
1657 
1658  return false;
1659 }
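// The pre-indexed opcodes selected above correspond to forms such as
// "ldr r0, [r1, #4]!" (base updated before the access) and the post-indexed
// ones to "ldr r0, [r1], #4" (base updated afterwards); in both cases the
// machine node produces the loaded value and the updated base address as
// separate results.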
1660 
1661 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1662  LoadSDNode *LD = cast<LoadSDNode>(N);
1663  EVT LoadedVT = LD->getMemoryVT();
1664  ISD::MemIndexedMode AM = LD->getAddressingMode();
1665  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1666  LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1667  return false;
1668 
1669  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1670  if (!COffs || COffs->getZExtValue() != 4)
1671  return false;
1672 
1673  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1674  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1675  // look, however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1676  // ISel.
1677  SDValue Chain = LD->getChain();
1678  SDValue Base = LD->getBasePtr();
1679  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1680  CurDAG->getRegister(0, MVT::i32), Chain };
1681  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1682  MVT::i32, MVT::Other, Ops);
1683  transferMemOperands(N, New);
1684  ReplaceNode(N, New);
1685  return true;
1686 }
1687 
1688 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1689  LoadSDNode *LD = cast<LoadSDNode>(N);
1690  ISD::MemIndexedMode AM = LD->getAddressingMode();
1691  if (AM == ISD::UNINDEXED)
1692  return false;
1693 
1694  EVT LoadedVT = LD->getMemoryVT();
1695  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1696  SDValue Offset;
1697  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1698  unsigned Opcode = 0;
1699  bool Match = false;
1700  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1701  switch (LoadedVT.getSimpleVT().SimpleTy) {
1702  case MVT::i32:
1703  Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1704  break;
1705  case MVT::i16:
1706  if (isSExtLd)
1707  Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1708  else
1709  Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1710  break;
1711  case MVT::i8:
1712  case MVT::i1:
1713  if (isSExtLd)
1714  Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1715  else
1716  Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1717  break;
1718  default:
1719  return false;
1720  }
1721  Match = true;
1722  }
1723 
1724  if (Match) {
1725  SDValue Chain = LD->getChain();
1726  SDValue Base = LD->getBasePtr();
1727  SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1728  CurDAG->getRegister(0, MVT::i32), Chain };
1729  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1730  MVT::Other, Ops);
1731  transferMemOperands(N, New);
1732  ReplaceNode(N, New);
1733  return true;
1734  }
1735 
1736  return false;
1737 }
1738 
1739 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1740  EVT LoadedVT;
1741  unsigned Opcode = 0;
1742  bool isSExtLd, isPre;
1743  Align Alignment;
1744  ARMVCC::VPTCodes Pred;
1745  SDValue PredReg;
1746  SDValue Chain, Base, Offset;
1747 
1748  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1749  ISD::MemIndexedMode AM = LD->getAddressingMode();
1750  if (AM == ISD::UNINDEXED)
1751  return false;
1752  LoadedVT = LD->getMemoryVT();
1753  if (!LoadedVT.isVector())
1754  return false;
1755 
1756  Chain = LD->getChain();
1757  Base = LD->getBasePtr();
1758  Offset = LD->getOffset();
1759  Alignment = LD->getAlign();
1760  isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1761  isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1762  Pred = ARMVCC::None;
1763  PredReg = CurDAG->getRegister(0, MVT::i32);
1764  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1765  ISD::MemIndexedMode AM = LD->getAddressingMode();
1766  if (AM == ISD::UNINDEXED)
1767  return false;
1768  LoadedVT = LD->getMemoryVT();
1769  if (!LoadedVT.isVector())
1770  return false;
1771 
1772  Chain = LD->getChain();
1773  Base = LD->getBasePtr();
1774  Offset = LD->getOffset();
1775  Alignment = LD->getAlign();
1776  isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1777  isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1778  Pred = ARMVCC::Then;
1779  PredReg = LD->getMask();
1780  } else
1781  llvm_unreachable("Expected a Load or a Masked Load!");
1782 
1783  // We allow LE non-masked loads to change the type (for example use a vldrb.8
1784  // as opposed to a vldrw.32). This can allow extra addressing modes or
1785  // alignments for what is otherwise an equivalent instruction.
1786  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1787 
1788  SDValue NewOffset;
1789  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1790  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1791  if (isSExtLd)
1792  Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1793  else
1794  Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1795  } else if (LoadedVT == MVT::v8i8 &&
1796  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1797  if (isSExtLd)
1798  Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1799  else
1800  Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1801  } else if (LoadedVT == MVT::v4i8 &&
1802  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1803  if (isSExtLd)
1804  Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1805  else
1806  Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1807  } else if (Alignment >= Align(4) &&
1808  (CanChangeType || LoadedVT == MVT::v4i32 ||
1809  LoadedVT == MVT::v4f32) &&
1810  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1811  Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1812  else if (Alignment >= Align(2) &&
1813  (CanChangeType || LoadedVT == MVT::v8i16 ||
1814  LoadedVT == MVT::v8f16) &&
1815  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1816  Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1817  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1818  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1819  Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1820  else
1821  return false;
1822 
1823  SDValue Ops[] = {Base,
1824  NewOffset,
1825  CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1826  PredReg,
1827  CurDAG->getRegister(0, MVT::i32), // tp_reg
1828  Chain};
1829  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1830  N->getValueType(0), MVT::Other, Ops);
1831  transferMemOperands(N, New);
1832  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1833  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1834  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1835  CurDAG->RemoveDeadNode(N);
1836  return true;
1837 }
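// As an illustration of CanChangeType above: on a little-endian target an
// unpredicated post-incremented v4i32 load whose increment does not fit the
// word-scaled 7-bit immediate of MVE_VLDRWU32_post can still be selected as
// MVE_VLDRBU8_post, whose immediate is byte-granular, because an unpredicated
// LE vldrb.8 of the same bytes is bit-identical to a vldrw.32.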
1838 
1839 /// Form a GPRPair pseudo register from a pair of GPR regs.
1840 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1841  SDLoc dl(V0.getNode());
1842  SDValue RegClass =
1843  CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1844  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1845  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1846  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1847  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1848 }
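// The REG_SEQUENCE built here is roughly equivalent to the MIR
//   %pair:gprpair = REG_SEQUENCE %lo, %subreg.gsub_0, %hi, %subreg.gsub_1
// and the helpers below follow the same pattern for S, D and Q registers.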
1849 
1850 /// Form a D register from a pair of S registers.
1851 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1852  SDLoc dl(V0.getNode());
1853  SDValue RegClass =
1854  CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1855  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1856  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1857  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1858  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1859 }
1860 
1861 /// Form a quad register from a pair of D registers.
1862 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1863  SDLoc dl(V0.getNode());
1864  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1865  MVT::i32);
1866  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1867  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1868  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1869  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1870 }
1871 
1872 /// Form 4 consecutive D registers from a pair of Q registers.
1873 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1874  SDLoc dl(V0.getNode());
1875  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1876  MVT::i32);
1877  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1878  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1879  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1880  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1881 }
1882 
1883 /// Form 4 consecutive S registers.
1884 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1885  SDValue V2, SDValue V3) {
1886  SDLoc dl(V0.getNode());
1887  SDValue RegClass =
1888  CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1889  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1890  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1891  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1892  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1893  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1894  V2, SubReg2, V3, SubReg3 };
1895  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1896 }
1897 
1898 /// Form 4 consecutive D registers.
1899 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1900  SDValue V2, SDValue V3) {
1901  SDLoc dl(V0.getNode());
1902  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1903  MVT::i32);
1904  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1905  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1906  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1907  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1908  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1909  V2, SubReg2, V3, SubReg3 };
1910  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1911 }
1912 
1913 /// Form 4 consecutive Q registers.
1914 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1915  SDValue V2, SDValue V3) {
1916  SDLoc dl(V0.getNode());
1917  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1918  MVT::i32);
1919  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1920  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1921  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1922  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1923  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1924  V2, SubReg2, V3, SubReg3 };
1925  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1926 }
1927 
1928 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1929 /// of a NEON VLD or VST instruction. The supported values depend on the
1930 /// number of registers being loaded.
1931 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1932  unsigned NumVecs, bool is64BitVector) {
1933  unsigned NumRegs = NumVecs;
1934  if (!is64BitVector && NumVecs < 3)
1935  NumRegs *= 2;
1936 
1937  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1938  if (Alignment >= 32 && NumRegs == 4)
1939  Alignment = 32;
1940  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1941  Alignment = 16;
1942  else if (Alignment >= 8)
1943  Alignment = 8;
1944  else
1945  Alignment = 0;
1946 
1947  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1948 }
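// Worked example: a vld2 of two quad registers has NumVecs == 2 and
// is64BitVector == false, so NumRegs becomes 4; an input alignment of 64
// bytes is then clamped to 32, the largest alignment the VLD/VST encoding can
// express for four registers.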
1949 
1950 static bool isVLDfixed(unsigned Opc)
1951 {
1952  switch (Opc) {
1953  default: return false;
1954  case ARM::VLD1d8wb_fixed : return true;
1955  case ARM::VLD1d16wb_fixed : return true;
1956  case ARM::VLD1d64Qwb_fixed : return true;
1957  case ARM::VLD1d32wb_fixed : return true;
1958  case ARM::VLD1d64wb_fixed : return true;
1959  case ARM::VLD1d8TPseudoWB_fixed : return true;
1960  case ARM::VLD1d16TPseudoWB_fixed : return true;
1961  case ARM::VLD1d32TPseudoWB_fixed : return true;
1962  case ARM::VLD1d64TPseudoWB_fixed : return true;
1963  case ARM::VLD1d8QPseudoWB_fixed : return true;
1964  case ARM::VLD1d16QPseudoWB_fixed : return true;
1965  case ARM::VLD1d32QPseudoWB_fixed : return true;
1966  case ARM::VLD1d64QPseudoWB_fixed : return true;
1967  case ARM::VLD1q8wb_fixed : return true;
1968  case ARM::VLD1q16wb_fixed : return true;
1969  case ARM::VLD1q32wb_fixed : return true;
1970  case ARM::VLD1q64wb_fixed : return true;
1971  case ARM::VLD1DUPd8wb_fixed : return true;
1972  case ARM::VLD1DUPd16wb_fixed : return true;
1973  case ARM::VLD1DUPd32wb_fixed : return true;
1974  case ARM::VLD1DUPq8wb_fixed : return true;
1975  case ARM::VLD1DUPq16wb_fixed : return true;
1976  case ARM::VLD1DUPq32wb_fixed : return true;
1977  case ARM::VLD2d8wb_fixed : return true;
1978  case ARM::VLD2d16wb_fixed : return true;
1979  case ARM::VLD2d32wb_fixed : return true;
1980  case ARM::VLD2q8PseudoWB_fixed : return true;
1981  case ARM::VLD2q16PseudoWB_fixed : return true;
1982  case ARM::VLD2q32PseudoWB_fixed : return true;
1983  case ARM::VLD2DUPd8wb_fixed : return true;
1984  case ARM::VLD2DUPd16wb_fixed : return true;
1985  case ARM::VLD2DUPd32wb_fixed : return true;
1986  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1987  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1988  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1989  }
1990 }
1991 
1992 static bool isVSTfixed(unsigned Opc)
1993 {
1994  switch (Opc) {
1995  default: return false;
1996  case ARM::VST1d8wb_fixed : return true;
1997  case ARM::VST1d16wb_fixed : return true;
1998  case ARM::VST1d32wb_fixed : return true;
1999  case ARM::VST1d64wb_fixed : return true;
2000  case ARM::VST1q8wb_fixed : return true;
2001  case ARM::VST1q16wb_fixed : return true;
2002  case ARM::VST1q32wb_fixed : return true;
2003  case ARM::VST1q64wb_fixed : return true;
2004  case ARM::VST1d8TPseudoWB_fixed : return true;
2005  case ARM::VST1d16TPseudoWB_fixed : return true;
2006  case ARM::VST1d32TPseudoWB_fixed : return true;
2007  case ARM::VST1d64TPseudoWB_fixed : return true;
2008  case ARM::VST1d8QPseudoWB_fixed : return true;
2009  case ARM::VST1d16QPseudoWB_fixed : return true;
2010  case ARM::VST1d32QPseudoWB_fixed : return true;
2011  case ARM::VST1d64QPseudoWB_fixed : return true;
2012  case ARM::VST2d8wb_fixed : return true;
2013  case ARM::VST2d16wb_fixed : return true;
2014  case ARM::VST2d32wb_fixed : return true;
2015  case ARM::VST2q8PseudoWB_fixed : return true;
2016  case ARM::VST2q16PseudoWB_fixed : return true;
2017  case ARM::VST2q32PseudoWB_fixed : return true;
2018  }
2019 }
2020 
2021 // Get the register stride update opcode of a VLD/VST instruction that
2022 // is otherwise equivalent to the given fixed stride updating instruction.
2023 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2024  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2025  && "Incorrect fixed stride updating instruction.");
2026  switch (Opc) {
2027  default: break;
2028  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2029  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2030  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2031  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2032  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2033  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2034  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2035  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2036  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2037  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2038  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2039  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2040  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2041  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2042  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2043  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2044  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2045  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2046  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2047  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2048  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2049  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2050  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2051  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2052  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2053  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2054  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2055 
2056  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2057  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2058  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2059  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2060  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2061  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2062  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2063  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2064  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2065  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2066  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2067  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2068  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2069  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2070  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2071  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2072 
2073  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2074  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2075  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2076  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2077  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2078  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2079 
2080  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2081  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2082  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2083  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2084  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2085  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2086 
2087  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2088  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2089  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2090  }
2091  return Opc; // If not one we handle, return it unchanged.
2092 }
2093 
2094 /// Returns true if the given increment is a Constant known to be equal to the
2095 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2096 /// be used.
2097 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2098  auto C = dyn_cast<ConstantSDNode>(Inc);
2099  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2100 }
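// For example, a vld2.32 writeback of two v2i32 vectors accesses
// 8 * 2 == 16 bytes, so only a constant increment of exactly 16 permits the
// "vld2.32 {d0, d1}, [rN]!" form; any other increment must use the
// register-update variant chosen by getVLDSTRegisterUpdateOpcode.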
2101 
2102 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2103  const uint16_t *DOpcodes,
2104  const uint16_t *QOpcodes0,
2105  const uint16_t *QOpcodes1) {
2106  assert(Subtarget->hasNEON());
2107  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2108  SDLoc dl(N);
2109 
2110  SDValue MemAddr, Align;
2111  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2112  // nodes are not intrinsics.
2113  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2114  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2115  return;
2116 
2117  SDValue Chain = N->getOperand(0);
2118  EVT VT = N->getValueType(0);
2119  bool is64BitVector = VT.is64BitVector();
2120  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2121 
2122  unsigned OpcodeIndex;
2123  switch (VT.getSimpleVT().SimpleTy) {
2124  default: llvm_unreachable("unhandled vld type");
2125  // Double-register operations:
2126  case MVT::v8i8: OpcodeIndex = 0; break;
2127  case MVT::v4f16:
2128  case MVT::v4bf16:
2129  case MVT::v4i16: OpcodeIndex = 1; break;
2130  case MVT::v2f32:
2131  case MVT::v2i32: OpcodeIndex = 2; break;
2132  case MVT::v1i64: OpcodeIndex = 3; break;
2133  // Quad-register operations:
2134  case MVT::v16i8: OpcodeIndex = 0; break;
2135  case MVT::v8f16:
2136  case MVT::v8bf16:
2137  case MVT::v8i16: OpcodeIndex = 1; break;
2138  case MVT::v4f32:
2139  case MVT::v4i32: OpcodeIndex = 2; break;
2140  case MVT::v2f64:
2141  case MVT::v2i64: OpcodeIndex = 3; break;
2142  }
2143 
2144  EVT ResTy;
2145  if (NumVecs == 1)
2146  ResTy = VT;
2147  else {
2148  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2149  if (!is64BitVector)
2150  ResTyElts *= 2;
2151  ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2152  }
2153  std::vector<EVT> ResTys;
2154  ResTys.push_back(ResTy);
2155  if (isUpdating)
2156  ResTys.push_back(MVT::i32);
2157  ResTys.push_back(MVT::Other);
2158 
2159  SDValue Pred = getAL(CurDAG, dl);
2160  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2161  SDNode *VLd;
2162  SmallVector<SDValue, 7> Ops;
2163 
2164  // Double registers and VLD1/VLD2 quad registers are directly supported.
2165  if (is64BitVector || NumVecs <= 2) {
2166  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2167  QOpcodes0[OpcodeIndex]);
2168  Ops.push_back(MemAddr);
2169  Ops.push_back(Align);
2170  if (isUpdating) {
2171  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2172  bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2173  if (!IsImmUpdate) {
2174  // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2175  // check for the opcode rather than the number of vector elements.
2176  if (isVLDfixed(Opc))
2177  Opc = getVLDSTRegisterUpdateOpcode(Opc);
2178  Ops.push_back(Inc);
2179  // The VLD1/VLD2 fixed-increment forms do not take a Reg0 operand, so
2180  // only include it for the other opcodes.
2181  } else if (!isVLDfixed(Opc))
2182  Ops.push_back(Reg0);
2183  }
2184  Ops.push_back(Pred);
2185  Ops.push_back(Reg0);
2186  Ops.push_back(Chain);
2187  VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2188 
2189  } else {
2190  // Otherwise, quad registers are loaded with two separate instructions,
2191  // where one loads the even registers and the other loads the odd registers.
2192  EVT AddrTy = MemAddr.getValueType();
2193 
2194  // Load the even subregs. This is always an updating load, so that it
2195  // provides the address to the second load for the odd subregs.
2196  SDValue ImplDef =
2197  SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2198  const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2199  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2200  ResTy, AddrTy, MVT::Other, OpsA);
2201  Chain = SDValue(VLdA, 2);
2202 
2203  // Load the odd subregs.
2204  Ops.push_back(SDValue(VLdA, 1));
2205  Ops.push_back(Align);
2206  if (isUpdating) {
2207  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2208  assert(isa<ConstantSDNode>(Inc.getNode()) &&
2209  "only constant post-increment update allowed for VLD3/4");
2210  (void)Inc;
2211  Ops.push_back(Reg0);
2212  }
2213  Ops.push_back(SDValue(VLdA, 0));
2214  Ops.push_back(Pred);
2215  Ops.push_back(Reg0);
2216  Ops.push_back(Chain);
2217  VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2218  }
2219 
2220  // Transfer memoperands.
2221  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2222  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2223 
2224  if (NumVecs == 1) {
2225  ReplaceNode(N, VLd);
2226  return;
2227  }
2228 
2229  // Extract out the subregisters.
2230  SDValue SuperReg = SDValue(VLd, 0);
2231  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2232  ARM::qsub_3 == ARM::qsub_0 + 3,
2233  "Unexpected subreg numbering");
2234  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2235  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2236  ReplaceUses(SDValue(N, Vec),
2237  CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2238  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2239  if (isUpdating)
2240  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2241  CurDAG->RemoveDeadNode(N);
2242 }
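// For example, a vld3 of quad registers cannot be encoded as one instruction,
// so the code above first emits the even-numbered half as an updating load
// (e.g. "vld3.8 {d0, d2, d4}, [r0]!") and feeds the incremented address into
// the odd half ("vld3.8 {d1, d3, d5}, [r0]").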
2243 
2244 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2245  const uint16_t *DOpcodes,
2246  const uint16_t *QOpcodes0,
2247  const uint16_t *QOpcodes1) {
2248  assert(Subtarget->hasNEON());
2249  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2250  SDLoc dl(N);
2251 
2252  SDValue MemAddr, Align;
2253  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2254  // nodes are not intrinsics.
2255  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2256  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2257  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2258  return;
2259 
2260  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2261 
2262  SDValue Chain = N->getOperand(0);
2263  EVT VT = N->getOperand(Vec0Idx).getValueType();
2264  bool is64BitVector = VT.is64BitVector();
2265  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2266 
2267  unsigned OpcodeIndex;
2268  switch (VT.getSimpleVT().SimpleTy) {
2269  default: llvm_unreachable("unhandled vst type");
2270  // Double-register operations:
2271  case MVT::v8i8: OpcodeIndex = 0; break;
2272  case MVT::v4f16:
2273  case MVT::v4bf16:
2274  case MVT::v4i16: OpcodeIndex = 1; break;
2275  case MVT::v2f32:
2276  case MVT::v2i32: OpcodeIndex = 2; break;
2277  case MVT::v1i64: OpcodeIndex = 3; break;
2278  // Quad-register operations:
2279  case MVT::v16i8: OpcodeIndex = 0; break;
2280  case MVT::v8f16:
2281  case MVT::v8bf16:
2282  case MVT::v8i16: OpcodeIndex = 1; break;
2283  case MVT::v4f32:
2284  case MVT::v4i32: OpcodeIndex = 2; break;
2285  case MVT::v2f64:
2286  case MVT::v2i64: OpcodeIndex = 3; break;
2287  }
2288 
2289  std::vector<EVT> ResTys;
2290  if (isUpdating)
2291  ResTys.push_back(MVT::i32);
2292  ResTys.push_back(MVT::Other);
2293 
2294  SDValue Pred = getAL(CurDAG, dl);
2295  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2296  SmallVector<SDValue, 7> Ops;
2297 
2298  // Double registers and VST1/VST2 quad registers are directly supported.
2299  if (is64BitVector || NumVecs <= 2) {
2300  SDValue SrcReg;
2301  if (NumVecs == 1) {
2302  SrcReg = N->getOperand(Vec0Idx);
2303  } else if (is64BitVector) {
2304  // Form a REG_SEQUENCE to force register allocation.
2305  SDValue V0 = N->getOperand(Vec0Idx + 0);
2306  SDValue V1 = N->getOperand(Vec0Idx + 1);
2307  if (NumVecs == 2)
2308  SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2309  else {
2310  SDValue V2 = N->getOperand(Vec0Idx + 2);
2311  // If it's a vst3, form a quad D-register and leave the last part as
2312  // an undef.
2313  SDValue V3 = (NumVecs == 3)
2314  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2315  : N->getOperand(Vec0Idx + 3);
2316  SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2317  }
2318  } else {
2319  // Form a QQ register.
2320  SDValue Q0 = N->getOperand(Vec0Idx);
2321  SDValue Q1 = N->getOperand(Vec0Idx + 1);
2322  SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2323  }
2324 
2325  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2326  QOpcodes0[OpcodeIndex]);
2327  Ops.push_back(MemAddr);
2328  Ops.push_back(Align);
2329  if (isUpdating) {
2330  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2331  bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2332  if (!IsImmUpdate) {
2333  // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2334  // check for the opcode rather than the number of vector elements.
2335  if (isVSTfixed(Opc))
2336  Opc = getVLDSTRegisterUpdateOpcode(Opc);
2337  Ops.push_back(Inc);
2338  }
2339  // The VST1/VST2 fixed-increment forms do not take a Reg0 operand, so
2340  // only include it for the other opcodes.
2341  else if (!isVSTfixed(Opc))
2342  Ops.push_back(Reg0);
2343  }
2344  Ops.push_back(SrcReg);
2345  Ops.push_back(Pred);
2346  Ops.push_back(Reg0);
2347  Ops.push_back(Chain);
2348  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2349 
2350  // Transfer memoperands.
2351  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2352 
2353  ReplaceNode(N, VSt);
2354  return;
2355  }
2356 
2357  // Otherwise, quad registers are stored with two separate instructions,
2358  // where one stores the even registers and the other stores the odd registers.
2359 
2360  // Form the QQQQ REG_SEQUENCE.
2361  SDValue V0 = N->getOperand(Vec0Idx + 0);
2362  SDValue V1 = N->getOperand(Vec0Idx + 1);
2363  SDValue V2 = N->getOperand(Vec0Idx + 2);
2364  SDValue V3 = (NumVecs == 3)
2365  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2366  : N->getOperand(Vec0Idx + 3);
2367  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2368 
2369  // Store the even D registers. This is always an updating store, so that it
2370  // provides the address to the second store for the odd subregs.
2371  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2372  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2373  MemAddr.getValueType(),
2374  MVT::Other, OpsA);
2375  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2376  Chain = SDValue(VStA, 1);
2377 
2378  // Store the odd D registers.
2379  Ops.push_back(SDValue(VStA, 0));
2380  Ops.push_back(Align);
2381  if (isUpdating) {
2382  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2383  assert(isa<ConstantSDNode>(Inc.getNode()) &&
2384  "only constant post-increment update allowed for VST3/4");
2385  (void)Inc;
2386  Ops.push_back(Reg0);
2387  }
2388  Ops.push_back(RegSeq);
2389  Ops.push_back(Pred);
2390  Ops.push_back(Reg0);
2391  Ops.push_back(Chain);
2392  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2393  Ops);
2394  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2395  ReplaceNode(N, VStB);
2396 }
2397 
2398 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2399  unsigned NumVecs,
2400  const uint16_t *DOpcodes,
2401  const uint16_t *QOpcodes) {
2402  assert(Subtarget->hasNEON());
2403  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2404  SDLoc dl(N);
2405 
2406  SDValue MemAddr, Align;
2407  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2408  // nodes are not intrinsics.
2409  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2410  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2411  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2412  return;
2413 
2414  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2415 
2416  SDValue Chain = N->getOperand(0);
2417  unsigned Lane =
2418  cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2419  EVT VT = N->getOperand(Vec0Idx).getValueType();
2420  bool is64BitVector = VT.is64BitVector();
2421 
2422  unsigned Alignment = 0;
2423  if (NumVecs != 3) {
2424  Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2425  unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2426  if (Alignment > NumBytes)
2427  Alignment = NumBytes;
2428  if (Alignment < 8 && Alignment < NumBytes)
2429  Alignment = 0;
2430  // Alignment must be a power of two; make sure of that.
2431  Alignment = (Alignment & -Alignment);
2432  if (Alignment == 1)
2433  Alignment = 0;
2434  }
2435  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2436 
2437  unsigned OpcodeIndex;
2438  switch (VT.getSimpleVT().SimpleTy) {
2439  default: llvm_unreachable("unhandled vld/vst lane type");
2440  // Double-register operations:
2441  case MVT::v8i8: OpcodeIndex = 0; break;
2442  case MVT::v4f16:
2443  case MVT::v4bf16:
2444  case MVT::v4i16: OpcodeIndex = 1; break;
2445  case MVT::v2f32:
2446  case MVT::v2i32: OpcodeIndex = 2; break;
2447  // Quad-register operations:
2448  case MVT::v8f16:
2449  case MVT::v8bf16:
2450  case MVT::v8i16: OpcodeIndex = 0; break;
2451  case MVT::v4f32:
2452  case MVT::v4i32: OpcodeIndex = 1; break;
2453  }
2454 
2455  std::vector<EVT> ResTys;
2456  if (IsLoad) {
2457  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2458  if (!is64BitVector)
2459  ResTyElts *= 2;
2460  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2461  MVT::i64, ResTyElts));
2462  }
2463  if (isUpdating)
2464  ResTys.push_back(MVT::i32);
2465  ResTys.push_back(MVT::Other);
2466 
2467  SDValue Pred = getAL(CurDAG, dl);
2468  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2469 
2470  SmallVector<SDValue, 8> Ops;
2471  Ops.push_back(MemAddr);
2472  Ops.push_back(Align);
2473  if (isUpdating) {
2474  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2475  bool IsImmUpdate =
2476  isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2477  Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2478  }
2479 
2480  SDValue SuperReg;
2481  SDValue V0 = N->getOperand(Vec0Idx + 0);
2482  SDValue V1 = N->getOperand(Vec0Idx + 1);
2483  if (NumVecs == 2) {
2484  if (is64BitVector)
2485  SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2486  else
2487  SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2488  } else {
2489  SDValue V2 = N->getOperand(Vec0Idx + 2);
2490  SDValue V3 = (NumVecs == 3)
2491  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2492  : N->getOperand(Vec0Idx + 3);
2493  if (is64BitVector)
2494  SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2495  else
2496  SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2497  }
2498  Ops.push_back(SuperReg);
2499  Ops.push_back(getI32Imm(Lane, dl));
2500  Ops.push_back(Pred);
2501  Ops.push_back(Reg0);
2502  Ops.push_back(Chain);
2503 
2504  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2505  QOpcodes[OpcodeIndex]);
2506  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2507  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2508  if (!IsLoad) {
2509  ReplaceNode(N, VLdLn);
2510  return;
2511  }
2512 
2513  // Extract the subregisters.
2514  SuperReg = SDValue(VLdLn, 0);
2515  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2516  ARM::qsub_3 == ARM::qsub_0 + 3,
2517  "Unexpected subreg numbering");
2518  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2519  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2520  ReplaceUses(SDValue(N, Vec),
2521  CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2522  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2523  if (isUpdating)
2524  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2525  CurDAG->RemoveDeadNode(N);
2526 }
2527 
2528 template <typename SDValueVector>
2529 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2530  SDValue PredicateMask) {
2531  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2532  Ops.push_back(PredicateMask);
2533  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2534 }
2535 
2536 template <typename SDValueVector>
2537 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2538  SDValue PredicateMask,
2539  SDValue Inactive) {
2540  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2541  Ops.push_back(PredicateMask);
2542  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2543  Ops.push_back(Inactive);
2544 }
2545 
2546 template <typename SDValueVector>
2547 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2548  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2549  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2550  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2551 }
2552 
2553 template <typename SDValueVector>
2554 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2555  EVT InactiveTy) {
2556  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2557  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2558  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2559  Ops.push_back(SDValue(
2560  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2561 }
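// Together these helpers append the standard MVE predication operand triple
// (VPT condition, mask value, tail-predication register): a predicated
// instruction receives (ARMVCC::Then, mask, noreg), an unpredicated one
// (ARMVCC::None, noreg, noreg), plus an explicit inactive-lanes value for
// instruction forms that require one.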
2562 
2563 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2564  bool Predicated) {
2565  SDLoc Loc(N);
2566  SmallVector<SDValue, 8> Ops;
2567 
2568  uint16_t Opcode;
2569  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2570  case 32:
2571  Opcode = Opcodes[0];
2572  break;
2573  case 64:
2574  Opcode = Opcodes[1];
2575  break;
2576  default:
2577  llvm_unreachable("bad vector element size in SelectMVE_WB");
2578  }
2579 
2580  Ops.push_back(N->getOperand(2)); // vector of base addresses
2581 
2582  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2583  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2584 
2585  if (Predicated)
2586  AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2587  else
2588  AddEmptyMVEPredicateToOps(Ops, Loc);
2589 
2590  Ops.push_back(N->getOperand(0)); // chain
2591 
2592  SmallVector<EVT, 8> VTs;
2593  VTs.push_back(N->getValueType(1));
2594  VTs.push_back(N->getValueType(0));
2595  VTs.push_back(N->getValueType(2));
2596 
2597  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2598  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2599  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2600  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2601  transferMemOperands(N, New);
2602  CurDAG->RemoveDeadNode(N);
2603 }
2604 
2605 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2606  bool Immediate,
2607  bool HasSaturationOperand) {
2608  SDLoc Loc(N);
2609  SmallVector<SDValue, 6> Ops;
2610 
2611  // Two 32-bit halves of the value to be shifted
2612  Ops.push_back(N->getOperand(1));
2613  Ops.push_back(N->getOperand(2));
2614 
2615  // The shift count
2616  if (Immediate) {
2617  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2618  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2619  } else {
2620  Ops.push_back(N->getOperand(3));
2621  }
2622 
2623  // The immediate saturation operand, if any
2624  if (HasSaturationOperand) {
2625  int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
2626  int SatBit = (SatOp == 64 ? 0 : 1);
2627  Ops.push_back(getI32Imm(SatBit, Loc));
2628  }
2629 
2630  // MVE scalar shifts are IT-predicable, so include the standard
2631  // predicate arguments.
2632  Ops.push_back(getAL(CurDAG, Loc));
2633  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2634 
2635  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2636 }
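// For the saturating long shifts (e.g. sqrshrl), the intrinsic's saturation
// operand names the bit width to saturate at, and the code above translates
// it into the instruction's immediate: 64 becomes 0 and the other supported
// width, 48, becomes 1.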
2637 
2638 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2639  uint16_t OpcodeWithNoCarry,
2640  bool Add, bool Predicated) {
2641  SDLoc Loc(N);
2642  SmallVector<SDValue, 8> Ops;
2643  uint16_t Opcode;
2644 
2645  unsigned FirstInputOp = Predicated ? 2 : 1;
2646 
2647  // Two input vectors and the input carry flag
2648  Ops.push_back(N->getOperand(FirstInputOp));
2649  Ops.push_back(N->getOperand(FirstInputOp + 1));
2650  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2651  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2652  uint32_t CarryMask = 1 << 29;
2653  uint32_t CarryExpected = Add ? 0 : CarryMask;
2654  if (CarryInConstant &&
2655  (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2656  Opcode = OpcodeWithNoCarry;
2657  } else {
2658  Ops.push_back(CarryIn);
2659  Opcode = OpcodeWithCarry;
2660  }
2661 
2662  if (Predicated)
2663  AddMVEPredicateToOps(Ops, Loc,
2664  N->getOperand(FirstInputOp + 3), // predicate
2665  N->getOperand(FirstInputOp - 1)); // inactive
2666  else
2667  AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2668 
2669  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2670 }
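// Example: vadc.i32 consumes the incoming carry from FPSCR.C (bit 29 of
// FPSCR, hence the CarryMask above), while vadci.i32 starts from a fixed
// initial carry (0 for the add form, 1 for vsbci). When the carry-in operand
// is a constant whose bit 29 already equals that fixed value, the operand is
// dropped and the "I" variant is selected.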
2671 
2672 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2673  SDLoc Loc(N);
2674  SmallVector<SDValue, 6> Ops;
2675 
2676  // One vector input, followed by a 32-bit word of bits to shift in
2677  // and then an immediate shift count
2678  Ops.push_back(N->getOperand(1));
2679  Ops.push_back(N->getOperand(2));
2680  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2681  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2682 
2683  if (Predicated)
2684  AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2685  else
2686  AddEmptyMVEPredicateToOps(Ops, Loc);
2687 
2688  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
2689 }
2690 
2691 static bool SDValueToConstBool(SDValue SDVal) {
2692  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2693  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2694  uint64_t Value = SDValConstant->getZExtValue();
2695  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2696  return Value;
2697 }
2698 
2699 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2700  const uint16_t *OpcodesS,
2701  const uint16_t *OpcodesU,
2702  size_t Stride, size_t TySize) {
2703  assert(TySize < Stride && "Invalid TySize");
2704  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2705  bool IsSub = SDValueToConstBool(N->getOperand(2));
2706  bool IsExchange = SDValueToConstBool(N->getOperand(3));
2707  if (IsUnsigned) {
2708  assert(!IsSub &&
2709  "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2710  assert(!IsExchange &&
2711  "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2712  }
2713 
2714  auto OpIsZero = [N](size_t OpNo) {
2715  if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
2716  if (OpConst->getZExtValue() == 0)
2717  return true;
2718  return false;
2719  };
2720 
2721  // If the input accumulator value is not zero, select an instruction with
2722  // an accumulator; otherwise select an instruction without one.
2723  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2724 
2725  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2726  if (IsSub)
2727  Opcodes += 4 * Stride;
2728  if (IsExchange)
2729  Opcodes += 2 * Stride;
2730  if (IsAccum)
2731  Opcodes += Stride;
2732  uint16_t Opcode = Opcodes[TySize];
2733 
2734  SDLoc Loc(N);
2735  SmallVector<SDValue, 8> Ops;
2736  // Push the accumulator operands, if they are used
2737  if (IsAccum) {
2738  Ops.push_back(N->getOperand(4));
2739  Ops.push_back(N->getOperand(5));
2740  }
2741  // Push the two vector operands
2742  Ops.push_back(N->getOperand(6));
2743  Ops.push_back(N->getOperand(7));
2744 
2745  if (Predicated)
2746  AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2747  else
2748  AddEmptyMVEPredicateToOps(Ops, Loc);
2749 
2750  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2751 }
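// Worked example of the opcode table layout: with Stride == 2 (two element
// sizes, as used by SelectMVE_VMLLDAV below), a subtracting, non-exchanging,
// accumulating 32-bit variant selects OpcodesS[4*2 + 2 + 1], i.e. entry 11
// of the signed table.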
2752 
2753 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2754  const uint16_t *OpcodesS,
2755  const uint16_t *OpcodesU) {
2756  EVT VecTy = N->getOperand(6).getValueType();
2757  size_t SizeIndex;
2758  switch (VecTy.getVectorElementType().getSizeInBits()) {
2759  case 16:
2760  SizeIndex = 0;
2761  break;
2762  case 32:
2763  SizeIndex = 1;
2764  break;
2765  default:
2766  llvm_unreachable("bad vector element size");
2767  }
2768 
2769  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2770 }
2771 
2772 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2773  const uint16_t *OpcodesS,
2774  const uint16_t *OpcodesU) {
2775  assert(
2776  N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2777  32 &&
2778  "bad vector element size");
2779  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2780 }
2781 
2782 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2783  const uint16_t *const *Opcodes,
2784  bool HasWriteback) {
2785  EVT VT = N->getValueType(0);
2786  SDLoc Loc(N);
2787 
2788  const uint16_t *OurOpcodes;
2789  switch (VT.getVectorElementType().getSizeInBits()) {
2790  case 8:
2791  OurOpcodes = Opcodes[0];
2792  break;
2793  case 16:
2794  OurOpcodes = Opcodes[1];
2795  break;
2796  case 32:
2797  OurOpcodes = Opcodes[2];
2798  break;
2799  default:
2800  llvm_unreachable("bad vector element size in SelectMVE_VLD");
2801  }
2802 
2803  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2804  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2805  unsigned PtrOperand = HasWriteback ? 1 : 2;
2806 
2807  auto Data = SDValue(
2808  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2809  SDValue Chain = N->getOperand(0);
2810  // Add an MVE_VLDn instruction for each Vec, except the last
2811  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2812  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2813  auto LoadInst =
2814  CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2815  Data = SDValue(LoadInst, 0);
2816  Chain = SDValue(LoadInst, 1);
2817  transferMemOperands(N, LoadInst);
2818  }
2819  // The last may need a writeback on it
2820  if (HasWriteback)
2821  ResultTys = {DataTy, MVT::i32, MVT::Other};
2822  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2823  auto LoadInst =
2824  CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2825  transferMemOperands(N, LoadInst);
2826 
2827  unsigned i;
2828  for (i = 0; i < NumVecs; i++)
2829  ReplaceUses(SDValue(N, i),
2830  CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2831  SDValue(LoadInst, 0)));
2832  if (HasWriteback)
2833  ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2834  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2835  CurDAG->RemoveDeadNode(N);
2836 }
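// For example, an MVE vld4 of v4i32 expands into the four stage instructions
// vld40.32 through vld43.32, all addressing the same base; the loop above
// chains the first three through the tuple register and the chain, and only
// the final stage performs the optional writeback.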
2837 
2838 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2839  bool Wrapping, bool Predicated) {
2840  EVT VT = N->getValueType(0);
2841  SDLoc Loc(N);
2842 
2843  uint16_t Opcode;
2844  switch (VT.getScalarSizeInBits()) {
2845  case 8:
2846  Opcode = Opcodes[0];
2847  break;
2848  case 16:
2849  Opcode = Opcodes[1];
2850  break;
2851  case 32:
2852  Opcode = Opcodes[2];
2853  break;
2854  default:
2855  llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2856  }
2857 
2858  SmallVector<SDValue, 8> Ops;
2859  unsigned OpIdx = 1;
2860 
2861  SDValue Inactive;
2862  if (Predicated)
2863  Inactive = N->getOperand(OpIdx++);
2864 
2865  Ops.push_back(N->getOperand(OpIdx++)); // base
2866  if (Wrapping)
2867  Ops.push_back(N->getOperand(OpIdx++)); // limit
2868 
2869  SDValue ImmOp = N->getOperand(OpIdx++); // step
2870  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
2871  Ops.push_back(getI32Imm(ImmValue, Loc));
2872 
2873  if (Predicated)
2874  AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2875  else
2876  AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2877 
2878  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2879 }
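// For example, the non-wrapping "vidup.u16 q0, rn, #2" produced here fills
// the lanes with rn, rn+2, rn+4, ... and writes the next value in the
// sequence back to rn; the wrapping viwdup/vdwdup forms additionally take the
// limit operand pushed above.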
2880 
2881 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2882  size_t NumExtraOps, bool HasAccum) {
2883  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2884  SDLoc Loc(N);
2885  SmallVector<SDValue, 8> Ops;
2886 
2887  unsigned OpIdx = 1;
2888 
2889  // Convert and append the immediate operand designating the coprocessor.
2890  SDValue ImmCoproc = N->getOperand(OpIdx++);
2891  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCoproc)->getZExtValue();
2892  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2893 
2894  // For accumulating variants copy the low and high order parts of the
2895  // accumulator into a register pair and add it to the operand vector.
2896  if (HasAccum) {
2897  SDValue AccLo = N->getOperand(OpIdx++);
2898  SDValue AccHi = N->getOperand(OpIdx++);
2899  if (IsBigEndian)
2900  std::swap(AccLo, AccHi);
2901  Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2902  }
2903 
2904  // Copy extra operands as-is.
2905  for (size_t I = 0; I < NumExtraOps; I++)
2906  Ops.push_back(N->getOperand(OpIdx++));
2907 
2908  // Convert and append the immediate operand
2909  SDValue Imm = N->getOperand(OpIdx);
2910  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
2911  Ops.push_back(getI32Imm(ImmVal, Loc));
2912 
2913  // Accumulating variants are IT-predicable, add predicate operands.
2914  if (HasAccum) {
2915  SDValue Pred = getAL(CurDAG, Loc);
2916  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2917  Ops.push_back(Pred);
2918  Ops.push_back(PredReg);
2919  }
2920 
2921  // Create the CDE instruction
2922  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2923  SDValue ResultPair = SDValue(InstrNode, 0);
2924 
2925  // The original intrinsic had two outputs, and the output of the dual-register
2926  // CDE instruction is a register pair. We need to extract the two subregisters
2927  // and replace all uses of the original outputs with the extracted
2928  // subregisters.
2929  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2930  if (IsBigEndian)
2931  std::swap(SubRegs[0], SubRegs[1]);
2932 
2933  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2934  if (SDValue(N, ResIdx).use_empty())
2935  continue;
2936  SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2937  MVT::i32, ResultPair);
2938  ReplaceUses(SDValue(N, ResIdx), SubReg);
2939  }
2940 
2941  CurDAG->RemoveDeadNode(N);
2942 }
2943 
2944 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2945  bool isUpdating, unsigned NumVecs,
2946  const uint16_t *DOpcodes,
2947  const uint16_t *QOpcodes0,
2948  const uint16_t *QOpcodes1) {
2949  assert(Subtarget->hasNEON());
2950  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2951  SDLoc dl(N);
2952 
2953  SDValue MemAddr, Align;
2954  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2955  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2956  return;
2957 
2958  SDValue Chain = N->getOperand(0);
2959  EVT VT = N->getValueType(0);
2960  bool is64BitVector = VT.is64BitVector();
2961 
2962  unsigned Alignment = 0;
2963  if (NumVecs != 3) {
2964  Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2965  unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2966  if (Alignment > NumBytes)
2967  Alignment = NumBytes;
2968  if (Alignment < 8 && Alignment < NumBytes)
2969  Alignment = 0;
2970  // Alignment must be a power of two; make sure of that.
2971  Alignment = (Alignment & -Alignment);
2972  if (Alignment == 1)
2973  Alignment = 0;
2974  }
2975  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2976 
2977  unsigned OpcodeIndex;
2978  switch (VT.getSimpleVT().SimpleTy) {
2979  default: llvm_unreachable("unhandled vld-dup type");
2980  case MVT::v8i8:
2981  case MVT::v16i8: OpcodeIndex = 0; break;
2982  case MVT::v4i16:
2983  case MVT::v8i16:
2984  case MVT::v4f16:
2985  case MVT::v8f16:
2986  case MVT::v4bf16:
2987  case MVT::v8bf16:
2988  OpcodeIndex = 1; break;
2989  case MVT::v2f32:
2990  case MVT::v2i32:
2991  case MVT::v4f32:
2992  case MVT::v4i32: OpcodeIndex = 2; break;
2993  case MVT::v1f64:
2994  case MVT::v1i64: OpcodeIndex = 3; break;
2995  }
2996 
2997  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2998  if (!is64BitVector)
2999  ResTyElts *= 2;
3000  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3001 
3002  std::vector<EVT> ResTys;
3003  ResTys.push_back(ResTy);
3004  if (isUpdating)
3005  ResTys.push_back(MVT::i32);
3006  ResTys.push_back(MVT::Other);
3007 
3008  SDValue Pred = getAL(CurDAG, dl);
3009  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3010 
3011  SmallVector<SDValue, 6> Ops;
3012  Ops.push_back(MemAddr);
3013  Ops.push_back(Align);
3014  unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3015  : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3016  : QOpcodes1[OpcodeIndex];
3017  if (isUpdating) {
3018  SDValue Inc = N->getOperand(2);
3019  bool IsImmUpdate =
3020  isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3021  if (IsImmUpdate) {
3022  if (!isVLDfixed(Opc))
3023  Ops.push_back(Reg0);
3024  } else {
3025  if (isVLDfixed(Opc))
3026  Opc = getVLDSTRegisterUpdateOpcode(Opc);
3027  Ops.push_back(Inc);
3028  }
3029  }
3030  if (is64BitVector || NumVecs == 1) {
3031  // Double registers and VLD1 quad registers are directly supported.
3032  } else if (NumVecs == 2) {
3033  const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
3034  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3035  MVT::Other, OpsA);
3036  Chain = SDValue(VLdA, 1);
3037  } else {
3038  SDValue ImplDef = SDValue(
3039  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3040  const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3041  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3042  MVT::Other, OpsA);
3043  Ops.push_back(SDValue(VLdA, 0));
3044  Chain = SDValue(VLdA, 1);
3045  }
3046 
3047  Ops.push_back(Pred);
3048  Ops.push_back(Reg0);
3049  Ops.push_back(Chain);
3050 
3051  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3052 
3053  // Transfer memoperands.
3054  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3055  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3056 
3057  // Extract the subregisters.
3058  if (NumVecs == 1) {
3059  ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3060  } else {
3061  SDValue SuperReg = SDValue(VLdDup, 0);
3062  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3063  unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3064  for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3065  ReplaceUses(SDValue(N, Vec),
3066  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3067  }
3068  }
3069  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3070  if (isUpdating)
3071  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3072  CurDAG->RemoveDeadNode(N);
3073 }
3074 
3075 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3076  if (!Subtarget->hasMVEIntegerOps())
3077  return false;
3078 
3079  SDLoc dl(N);
3080 
 3081  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower inserts
 3082  // and extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
 3083  // inserts of the correct type:
3084  SDValue Ins1 = SDValue(N, 0);
3085  SDValue Ins2 = N->getOperand(0);
3086  EVT VT = Ins1.getValueType();
3087  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3088  !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3089  !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3090  (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3091  return false;
3092 
3093  unsigned Lane1 = Ins1.getConstantOperandVal(2);
3094  unsigned Lane2 = Ins2.getConstantOperandVal(2);
3095  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3096  return false;
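  // E.g. an insert into lane 5 (Ins1) over an insert into lane 4 (Ins2)
  // covers both halves of s-register Lane2 / 2, so the pair can be rewritten
  // as a single 32-bit lane move below.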
3097 
3098  // If the inserted values will be able to use T/B already, leave it to the
3099  // existing tablegen patterns. For example VCVTT/VCVTB.
3100  SDValue Val1 = Ins1.getOperand(1);
3101  SDValue Val2 = Ins2.getOperand(1);
3102  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3103  return false;
3104 
3105  // Check if the inserted values are both extracts.
3106  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3107  Val1.getOpcode() == ARMISD::VGETLANEu) &&
3108  (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3109  Val2.getOpcode() == ARMISD::VGETLANEu) &&
3110  isa<ConstantSDNode>(Val1.getOperand(1)) &&
3111  isa<ConstantSDNode>(Val2.getOperand(1)) &&
3112  (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3113  Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3114  (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3115  Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3116  unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3117  unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3118 
 3119  // If the two extracted lanes are from the same place and adjacent, this
 3120  // simplifies into an f32 lane move.
3121  if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3122  ExtractLane1 == ExtractLane2 + 1) {
3123  SDValue NewExt = CurDAG->getTargetExtractSubreg(
3124  ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3125  SDValue NewIns = CurDAG->getTargetInsertSubreg(
3126  ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3127  NewExt);
3128  ReplaceUses(Ins1, NewIns);
3129  return true;
3130  }
3131 
 3132  // Otherwise match the v8i16 pattern of an extract and an insert, with an
 3133  // optional vmovx for extracting odd lanes.
3134  if (VT == MVT::v8i16) {
3135  SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3136  ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3137  SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3138  ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3139  if (ExtractLane1 % 2 != 0)
3140  Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3141  if (ExtractLane2 % 2 != 0)
3142  Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3143  SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3144  SDValue NewIns =
3145  CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3146  Ins2.getOperand(0), SDValue(VINS, 0));
3147  ReplaceUses(Ins1, NewIns);
3148  return true;
3149  }
3150  }
3151 
3152  // The inserted values are not extracted - if they are f16 then insert them
3153  // directly using a VINS.
3154  if (VT == MVT::v8f16) {
3155  SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3156  SDValue NewIns =
3157  CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3158  Ins2.getOperand(0), SDValue(VINS, 0));
3159  ReplaceUses(Ins1, NewIns);
3160  return true;
3161  }
3162 
3163  return false;
3164 }
3165 
3166 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3167  SDNode *FMul,
3168  bool IsUnsigned,
3169  bool FixedToFloat) {
3170  auto Type = N->getValueType(0);
3171  unsigned ScalarBits = Type.getScalarSizeInBits();
3172  if (ScalarBits > 32)
3173  return false;
3174 
3175  SDNodeFlags FMulFlags = FMul->getFlags();
 3176  // The fixed-point vcvt and the vcvt+vmul sequence are not always equivalent
 3177  // if inf is allowed in 16-bit unsigned floats.
3178  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3179  return false;
3180 
3181  SDValue ImmNode = FMul->getOperand(1);
3182  SDValue VecVal = FMul->getOperand(0);
3183  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3184  VecVal->getOpcode() == ISD::SINT_TO_FP)
3185  VecVal = VecVal->getOperand(0);
3186 
3187  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3188  return false;
3189 
3190  if (ImmNode.getOpcode() == ISD::BITCAST) {
3191  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3192  return false;
3193  ImmNode = ImmNode.getOperand(0);
3194  }
3195 
3196  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3197  return false;
3198 
3199  APFloat ImmAPF(0.0f);
3200  switch (ImmNode.getOpcode()) {
3201  case ARMISD::VMOVIMM:
3202  case ARMISD::VDUP: {
3203  if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3204  return false;
3205  unsigned Imm = ImmNode.getConstantOperandVal(0);
3206  if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3207  Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3208  ImmAPF =
3209  APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3210  APInt(ScalarBits, Imm));
3211  break;
3212  }
3213  case ARMISD::VMOVFPIMM: {
3214  ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
3215  break;
3216  }
3217  default:
3218  return false;
3219  }
3220 
3221  // Where n is the number of fractional bits, multiplying by 2^n will convert
3222  // from float to fixed and multiplying by 2^-n will convert from fixed to
3223  // float. Taking log2 of the factor (after taking the inverse in the case of
3224  // float to fixed) will give n.
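  // E.g. a multiply by 32.0 in the float-to-fixed direction gives
  // n == log2(32) == 5 fractional bits, and a multiply by 1/32 in the
  // fixed-to-float direction inverts to 32.0 and likewise gives n == 5.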
3225  APFloat ToConvert = ImmAPF;
3226  if (FixedToFloat) {
3227  if (!ImmAPF.getExactInverse(&ToConvert))
3228  return false;
3229  }
3230  APSInt Converted(64, 0);
3231  bool IsExact;
 3232  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
 3233  &IsExact);
3234  if (!IsExact || !Converted.isPowerOf2())
3235  return false;
3236 
3237  unsigned FracBits = Converted.logBase2();
3238  if (FracBits > ScalarBits)
3239  return false;
3240 
3242  VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3243  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3244 
3245  unsigned int Opcode;
3246  switch (ScalarBits) {
3247  case 16:
3248  if (FixedToFloat)
3249  Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3250  else
3251  Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3252  break;
3253  case 32:
3254  if (FixedToFloat)
3255  Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3256  else
3257  Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3258  break;
3259  default:
3260  llvm_unreachable("unexpected number of scalar bits");
3261  break;
3262  }
3263 
3264  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3265  return true;
3266 }
3267 
3268 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3269  // Transform a floating-point to fixed-point conversion to a VCVT
3270  if (!Subtarget->hasMVEFloatOps())
3271  return false;
3272  EVT Type = N->getValueType(0);
3273  if (!Type.isVector())
3274  return false;
3275  unsigned int ScalarBits = Type.getScalarSizeInBits();
3276 
3277  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT;
3278  SDNode *Node = N->getOperand(0).getNode();
3279 
 3280  // A floating-point to fixed-point conversion with one fractional bit gets
 3281  // turned into an FP_TO_[U|S]INT(FADD (x, x)) rather than an
  // FP_TO_[U|S]INT(FMUL (x, y)), since x + x == x * 2.0.
3282  if (Node->getOpcode() == ISD::FADD) {
3283  if (Node->getOperand(0) != Node->getOperand(1))
3284  return false;
3285  SDNodeFlags Flags = Node->getFlags();
 3286  // The fixed-point vcvt and the vcvt+vmul sequence are not always equivalent
 3287  // if inf is allowed in 16-bit unsigned floats.
3288  if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3289  return false;
3290 
3291  unsigned Opcode;
3292  switch (ScalarBits) {
3293  case 16:
3294  Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3295  break;
3296  case 32:
3297  Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
 3298  break;
  default:
  llvm_unreachable("unexpected number of scalar bits");
 3299  }
3300  SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3301  CurDAG->getConstant(1, dl, MVT::i32)};
3302  AddEmptyMVEPredicateToOps(Ops, dl, Type);
3303 
3304  ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3305  return true;
3306  }
3307 
3308  if (Node->getOpcode() != ISD::FMUL)
3309  return false;
3310 
3311  return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3312 }
3313 
3314 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3315  // Transform a fixed-point to floating-point conversion to a VCVT
3316  if (!Subtarget->hasMVEFloatOps())
3317  return false;
3318  auto Type = N->getValueType(0);
3319  if (!Type.isVector())
3320  return false;
3321 
3322  auto LHS = N->getOperand(0);
3323  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3324  return false;
3325 
3326  return transformFixedFloatingPointConversion(
3327  N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3328 }
3329 
3330 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3331  if (!Subtarget->hasV6T2Ops())
3332  return false;
3333 
3334  unsigned Opc = isSigned
3335  ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3336  : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3337  SDLoc dl(N);
3338 
3339  // For unsigned extracts, check for a shift right and mask
3340  unsigned And_imm = 0;
3341  if (N->getOpcode() == ISD::AND) {
3342  if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3343 
3344  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3345  if (And_imm & (And_imm + 1))
3346  return false;
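  // Only masks of the form 2^n-1 get past the check above: e.g. 0x00ff
  // passes (0x00ff & 0x0100 == 0) while 0x0ff0 fails
  // (0x0ff0 & 0x0ff1 != 0).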
3347 
3348  unsigned Srl_imm = 0;
3349  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3350  Srl_imm)) {
3351  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3352 
3353  // Mask off the unnecessary bits of the AND immediate; normally
3354  // DAGCombine will do this, but that might not happen if
3355  // targetShrinkDemandedConstant chooses a different immediate.
3356  And_imm &= -1U >> Srl_imm;
3357 
3358  // Note: The width operand is encoded as width-1.
3359  unsigned Width = countTrailingOnes(And_imm) - 1;
3360  unsigned LSB = Srl_imm;
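  // E.g. (and (srl x, 8), 0xff) gives LSB == 8 and an encoded Width of 7,
  // i.e. UBFX x, #8, #8.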
3361 
3362  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3363 
3364  if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3365  // It's cheaper to use a right shift to extract the top bits.
3366  if (Subtarget->isThumb()) {
3367  Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3368  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3369  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3370  getAL(CurDAG, dl), Reg0, Reg0 };
3371  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3372  return true;
3373  }
3374 
3375  // ARM models shift instructions as MOVsi with shifter operand.
 3376  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
 3377  SDValue ShOpc =
3378  CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3379  MVT::i32);
3380  SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3381  getAL(CurDAG, dl), Reg0, Reg0 };
3382  CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3383  return true;
3384  }
3385 
3386  assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3387  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3388  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3389  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3390  getAL(CurDAG, dl), Reg0 };
3391  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3392  return true;
3393  }
3394  }
3395  return false;
3396  }
3397 
3398  // Otherwise, we're looking for a shift of a shift
3399  unsigned Shl_imm = 0;
3400  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3401  assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3402  unsigned Srl_imm = 0;
3403  if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3404  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3405  // Note: The width operand is encoded as width-1.
3406  unsigned Width = 32 - Srl_imm - 1;
3407  int LSB = Srl_imm - Shl_imm;
3408  if (LSB < 0)
3409  return false;
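  // E.g. (srl (shl x, 4), 8) extracts 24 bits of x starting at bit 4,
  // i.e. UBFX x, #4, #24 (encoded Width == 23).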
3410  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3411  assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3412  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3413  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3414  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3415  getAL(CurDAG, dl), Reg0 };
3416  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3417  return true;
3418  }
3419  }
3420 
3421  // Or we are looking for a shift of an and, with a mask operand
3422  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3423  isShiftedMask_32(And_imm)) {
3424  unsigned Srl_imm = 0;
3425  unsigned LSB = countTrailingZeros(And_imm);
 3426  // The shift amount must be the same as the AND mask's LSB
3427  if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3428  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3429  unsigned MSB = 31 - countLeadingZeros(And_imm);
3430  // Note: The width operand is encoded as width-1.
3431  unsigned Width = MSB - LSB;
3432  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3433  assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3434  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3435  CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3436  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3437  getAL(CurDAG, dl), Reg0 };
3438  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3439  return true;
3440  }
3441  }
3442 
3443  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3444  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3445  unsigned LSB = 0;
3446  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3447  !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3448  return false;
3449 
3450  if (LSB + Width > 32)
3451  return false;
3452 
3453  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3454  assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3455  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3456  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3457  CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3458  getAL(CurDAG, dl), Reg0 };
3459  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3460  return true;
3461  }
3462 
3463  return false;
3464 }
3465 
3466 /// Target-specific DAG combining for ISD::XOR.
3467 /// Target-independent combining lowers SELECT_CC nodes of the form
3468 /// select_cc setg[ge] X, 0, X, -X
3469 /// select_cc setgt X, -1, X, -X
3470 /// select_cc setl[te] X, 0, -X, X
3471 /// select_cc setlt X, 1, -X, X
 3472 /// which all represent integer ABS, into:
 3473 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
 3474 /// ARM instruction selection detects the latter form and matches it to an
 3475 /// ARM::ABS or ARM::t2ABS machine node.
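/// For example, for i32 the matched DAG is:
///   %y = sra %x, 31
///   %abs = xor (add %x, %y), %y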
 3476 bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
3477  SDValue XORSrc0 = N->getOperand(0);
3478  SDValue XORSrc1 = N->getOperand(1);
3479  EVT VT = N->getValueType(0);
3480 
3481  if (Subtarget->isThumb1Only())
3482  return false;
3483 
3484  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
3485  return false;
3486 
3487  SDValue ADDSrc0 = XORSrc0.getOperand(0);
3488  SDValue ADDSrc1 = XORSrc0.getOperand(1);
3489  SDValue SRASrc0 = XORSrc1.getOperand(0);
3490  SDValue SRASrc1 = XORSrc1.getOperand(1);
3491  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3492  EVT XType = SRASrc0.getValueType();
3493  unsigned Size = XType.getSizeInBits() - 1;
3494 
3495  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
3496  XType.isInteger() && SRAConstant != nullptr &&
3497  Size == SRAConstant->getZExtValue()) {
3498  unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3499  CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
3500  return true;
3501  }
3502 
3503  return false;
3504 }
3505 
 3506 /// We've got special pseudo-instructions for these compare-and-swap operations.
3507 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3508  unsigned Opcode;
3509  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3510  if (MemTy == MVT::i8)
3511  Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3512  else if (MemTy == MVT::i16)
3513  Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3514  else if (MemTy == MVT::i32)
3515  Opcode = ARM::CMP_SWAP_32;
3516  else
3517  llvm_unreachable("Unknown AtomicCmpSwap type");
3518 
3519  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3520  N->getOperand(0)};
3521  SDNode *CmpSwap = CurDAG->getMachineNode(
3522  Opcode, SDLoc(N),
3523  CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3524 
3525  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3526  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3527 
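  // The pseudo defines {value, scratch, chain}; result 1 is only a scratch
  // register, so map the original node's two results to results 0 and 2.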
3528  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3529  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3530  CurDAG->RemoveDeadNode(N);
3531 }
3532 
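// Returns the (MSB index, LSB index) pair of A's set bits if they form a
// single contiguous run, e.g. 0x00f0 -> (7, 4); returns None otherwise
// (e.g. for 0x00f1).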
 3533 static Optional<std::pair<unsigned, unsigned>>
 3534 getContiguousRangeOfSetBits(const APInt &A) {
 3535  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
3536  unsigned LastOne = A.countTrailingZeros();
3537  if (A.countPopulation() != (FirstOne - LastOne + 1))
 3538  return Optional<std::pair<unsigned, unsigned>>();
 3539  return std::make_pair(FirstOne, LastOne);
3540 }
3541 
3542 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3543  assert(N->getOpcode() == ARMISD::CMPZ);
3544  SwitchEQNEToPLMI = false;
3545 
3546  if (!Subtarget->isThumb())
 3547  // FIXME: Work out whether it is profitable to do this in A32 mode, where
 3548  // LSL and LSR don't exist as standalone instructions and need the barrel shifter.
3549  return;
3550 
3551  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3552  SDValue And = N->getOperand(0);
3553  if (!And->hasOneUse())
3554  return;
3555 
3556  SDValue Zero = N->getOperand(1);
3557  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() ||
3558  And->getOpcode() != ISD::AND)
3559  return;
3560  SDValue X = And.getOperand(0);
3561  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3562 
3563  if (!C)
3564  return;
3565  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3566  if (!Range)
3567  return;
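  // E.g. C == 0x0ff0 yields Range == (11, 4); on Thumb1 (without UBFX) that
  // becomes LSLS #20 followed by LSRS #24 (case 4 below), leaving the flags
  // set from the masked value.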
3568 
3569  // There are several ways to lower this:
3570  SDNode *NewN;
3571  SDLoc dl(N);
3572 
3573  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3574  if (Subtarget->isThumb2()) {
3575  Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3576  SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3577  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3578  CurDAG->getRegister(0, MVT::i32) };
3579  return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3580  } else {
3581  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3582  CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3583  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3584  return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3585  }
3586  };
3587 
3588  if (Range->second == 0) {
3589  // 1. Mask includes the LSB -> Simply shift the top N bits off
3590  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3591  ReplaceNode(And.getNode(), NewN);
3592  } else if (Range->first == 31) {
3593  // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3594  NewN = EmitShift(ARM::tLSRri, X, Range->second);
3595  ReplaceNode(And.getNode(), NewN);
3596  } else if (Range->first == Range->second) {
3597  // 3. Only one bit is set. We can shift this into the sign bit and use a
3598  // PL/MI comparison.
3599  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3600  ReplaceNode(And.getNode(), NewN);
3601 
3602  SwitchEQNEToPLMI = true;
3603  } else if (!Subtarget->hasV6T2Ops()) {
 3604  // 4. Do a double shift to clear the bottom and top bits, but only in
 3605  // Thumb1 mode, as in Thumb2 we can use UBFX.
3606  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3607  NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3608  Range->second + (31 - Range->first));
3609  ReplaceNode(And.getNode(), NewN);
3610  }
3611 
3612 }
3613 
 3614 void ARMDAGToDAGISel::Select(SDNode *N) {
 3615  SDLoc dl(N);
3616 
3617  if (N->isMachineOpcode()) {
3618  N->setNodeId(-1);
3619  return; // Already selected.
3620  }
3621 
3622  switch (N->getOpcode()) {
3623  default: break;
3624  case ISD::STORE: {
3625  // For Thumb1, match an sp-relative store in C++. This is a little
3626  // unfortunate, but I don't think I can make the chain check work
3627  // otherwise. (The chain of the store has to be the same as the chain
3628  // of the CopyFromReg, or else we can't replace the CopyFromReg with
3629  // a direct reference to "SP".)
3630  //
3631  // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3632  // a different addressing mode from other four-byte stores.
3633  //
3634  // This pattern usually comes up with call arguments.
3635  StoreSDNode *ST = cast<StoreSDNode>(N);
3636  SDValue Ptr = ST->getBasePtr();
3637  if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3638  int RHSC = 0;
3639  if (Ptr.getOpcode() == ISD::ADD &&
3640  isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3641  Ptr = Ptr.getOperand(0);
3642 
3643  if (Ptr.getOpcode() == ISD::CopyFromReg &&
3644  cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3645  Ptr.getOperand(0) == ST->getChain()) {
3646  SDValue Ops[] = {ST->getValue(),
3647  CurDAG->getRegister(ARM::SP, MVT::i32),
3648  CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3649  getAL(CurDAG, dl),
3650  CurDAG->getRegister(0, MVT::i32),
3651  ST->getChain()};
3652  MachineSDNode *ResNode =
3653  CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3654  MachineMemOperand *MemOp = ST->getMemOperand();
3655  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3656  ReplaceNode(N, ResNode);
3657  return;
3658  }
3659  }
3660  break;
3661  }
3662  case ISD::WRITE_REGISTER:
3663  if (tryWriteRegister(N))
3664  return;
3665  break;
3666  case ISD::READ_REGISTER:
3667  if (tryReadRegister(N))
3668  return;
3669  break;
3670  case ISD::INLINEASM:
3671  case ISD::INLINEASM_BR:
3672  if (tryInlineAsm(N))
3673  return;
3674  break;
3675  case ISD::XOR:
3676  // Select special operations if XOR node forms integer ABS pattern
3677  if (tryABSOp(N))
3678  return;
3679  // Other cases are autogenerated.
3680  break;
3681  case ISD::Constant: {
3682  unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
 3683  // If we can't materialize the constant, we need to use a literal pool.
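  // (ConstantMaterializationCost is roughly the number of instructions needed
  // to build the value in a register; e.g. without movw/movt an arbitrary
  // constant such as 0x12345678 takes a chain of mov/orr, so the PC-relative
  // load is preferred.)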
3684  if (ConstantMaterializationCost(Val, Subtarget) > 2) {
3685  SDValue CPIdx = CurDAG->getTargetConstantPool(
3686  ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3687  TLI->getPointerTy(CurDAG->getDataLayout()));
3688 
3689  SDNode *ResNode;
3690  if (Subtarget->isThumb()) {
3691  SDValue Ops[] = {
3692  CPIdx,
3693  getAL(CurDAG, dl),
3694  CurDAG->getRegister(0, MVT::i32),
3695  CurDAG->getEntryNode()
3696  };
3697  ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3698  Ops);
3699  } else {
3700  SDValue Ops[] = {
3701  CPIdx,
3702  CurDAG->getTargetConstant(0, dl, MVT::i32),
3703  getAL(CurDAG, dl),
3704  CurDAG->getRegister(0, MVT::i32),
3705  CurDAG->getEntryNode()
3706  };
3707  ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3708  Ops);
3709  }
3710  // Annotate the Node with memory operand information so that MachineInstr
 3711  // queries work properly. This e.g. gives the register allocator the
3712  // required information for rematerialization.
3713  MachineFunction& MF = CurDAG->getMachineFunction();
 3714  MachineMemOperand *MemOp =
 3715  MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
 3716  MachineMemOperand::MOLoad, 4, Align(4));
 3717 
3718  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3719 
3720  ReplaceNode(N, ResNode);
3721  return;
3722  }
3723 
3724  // Other cases are autogenerated.
3725  break;
3726  }
3727  case ISD::FrameIndex: {
3728  // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3729  int FI = cast<FrameIndexSDNode>(N)->getIndex();
3730  SDValue TFI = CurDAG->getTargetFrameIndex(
3731  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3732  if (Subtarget->isThumb1Only()) {
3733  // Set the alignment of the frame object to 4, to avoid having to generate
3734  // more than one ADD
3735  MachineFrameInfo &MFI = MF->getFrameInfo();
3736  if (MFI.getObjectAlign(FI) < Align(4))
3737  MFI.setObjectAlignment(FI, Align(4));
3738  CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3739  CurDAG->getTargetConstant(0, dl, MVT::i32));
3740  return;
3741  } else {
3742  unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3743  ARM::t2ADDri : ARM::ADDri);
3744  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3745  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3746  CurDAG->getRegister(0, MVT::i32) };
3747  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3748  return;
3749  }
3750  }
3751  case ISD::INSERT_VECTOR_ELT: {
3752  if (tryInsertVectorElt(N))
3753  return;
3754  break;
3755  }
3756  case ISD::SRL:
3757  if (tryV6T2BitfieldExtractOp(N, false))
3758  return;
3759  break;
 3760  case ISD::SIGN_EXTEND_INREG:
 3761  case ISD::SRA:
3762  if (tryV6T2BitfieldExtractOp(N, true))
3763  return;
3764  break;
3765  case ISD::FP_TO_UINT:
3766  case ISD::FP_TO_SINT:
3767  if (tryFP_TO_INT(N, dl))
3768  return;
3769  break;
3770  case ISD::FMUL:
3771  if (tryFMULFixed(N, dl))
3772  return;
3773  break;
3774  case ISD::MUL:
3775  if (Subtarget->isThumb1Only())
3776  break;
3777  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3778  unsigned RHSV = C->getZExtValue();
3779  if (!RHSV) break;
3780  if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
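  // E.g. mul x, 9 becomes add x, x, lsl #3 (selected as t2ADDrs/ADDrsi below).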
3781  unsigned ShImm = Log2_32(RHSV-1);
3782  if (ShImm >= 32)
3783  break;
3784  SDValue V = N->getOperand(0);
3785  ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3786  SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3787  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3788  if (Subtarget->isThumb()) {
3789  SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3790  CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3791  return;
3792  } else {
3793  SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3794  Reg0 };
3795  CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3796  return;
3797  }
3798  }
3799  if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
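  // E.g. mul x, 7 becomes rsb x, x, x, lsl #3 (selected as t2RSBrs/RSBrsi below).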
3800  unsigned ShImm = Log2_32(RHSV+1);
3801  if (ShImm >= 32)
3802  break;
3803  SDValue V = N->getOperand(0);
3804  ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3805  SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3806  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3807  if (Subtarget->isThumb()) {
3808  SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3809  CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3810  return;
3811  } else {
3812  SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3813  Reg0 };
3814  CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3815  return;
3816  }
3817  }
3818  }
3819  break;
3820  case ISD::AND: {
3821  // Check for unsigned bitfield extract
3822  if (tryV6T2BitfieldExtractOp(N, false))
3823  return;
3824 
3825  // If an immediate is used in an AND node, it is possible that the immediate
3826  // can be more optimally materialized when negated. If this is the case we
3827  // can negate the immediate and use a BIC instead.
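  // E.g. for (and x, 0xfffffffe) the mask takes two Thumb1 instructions to
  // build (movs + mvns), while its complement 1 is a single move, so a BIC
  // with a register holding 1 is cheaper.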
3828  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3829  if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3830  uint32_t Imm = (uint32_t) N1C->getZExtValue();
3831 
3832  // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3833  // immediate can be negated and fit in the immediate operand of
3834  // a t2BIC, don't do any manual transform here as this can be
3835  // handled by the generic ISel machinery.
3836  bool PreferImmediateEncoding =
3837  Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3838  if (!PreferImmediateEncoding &&
3839  ConstantMaterializationCost(Imm, Subtarget) >
3840  ConstantMaterializationCost(~Imm, Subtarget)) {
3841  // The current immediate costs more to materialize than a negated
3842  // immediate, so negate the immediate and use a BIC.
3843  SDValue NewImm =
3844  CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3845  // If the new constant didn't exist before, reposition it in the topological
3846  // ordering so it is just before N. Otherwise, don't touch its location.
3847  if (NewImm->getNodeId() == -1)
3848  CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3849 
3850  if (!Subtarget->hasThumb2()) {
3851  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3852  N->getOperand(0), NewImm, getAL(CurDAG, dl),
3853  CurDAG->getRegister(0, MVT::i32)};
3854  ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3855  return;
3856  } else {
3857  SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3858  CurDAG->getRegister(0, MVT::i32),
3859  CurDAG->getRegister(0, MVT::i32)};
3860  ReplaceNode(N,
3861  CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3862  return;
3863  }
3864  }
3865  }
3866 
 3867  // Match (and (or x, c2), c1) where the top 16 bits of c1 and c2 match, the
 3868  // lower 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0. That is,
 3869  // the top 16 bits are entirely contributed by c2 and the lower 16 bits are
 3870  // entirely contributed by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
 3871  // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)".
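  // E.g. with c1 == 0xaaaaffff and c2 == 0xaaaa0000, the low half of x
  // survives and the top half becomes 0xaaaa, i.e. movt x, #0xaaaa.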
3872  EVT VT = N->getValueType(0);
3873  if (VT != MVT::i32)
3874  break;
3875  unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3876  ? ARM::t2MOVTi16
3877  : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3878  if (!Opc)
3879  break;
3880  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3881  N1C = dyn_cast<ConstantSDNode>(N1);
3882  if (!N1C)
3883  break;
3884  if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3885  SDValue N2 = N0.getOperand(1);
3886  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3887  if (!N2C)
3888  break;
3889  unsigned N1CVal = N1C->getZExtValue();
3890  unsigned N2CVal = N2C->getZExtValue();
3891  if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3892  (N1CVal & 0xffffU) == 0xffffU &&
3893  (N2CVal & 0xffffU) == 0x0U) {
3894  SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3895  dl, MVT::i32);
3896  SDValue Ops[] = { N0.getOperand(0), Imm16,
3897  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3898  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3899  return;
3900  }
3901  }
3902 
3903  break;
3904  }
3905  case ARMISD::UMAAL: {
3906  unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3907  SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3908  N->getOperand(2), N->getOperand(3),
3909  getAL(CurDAG, dl),
3910  CurDAG->getRegister(0, MVT::i32) };
3911  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3912  return;
3913  }
 3914  case ARMISD::UMLAL: {
3915  if (Subtarget->isThumb()) {
3916  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3917  N->getOperand(3), getAL(CurDAG, dl),
3918  CurDAG->getRegister(0, MVT::i32)};
3919  ReplaceNode(
3920  N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3921  return;
 3922  } else {
3923  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3924  N->getOperand(3), getAL(CurDAG, dl),
3925  CurDAG->getRegister(0, MVT::i32),
3926  CurDAG->getRegister(0, MVT::i32) };
3927  ReplaceNode(N, CurDAG->getMachineNode(
3928  Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3929  MVT::i32, MVT::i32, Ops));
3930  return;
3931  }
3932  }
 3933  case ARMISD::SMLAL: {
3934  if (Subtarget->isThumb()) {
3935  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3936  N->getOperand(3), getAL(CurDAG, dl),
3937  CurDAG->getRegister(0, MVT::i32)};
3938  ReplaceNode(
3939  N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3940  return;
 3941  } else {
3942  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3943  N->getOperand(3), getAL(CurDAG, dl),
3944  CurDAG->getRegister(0, MVT::i32),
3945  CurDAG->getRegister(0, MVT::i32) };
3946  ReplaceNode(N, CurDAG->getMachineNode(
3947  Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3948  MVT::i32, MVT::i32, Ops));
3949  return;
3950  }
3951  }
3952  case ARMISD::SUBE: {
3953  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3954  break;
 3955  // Look for a pattern to match SMMLS:
 3956  // (sube a, (smul_lohi a, b):hi, (subc 0, (smul_lohi a, b):lo):borrow)
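  // SMMLS computes Ra - (Rn * Rm)[63:32] including the borrow out of the low
  // half, i.e. ((Ra << 32) - Rn * Rm)[63:32], which is exactly the structure
  // matched above.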
3957  if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3958  N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3959  !SDValue(N, 1).use_empty())
3960  break;
3961 
3962  if (Subtarget->isThumb())
3963  assert(Subtarget->hasThumb2() &&
3964  "This pattern should not be generated for Thumb");
3965 
3966  SDValue SmulLoHi = N->getOperand(1);
3967  SDValue Subc = N->getOperand(2);
3968  auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3969 
3970  if (!Zero || Zero->getZExtValue() != 0 ||
3971  Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3972  N->getOperand(1) != SmulLoHi.getValue(1) ||
3973  N->getOperand(2) != Subc.getValue(1))
3974  break;
3975 
3976  unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3977  SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3978  N->getOperand(0), getAL(CurDAG, dl),
3979  CurDAG->getRegister(0, MVT::i32) };
3980  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3981  return;
3982  }
3983  case ISD::LOAD: {
3984  if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3985  return;
3986  if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3987  if (tryT2IndexedLoad(N))
3988  return;
3989  } else if (Subtarget->isThumb()) {
3990  if (tryT1IndexedLoad(N))
3991  return;
3992  } else if (tryARMIndexedLoad(N))
3993  return;
3994  // Other cases are autogenerated.
3995  break;
3996  }
3997  case ISD::MLOAD:
3998  if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999  return;
4000  // Other cases are autogenerated.
4001  break;
4002  case ARMISD::WLSSETUP: {
4003  SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4004  N->getOperand(0));
4005  ReplaceUses(N, New);
4006  CurDAG->RemoveDeadNode(N);
4007  return;
4008  }
4009  case ARMISD::WLS: {
4010  SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4011  N->getOperand(1), N->getOperand(2),
4012  N->getOperand(0));
4013  ReplaceUses(N, New);
4014  CurDAG->RemoveDeadNode(N);
4015  return;
4016  }
4017  case ARMISD::LE: {
4018  SDValue Ops[] = { N->getOperand(1),
4019  N->getOperand(2),
4020  N->getOperand(0) };
4021  unsigned Opc = ARM::t2LoopEnd;
4022  SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4023  ReplaceUses(N, New);
4024  CurDAG->RemoveDeadNode(N);
4025  return;
4026  }
4027  case ARMISD::LDRD: {
4028  if (Subtarget->isThumb2())
4029  break; // TableGen handles isel in this case.
4030  SDValue Base, RegOffset, ImmOffset;
4031  const SDValue &Chain = N->getOperand(0);
4032  const SDValue &Addr = N->getOperand(1);
4033  SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4034  if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4035  // The register-offset variant of LDRD mandates that the register
4036  // allocated to RegOffset is not reused in any of the remaining operands.
4037  // This restriction is currently not enforced. Therefore emitting this
4038  // variant is explicitly avoided.
4039  Base = Addr;
4040  RegOffset = CurDAG->getRegister(0, MVT::i32);
4041  }
4042  SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4043  SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4044  {MVT::Untyped, MVT::Other}, Ops);
4045  SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4046  SDValue(New, 0));
4047  SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4048  SDValue(New, 0));
4049  transferMemOperands(N, New);
4050  ReplaceUses(SDValue(N, 0), Lo);
4051  ReplaceUses(SDValue(N, 1), Hi);
4052  ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4053  CurDAG->RemoveDeadNode(N);
4054  return;
4055  }
4056  case ARMISD::STRD: {
4057  if (Subtarget->isThumb2())
4058  break; // TableGen handles isel in this case.
4059  SDValue Base, RegOffset, ImmOffset;
4060  const SDValue &Chain = N->getOperand(0);
4061  const SDValue &Addr = N->getOperand(3);
4062  SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4063  if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4064  // The register-offset variant of STRD mandates that the register
4065  // allocated to RegOffset is not reused in any of the remaining operands.
4066  // This restriction is currently not enforced. Therefore emitting this
4067  // variant is explicitly avoided.
4068  Base = Addr;
4069  RegOffset = CurDAG->getRegister(0, MVT::i32);
4070  }
4071  SDNode *RegPair =
4072  createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4073  SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4074  SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4075  transferMemOperands(N, New);
4076  ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4077  CurDAG->RemoveDeadNode(N);
4078  return;
4079  }
4080  case ARMISD::LOOP_DEC: {
4081  SDValue Ops[] = { N->getOperand(1),
4082  N->getOperand(2),
4083  N->getOperand(0) };
4084  SDNode *Dec =
4085  CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4086  CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4087  ReplaceUses(N, Dec);
4088  CurDAG->RemoveDeadNode(N);
4089  return;
4090  }
4091  case ARMISD::BRCOND: {
4092  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4093  // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4094  // Pattern complexity = 6 cost = 1 size = 0
4095 
4096  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4097  // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4098  // Pattern complexity = 6 cost = 1 size = 0
4099 
4100  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4101  // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4102  // Pattern complexity = 6 cost = 1 size = 0
4103 
4104  unsigned Opc = Subtarget->isThumb() ?
4105  ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4106  SDValue Chain = N->getOperand(0);
4107  SDValue N1 = N->getOperand(1);
4108  SDValue N2 = N->getOperand(2);
4109  SDValue N3 = N->getOperand(3);
4110  SDValue InFlag = N->getOperand(4);
4111  assert(N1.getOpcode() == ISD::BasicBlock);
4112  assert(N2.getOpcode() == ISD::Constant);
4113  assert(N3.getOpcode() == ISD::Register);
4114 
4115  unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
4116 
4117  if (InFlag.getOpcode() == ARMISD::CMPZ) {
4118  if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4119  SDValue Int = InFlag.getOperand(0);
4120  uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
4121 
4122  // Handle low-overhead loops.
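  // The loop_decrement_reg intrinsic is rewritten into a t2LoopDec feeding a
  // t2LoopEnd so that the later low-overhead-loop passes can form a hardware
  // DLS/LE loop from the pair.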
4123  if (ID == Intrinsic::loop_decrement_reg) {
4124  SDValue Elements = Int.getOperand(2);
4125  SDValue Size = CurDAG->getTargetConstant(
4126  cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
4127  MVT::i32);
4128 
4129  SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4130  SDNode *LoopDec =
4131  CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4132  CurDAG->getVTList(MVT::i32, MVT::Other),
4133  Args);
4134  ReplaceUses(Int.getNode(), LoopDec);
4135 
4136  SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4137  SDNode *LoopEnd =
4138  CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4139 
4140  ReplaceUses(N, LoopEnd);
4141  CurDAG->RemoveDeadNode(N);
4142  CurDAG->RemoveDeadNode(InFlag.getNode());
4143  CurDAG->RemoveDeadNode(Int.getNode());
4144  return;
4145  }
4146  }
4147 
4148  bool SwitchEQNEToPLMI;
4149  SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
4150  InFlag = N->getOperand(4);
4151 
4152  if (SwitchEQNEToPLMI) {
4153  switch ((ARMCC::CondCodes)CC) {
4154  default: llvm_unreachable("CMPZ must be either NE or EQ!");
4155  case ARMCC::NE:
4156  CC = (unsigned)ARMCC::MI;
4157  break;
4158  case ARMCC::EQ:
4159  CC = (unsigned)ARMCC::PL;
4160  break;
4161  }
4162  }
4163  }
4164 
4165  SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4166  SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
4167  SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4168  MVT::Glue, Ops);
4169  Chain = SDValue(ResNode, 0);
4170  if (N->getNumValues() == 2) {
4171  InFlag = SDValue(ResNode, 1);
4172  ReplaceUses(SDValue(N, 1), InFlag);
4173  }
4174  ReplaceUses(SDValue(N, 0),
4175  SDValue(Chain.getNode(), Chain.getResNo()));
4176  CurDAG->RemoveDeadNode(N);
4177  return;
4178  }
4179 
4180  case ARMISD::CMPZ: {
4181  // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4182  // This allows us to avoid materializing the expensive negative constant.
4183  // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4184  // for its glue output.
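  // E.g. (CMPZ x, #-2) becomes (CMPZ (ADDS x, #2), #0): x + 2 == 0 exactly
  // when x == -2, so the flags needed for EQ/NE are preserved.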
4185  SDValue X = N->getOperand(0);
4186  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4187  if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4188  int64_t Addend = -C->getSExtValue();
4189 
4190  SDNode *Add = nullptr;
4191  // ADDS can be better than CMN if the immediate fits in a
4192  // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4193  // Outside that range we can just use a CMN which is 32-bit but has a
4194  // 12-bit immediate range.
4195  if (Addend < 1<<8) {
4196  if (Subtarget->isThumb2()) {
4197  SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4198  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4199  CurDAG->getRegister(0, MVT::i32) };
4200  Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4201  } else {
4202  unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4203  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4204  CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4205  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4206  Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4207  }
4208  }
4209  if (Add) {
4210  SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4211  CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4212  }
4213  }
4214  // Other cases are autogenerated.
4215  break;
4216  }
4217 
4218  case ARMISD::CMOV: {
4219  SDValue InFlag = N->getOperand(4);
4220 
4221  if (InFlag.getOpcode() == ARMISD::CMPZ) {
4222  bool SwitchEQNEToPLMI;
4223  SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
4224 
4225  if (SwitchEQNEToPLMI) {
4226  SDValue ARMcc = N->getOperand(2);
4227  ARMCC::CondCodes CC =
4228  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
4229 
4230  switch (CC) {
4231  default: llvm_unreachable("CMPZ must be either NE or EQ!");
4232  case ARMCC::NE:
4233  CC = ARMCC::MI;
4234  break;
4235  case ARMCC::EQ:
4236  CC = ARMCC::PL;
4237  break;
4238  }
4239  SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4240  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4241  N->getOperand(3), N->getOperand(4)};
4242  CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4243  }
4244 
4245  }
4246  // Other cases are autogenerated.
4247  break;
4248  }
4249 
4250  case ARMISD::VZIP: {
4251  unsigned Opc = 0;
4252  EVT VT = N->getValueType(0);
4253  switch (VT.getSimpleVT().SimpleTy) {
4254  default: return;
4255  case MVT::v8i8: Opc = ARM::VZIPd8; break;
4256  case MVT::v4f16:
4257  case MVT::v4i16: Opc = ARM::VZIPd16; break;
4258  case MVT::v2f32:
4259  // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4260  case MVT::v2i32: Opc = ARM::VTRNd32; break;
4261  case MVT::v16i8: Opc = ARM::VZIPq8; break;
4262  case MVT::v8f16:
4263  case MVT::v8i16: Opc = ARM::VZIPq16; break;
4264  case MVT::v4f32:
4265  case MVT::v4i32: Opc = ARM::VZIPq32; break;
4266  }
4267  SDValue Pred = getAL(CurDAG, dl);
4268  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4269  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4270  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4271  return;
4272  }
4273  case ARMISD::VUZP: {
4274  unsigned Opc = 0;
4275  EVT VT = N->getValueType(0);
4276  switch (VT.getSimpleVT().SimpleTy) {
4277  default: return;
4278  case MVT::v8i8: Opc = ARM::VUZPd8; break;
4279  case MVT::v4f16:
4280  case MVT::v4i16: Opc = ARM::VUZPd16; break;
4281  case MVT::v2f32:
4282  // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4283  case MVT::v2i32: Opc = ARM::VTRNd32; break;
4284  case MVT::v16i8: Opc = ARM::VUZPq8; break;
4285  case MVT::v8f16:
4286  case MVT::v8i16: Opc = ARM::VUZPq16; break;
4287  case MVT::v4f32:
4288  case MVT::v4i32: Opc = ARM::VUZPq32; break;
4289  }
4290  SDValue Pred = getAL(CurDAG, dl);
4291  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4292  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4293  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4294  return;
4295  }
4296  case ARMISD::VTRN: {
4297  unsigned Opc = 0;
4298  EVT VT = N->getValueType(0);
4299  switch (VT.getSimpleVT().SimpleTy) {
4300  default: return;
4301  case MVT::v8i8: Opc = ARM::VTRNd8; break;
4302  case MVT::v4f16:
4303  case MVT::v4i16: Opc = ARM::VTRNd16; break;
4304  case MVT::v2f32:
4305  case MVT::v2i32: Opc = ARM::VTRNd32; break;
4306  case MVT::v16i8: Opc = ARM::VTRNq8; break;
4307  case MVT::v8f16:
4308  case MVT::v8i16: Opc = ARM::VTRNq16; break;
4309  case MVT::v4f32:
4310  case MVT::v4i32: Opc = ARM::VTRNq32; break;
4311  }
4312  SDValue Pred = getAL(CurDAG, dl);
4313  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4314  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4315  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4316  return;
4317  }
4318  case ARMISD::BUILD_VECTOR: {
4319  EVT VecVT = N->getValueType(0);
4320  EVT EltVT = VecVT.getVectorElementType();
4321  unsigned NumElts = VecVT.getVectorNumElements();
4322  if (EltVT == MVT::f64) {
4323  assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4324  ReplaceNode(
4325  N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4326  return;
4327  }
4328  assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4329  if (NumElts == 2) {
4330  ReplaceNode(
4331  N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4332  return;
4333  }
4334  assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4335  ReplaceNode(N,
4336  createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4337  N->getOperand(2), N->getOperand(3)));
4338  return;
4339  }
4340 
4341  case ARMISD::VLD1DUP: {
4342  static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4343  ARM::VLD1DUPd32 };
4344  static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4345  ARM::VLD1DUPq32 };
4346  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4347  return;
4348  }
4349 
4350  case ARMISD::VLD2DUP: {
4351  static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4352  ARM::VLD2DUPd32 };
4353  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4354  return;
4355  }
4356 
4357  case ARMISD::VLD3DUP: {
4358  static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4359  ARM::VLD3DUPd16Pseudo,
4360  ARM::VLD3DUPd32Pseudo };
4361  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4362  return;
4363  }
4364 
4365  case ARMISD::VLD4DUP: {
4366  static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4367  ARM::VLD4DUPd16Pseudo,
4368  ARM::VLD4DUPd32Pseudo };
4369  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4370  return;
4371  }
4372 
4373  case ARMISD::VLD1DUP_UPD: {
4374  static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4375  ARM::VLD1DUPd16wb_fixed,
4376  ARM::VLD1DUPd32wb_fixed };
4377  static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4378  ARM::VLD1DUPq16wb_fixed,
4379  ARM::VLD1DUPq32wb_fixed };
4380  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4381  return;
4382  }
4383 
4384  case ARMISD::VLD2DUP_UPD: {
4385  static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4386  ARM::VLD2DUPd16wb_fixed,
4387  ARM::VLD2DUPd32wb_fixed,
4388  ARM::VLD1q64wb_fixed };
4389  static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4390  ARM::VLD2DUPq16EvenPseudo,
4391  ARM::VLD2DUPq32EvenPseudo };
4392  static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4393  ARM::VLD2DUPq16OddPseudoWB_fixed,
4394  ARM::VLD2DUPq32OddPseudoWB_fixed };
4395  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4396  return;
4397  }
4398 
4399  case ARMISD::VLD3DUP_UPD: {
4400  static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4401  ARM::VLD3DUPd16Pseudo_UPD,
4402  ARM::VLD3DUPd32Pseudo_UPD,
4403  ARM::VLD1d64TPseudoWB_fixed };
4404  static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4405  ARM::VLD3DUPq16EvenPseudo,
4406  ARM::VLD3DUPq32EvenPseudo };
4407  static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4408  ARM::VLD3DUPq16OddPseudo_UPD,
4409  ARM::VLD3DUPq32OddPseudo_UPD };
4410  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4411  return;
4412  }
4413 
4414  case ARMISD::VLD4DUP_UPD: {
4415  static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4416  ARM::VLD4DUPd16Pseudo_UPD,
4417  ARM::VLD4DUPd32Pseudo_UPD,
4418  ARM::VLD1d64QPseudoWB_fixed };
4419  static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4420  ARM::VLD4DUPq16EvenPseudo,
4421  ARM::VLD4DUPq32EvenPseudo };
4422  static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4423  ARM::VLD4DUPq16OddPseudo_UPD,
4424  ARM::VLD4DUPq32OddPseudo_UPD };
4425  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4426  return;
4427  }
4428 
4429  case ARMISD::VLD1_UPD: {
4430  static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4431  ARM::VLD1d16wb_fixed,
4432  ARM::VLD1d32wb_fixed,
4433  ARM::VLD1d64wb_fixed };
4434  static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4435  ARM::VLD1q16wb_fixed,
4436  ARM::VLD1q32wb_fixed,
4437  ARM::VLD1q64wb_fixed };
4438  SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4439  return;
4440  }
4441 
4442  case ARMISD::VLD2_UPD: {
4443  if (Subtarget->hasNEON()) {
4444  static const uint16_t DOpcodes[] = {
4445  ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4446  ARM::VLD1q64wb_fixed};
4447  static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4448  ARM::VLD2q16PseudoWB_fixed,
4449  ARM::VLD2q32PseudoWB_fixed};
4450  SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4451  } else {
4452  static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4453  ARM::MVE_VLD21_8_wb};
4454  static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4455  ARM::MVE_VLD21_16_wb};
4456  static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4457  ARM::MVE_VLD21_32_wb};
4458  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4459  SelectMVE_VLD(N, 2, Opcodes, true);
4460  }
4461  return;
4462  }
4463 
4464  case ARMISD::VLD3_UPD: {
4465  static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4466  ARM::VLD3d16Pseudo_UPD,
4467  ARM::VLD3d32Pseudo_UPD,
4468  ARM::VLD1d64TPseudoWB_fixed};
4469  static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4470  ARM::VLD3q16Pseudo_UPD,
4471  ARM::VLD3q32Pseudo_UPD };
4472  static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4473  ARM::VLD3q16oddPseudo_UPD,
4474  ARM::VLD3q32oddPseudo_UPD };
4475  SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4476  return;
4477  }
4478 
4479  case ARMISD::VLD4_UPD: {
4480  if (Subtarget->hasNEON()) {
4481  static const uint16_t DOpcodes[] = {
4482  ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4483  ARM::VLD1d64QPseudoWB_fixed};
4484  static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4485  ARM::VLD4q16Pseudo_UPD,
4486  ARM::VLD4q32Pseudo_UPD};
4487  static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4488  ARM::VLD4q16oddPseudo_UPD,
4489  ARM::VLD4q32oddPseudo_UPD};
4490  SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4491  } else {
4492  static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4493  ARM::MVE_VLD42_8,
4494  ARM::MVE_VLD43_8_wb};
4495  static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4496  ARM::MVE_VLD42_16,
4497  ARM::MVE_VLD43_16_wb};
4498  static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4499  ARM::MVE_VLD42_32,
4500  ARM::MVE_VLD43_32_wb};
4501  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4502  SelectMVE_VLD(N, 4, Opcodes, true);
4503  }
4504  return;
4505  }
4506 
4507  case ARMISD::VLD1x2_UPD: {
4508  if (Subtarget->hasNEON()) {
4509  static const uint16_t DOpcodes[] = {
4510  ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4511  ARM::VLD1q64wb_fixed};
4512  static const uint16_t QOpcodes[] = {
4513  ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4514  ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4515  SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4516  return;
4517  }
4518  break;
4519  }
4520 
4521  case ARMISD::VLD1x3_UPD: {
4522  if (Subtarget->hasNEON()) {
4523  static const uint16_t DOpcodes[] = {
4524  ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4525  ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4526  static const uint16_t QOpcodes0[] = {
4527  ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4528  ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4529  static const uint16_t QOpcodes1[] = {
4530  ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4531  ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4532  SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4533  return;
4534  }
4535  break;
4536  }
4537 
4538  case ARMISD::VLD1x4_UPD: {
4539  if (Subtarget->hasNEON()) {
4540  static const uint16_t DOpcodes[] = {
4541  ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4542  ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4543  static const uint16_t QOpcodes0[] = {
4544  ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4545  ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4546  static const uint16_t QOpcodes1[] = {
4547  ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4548  ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4549  SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4550  return;
4551  }
4552  break;
4553  }
4554 
4555  case ARMISD::VLD2LN_UPD: {
4556  static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4557  ARM::VLD2LNd16Pseudo_UPD,
4558  ARM::VLD2LNd32Pseudo_UPD };
4559  static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4560  ARM::VLD2LNq32Pseudo_UPD };
4561  SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4562  return;
4563  }
4564 
4565  case ARMISD::VLD3LN_UPD: {
4566  static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4567  ARM::VLD3LNd16Pseudo_UPD,
4568  ARM::VLD3LNd32Pseudo_UPD };
4569  static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4570  ARM::VLD3LNq32Pseudo_UPD };
4571  SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4572  return;
4573  }
4574 
4575  case ARMISD::VLD4LN_UPD: {
4576  static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4577  ARM::VLD4LNd16Pseudo_UPD,
4578  ARM::VLD4LNd32Pseudo_UPD };
4579  static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4580  ARM::VLD4LNq32Pseudo_UPD };
4581  SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4582  return;
4583  }
4584 
4585  case ARMISD::VST1_UPD: {
4586  static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4587  ARM::VST1d16wb_fixed,
4588  ARM::VST1d32wb_fixed,
4589  ARM::VST1d64wb_fixed };
4590  static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4591  ARM::VST1q16wb_fixed,
4592  ARM::VST1q32wb_fixed,
4593  ARM::VST1q64wb_fixed };
4594  SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4595  return;
4596  }
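  // The wb_fixed opcodes model the post-indexed form whose increment equals
  // the transfer size, e.g. "vst1.8 {d0}, [r0]!"; a register increment
  // ("vst1.8 {d0}, [r0], r1") is handled by the separate _register variants
  // selected elsewhere.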
4597 
4598  case ARMISD::VST2_UPD: {
4599  if (Subtarget->hasNEON()) {
4600  static const uint16_t DOpcodes[] = {
4601  ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4602  ARM::VST1q64wb_fixed};
4603  static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4604  ARM::VST2q16PseudoWB_fixed,
4605  ARM::VST2q32PseudoWB_fixed};
4606  SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4607  return;
4608  }
4609  break;
4610  }
4611 
4612  case ARMISD::VST3_UPD: {
4613  static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4614  ARM::VST3d16Pseudo_UPD,
4615  ARM::VST3d32Pseudo_UPD,
4616  ARM::VST1d64TPseudoWB_fixed};
4617  static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4618  ARM::VST3q16Pseudo_UPD,
4619  ARM::VST3q32Pseudo_UPD };
4620  static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4621  ARM::VST3q16oddPseudo_UPD,
4622  ARM::VST3q32oddPseudo_UPD };
4623  SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4624  return;
4625  }
4626 
4627  case ARMISD::VST4_UPD: {
4628  if (Subtarget->hasNEON()) {
4629  static const uint16_t DOpcodes[] = {
4630  ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4631  ARM::VST1d64QPseudoWB_fixed};
4632  static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4633  ARM::VST4q16Pseudo_UPD,
4634  ARM::VST4q32Pseudo_UPD};
4635  static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4636  ARM::VST4q16oddPseudo_UPD,
4637  ARM::VST4q32oddPseudo_UPD};
4638  SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4639  return;
4640  }
4641  break;
4642  }
4643 
4644  case ARMISD::VST1x2_UPD: {
4645  if (Subtarget->hasNEON()) {
4646  static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4647  ARM::VST1q16wb_fixed,
4648  ARM::VST1q32wb_fixed,
4649  ARM::VST1q64wb_fixed};
4650  static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4651  ARM::VST1d16QPseudoWB_fixed,
4652  ARM::VST1d32QPseudoWB_fixed,
4653  ARM::VST1d64QPseudoWB_fixed };
4654  SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4655  return;
4656  }
4657  break;
4658  }
4659 
4660  case ARMISD::VST1x3_UPD: {
4661  if (Subtarget->hasNEON()) {
4662  static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4663  ARM::VST1d16TPseudoWB_fixed,
4664  ARM::VST1d32TPseudoWB_fixed,
4665  ARM::VST1d64TPseudoWB_fixed };
4666  static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4667  ARM::VST1q16LowTPseudo_UPD,
4668  ARM::VST1q32LowTPseudo_UPD,
4669  ARM::VST1q64LowTPseudo_UPD };
4670  static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4671  ARM::VST1q16HighTPseudo_UPD,
4672  ARM::VST1q32HighTPseudo_UPD,
4673  ARM::VST1q64HighTPseudo_UPD };
4674  SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4675  return;
4676  }
4677  break;
4678  }
4679 
4680  case ARMISD::VST1x4_UPD: {
4681  if (Subtarget->hasNEON()) {
4682  static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4683  ARM::VST1d16QPseudoWB_fixed,
4684  ARM::VST1d32QPseudoWB_fixed,
4685  ARM::VST1d64QPseudoWB_fixed };
4686  static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4687  ARM::VST1q16LowQPseudo_UPD,
4688  ARM::VST1q32LowQPseudo_UPD,
4689  ARM::VST1q64LowQPseudo_UPD };
4690  static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4691  ARM::VST1q16HighQPseudo_UPD,
4692  ARM::VST1q32HighQPseudo_UPD,
4693  ARM::VST1q64HighQPseudo_UPD };
4694  SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4695  return;
4696  }
4697  break;
4698  }
4699  case ARMISD::VST2LN_UPD: {
4700  static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4701  ARM::VST2LNd16Pseudo_UPD,
4702  ARM::VST2LNd32Pseudo_UPD };
4703  static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4704  ARM::VST2LNq32Pseudo_UPD };
4705  SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4706  return;
4707  }
4708 
4709  case ARMISD::VST3LN_UPD: {
4710  static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4711  ARM::VST3LNd16Pseudo_UPD,
4712  ARM::VST3LNd32Pseudo_UPD };
4713  static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4714  ARM::VST3LNq32Pseudo_UPD };
4715  SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4716  return;
4717  }
4718 
4719  case ARMISD::VST4LN_UPD: {
4720  static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4721  ARM::VST4LNd16Pseudo_UPD,
4722  ARM::VST4LNd32Pseudo_UPD };
4723  static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4724  ARM::VST4LNq32Pseudo_UPD };
4725  SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4726  return;
4727  }
4728 
4729  case ISD::INTRINSIC_VOID:
4730  case ISD::INTRINSIC_W_CHAIN: {
4731  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4732  switch (IntNo) {
4733  default:
4734  break;
4735 
4736  case Intrinsic::arm_mrrc:
4737  case Intrinsic::arm_mrrc2: {
4738  SDLoc dl(N);
4739  SDValue Chain = N->getOperand(0);
4740  unsigned Opc;
4741 
4742  if (Subtarget->isThumb())
4743  Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4744  else
4745  Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4746 
4747  SmallVector<SDValue, 5> Ops;
4748  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
4749  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
4750  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
4751 
4752  // The mrrc2 instruction in ARM doesn't allow predicates; the top 4 bits of the encoded
4753  // instruction will always be '1111'. It is possible in assembly language to specify
4754  // AL as a predicate to mrrc2, but it makes no difference to the encoded instruction.
4755  if (Opc != ARM::MRRC2) {
4756  Ops.push_back(getAL(CurDAG, dl));
4757  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4758  }
4759 
4760  Ops.push_back(Chain);
4761 
4762  // Writes to two registers.
4763  const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4764 
4765  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4766  return;
4767  }
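  // A hedged usage sketch (value names illustrative): IR such as
  //   %pair = call { i32, i32 } @llvm.arm.mrrc(i32 15, i32 0, i32 0)
  // selects here to a single MRRC/t2MRRC machine node with two i32 results
  // plus a chain, corresponding to assembly like "mrrc p15, #0, r0, r1, c0".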
4768  case Intrinsic::arm_ldaexd:
4769  case Intrinsic::arm_ldrexd: {
4770  SDLoc dl(N);
4771  SDValue Chain = N->getOperand(0);
4772  SDValue MemAddr = N->getOperand(2);
4773  bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4774 
4775  bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4776  unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4777  : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4778 
4779  // arm_ldrexd returns an i64 value in {i32, i32}.
4780  std::vector<EVT> ResTys;
4781  if (isThumb) {
4782  ResTys.push_back(MVT::i32);
4783  ResTys.push_back(MVT::i32);
4784  } else
4785  ResTys.push_back(MVT::Untyped);
4786  ResTys.push_back(MVT::Other);
4787 
4788  // Place arguments in the right order.
4789  SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4790  CurDAG->getRegister(0, MVT::i32), Chain};
4791  SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4792  // Transfer memoperands.
4793  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4794  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4795 
4796  // Remap uses.
4797  SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4798  if (!SDValue(N, 0).use_empty()) {
4799  SDValue Result;
4800  if (isThumb)
4801  Result = SDValue(Ld, 0);
4802  else {
4803  SDValue SubRegIdx =
4804  CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4805  SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4806  dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4807  Result = SDValue(ResNode, 0);
4808  }
4809  ReplaceUses(SDValue(N, 0), Result);
4810  }
4811  if (!SDValue(N, 1).use_empty()) {
4812  SDValue Result;
4813  if (isThumb)
4814  Result = SDValue(Ld, 1);
4815  else {
4816  SDValue SubRegIdx =
4817  CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4818  SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4819  dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4820  Result = SDValue(ResNode, 0);
4821  }
4822  ReplaceUses(SDValue(N, 1), Result);
4823  }
4824  ReplaceUses(SDValue(N, 2), OutChain);
4825  CurDAG->RemoveDeadNode(N);
4826  return;
4827  }
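  // Summary of the remapping above: in ARM mode, LDREXD/LDAEXD define an
  // even/odd GPRPair as one MVT::Untyped value, so each live i32 half is
  // recovered with EXTRACT_SUBREG on gsub_0 or gsub_1; in Thumb2 the
  // t2LDREXD/t2LDAEXD instructions already produce two separate i32 results
  // and need no extraction.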
4828  case Intrinsic::arm_stlexd:
4829  case Intrinsic::arm_strexd: {
4830  SDLoc dl(N);
4831  SDValue Chain = N->getOperand(0);
4832  SDValue Val0 = N->getOperand(2);
4833  SDValue Val1 = N->getOperand(3);
4834  SDValue MemAddr = N->getOperand(4);
4835 
4836  // Store-exclusive double returns an i32 value which is the return status
4837  // of the issued store.
4838  const EVT ResTys[] = {MVT::i32, MVT::Other};
4839 
4840  bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4841  // Place arguments in the right order.
4842  SmallVector<SDValue, 7> Ops;
4843  if (isThumb) {
4844  Ops.push_back(Val0);
4845  Ops.push_back(Val1);
4846  } else
4847  // arm_strexd uses GPRPair.
4848  Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4849  Ops.push_back(MemAddr);
4850  Ops.push_back(getAL(CurDAG, dl));
4851  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4852  Ops.push_back(Chain);
4853 
4854  bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4855  unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4856  : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4857 
4858  SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4859  // Transfer memoperands.
4860  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4861  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4862 
4863  ReplaceNode(N, St);
4864  return;
4865  }
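  // A hedged usage sketch (value names illustrative): IR such as
  //   %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr)
  // reaches this point with the two halves glued into a GPRPair for the
  // ARM-mode STREXD, while Thumb2's t2STREXD takes them as separate
  // operands; the i32 result is 0 if the store succeeded.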
4866 
4867  case Intrinsic::arm_neon_vld1: {
4868  static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4869  ARM::VLD1d32, ARM::VLD1d64 };
4870  static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4871  ARM::VLD1q32, ARM::VLD1q64};
4872  SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4873  return;
4874  }
4875 
4876  case Intrinsic::arm_neon_vld1x2: {
4877  static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4878  ARM::VLD1q32, ARM::VLD1q64 };
4879  static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4880  ARM::VLD1d16QPseudo,
4881  ARM::VLD1d32QPseudo,
4882  ARM::VLD1d64QPseudo };
4883  SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4884  return;
4885  }
4886 
4887  case Intrinsic::arm_neon_vld1x3: {
4888  static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4889  ARM::VLD1d16TPseudo,
4890  ARM::VLD1d32TPseudo,
4891  ARM::VLD1d64TPseudo };
4892  static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4893  ARM::VLD1q16LowTPseudo_UPD,
4894  ARM::VLD1q32LowTPseudo_UPD,
4895  ARM::VLD1q64LowTPseudo_UPD };
4896  static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4897  ARM::VLD1q16HighTPseudo,
4898  ARM::VLD1q32HighTPseudo,
4899  ARM::VLD1q64HighTPseudo };
4900  SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4901  return;
4902  }
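  // Loading three q registers with vld1x3 needs six d registers, more than a
  // single VLD1 encoding can name (its longest form covers four), so the
  // Q-register path is split: the "Low" pseudo (QOpcodes0, _UPD) loads the
  // first half and post-increments the pointer, and the "High" pseudo
  // (QOpcodes1) loads the rest from the updated address.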
4903 
4904  case Intrinsic::arm_neon_vld1x4: {
4905  static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4906  ARM::VLD1d16QPseudo,
4907  ARM::VLD1d32QPseudo,
4908  ARM::VLD1d64QPseudo };
4909  static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4910  ARM::VLD1q16LowQPseudo_UPD,
4911  ARM::VLD1q32LowQPseudo_UPD,
4912  ARM::VLD1q64LowQPseudo_UPD };
4913  static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4914  ARM::VLD1q16HighQPseudo,
4915  ARM::VLD1q32HighQPseudo,
4916  ARM::VLD1q64HighQPseudo };
4917  SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4918  return;
4919  }
4920 
4921  case Intrinsic::arm_neon_vld2: {
4922  static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4923  ARM::VLD2d32, ARM::VLD1q64 };
4924  static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4925  ARM::VLD2q32Pseudo };
4926  SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4927  return;
4928  }
4929 
4930  case Intrinsic::arm_neon_vld3: {
4931  static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4932  ARM::VLD3d16Pseudo,