LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64ISelDAGToDAG.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2017-09-14 15:23:50
Coverage:   Lines: 2178 / 2276 (95.7 %)    Functions: 64 / 66 (97.0 %)

          Line data    Source code
       1             : //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines an instruction selector for the AArch64 target.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64TargetMachine.h"
      15             : #include "MCTargetDesc/AArch64AddressingModes.h"
      16             : #include "llvm/ADT/APSInt.h"
      17             : #include "llvm/CodeGen/SelectionDAGISel.h"
      18             : #include "llvm/IR/Function.h" // To access function attributes.
      19             : #include "llvm/IR/GlobalValue.h"
      20             : #include "llvm/IR/Intrinsics.h"
      21             : #include "llvm/Support/Debug.h"
      22             : #include "llvm/Support/ErrorHandling.h"
      23             : #include "llvm/Support/KnownBits.h"
      24             : #include "llvm/Support/MathExtras.h"
      25             : #include "llvm/Support/raw_ostream.h"
      26             : 
      27             : using namespace llvm;
      28             : 
      29             : #define DEBUG_TYPE "aarch64-isel"
      30             : 
      31             : //===--------------------------------------------------------------------===//
       32             : /// AArch64DAGToDAGISel - AArch64-specific code to select AArch64 machine
      33             : /// instructions for SelectionDAG operations.
      34             : ///
      35             : namespace {
      36             : 
      37         978 : class AArch64DAGToDAGISel : public SelectionDAGISel {
      38             : 
      39             :   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
      40             :   /// make the right decision when generating code for different targets.
      41             :   const AArch64Subtarget *Subtarget;
      42             : 
      43             :   bool ForCodeSize;
      44             : 
      45             : public:
      46             :   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
      47             :                                CodeGenOpt::Level OptLevel)
      48         986 :       : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
      49         986 :         ForCodeSize(false) {}
      50             : 
      51           5 :   StringRef getPassName() const override {
      52           5 :     return "AArch64 Instruction Selection";
      53             :   }
      54             : 
      55       11664 :   bool runOnMachineFunction(MachineFunction &MF) override {
      56       11664 :     ForCodeSize = MF.getFunction()->optForSize();
      57       11664 :     Subtarget = &MF.getSubtarget<AArch64Subtarget>();
      58       11664 :     return SelectionDAGISel::runOnMachineFunction(MF);
      59             :   }
      60             : 
      61             :   void Select(SDNode *Node) override;
      62             : 
      63             :   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
      64             :   /// inline asm expressions.
      65             :   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
      66             :                                     unsigned ConstraintID,
      67             :                                     std::vector<SDValue> &OutOps) override;
      68             : 
      69             :   bool tryMLAV64LaneV128(SDNode *N);
      70             :   bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
      71             :   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
      72             :   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
      73             :   bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
      74             :   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
      75        1727 :     return SelectShiftedRegister(N, false, Reg, Shift);
      76             :   }
      77             :   bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
      78         405 :     return SelectShiftedRegister(N, true, Reg, Shift);
      79             :   }
      80             :   bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
      81             :     return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
      82             :   }
      83             :   bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
      84             :     return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
      85             :   }
      86             :   bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
      87          15 :     return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
      88             :   }
      89             :   bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
      90          18 :     return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
      91             :   }
      92             :   bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
      93             :     return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
      94             :   }
      95             :   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
      96         352 :     return SelectAddrModeIndexed(N, 1, Base, OffImm);
      97             :   }
      98             :   bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
      99         300 :     return SelectAddrModeIndexed(N, 2, Base, OffImm);
     100             :   }
     101             :   bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
     102        1729 :     return SelectAddrModeIndexed(N, 4, Base, OffImm);
     103             :   }
     104             :   bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
     105        4260 :     return SelectAddrModeIndexed(N, 8, Base, OffImm);
     106             :   }
     107             :   bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
     108        1665 :     return SelectAddrModeIndexed(N, 16, Base, OffImm);
     109             :   }
     110             :   bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
     111          52 :     return SelectAddrModeUnscaled(N, 1, Base, OffImm);
     112             :   }
     113             :   bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
     114          75 :     return SelectAddrModeUnscaled(N, 2, Base, OffImm);
     115             :   }
     116             :   bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
     117         112 :     return SelectAddrModeUnscaled(N, 4, Base, OffImm);
     118             :   }
     119             :   bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
     120          96 :     return SelectAddrModeUnscaled(N, 8, Base, OffImm);
     121             :   }
     122             :   bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
     123          39 :     return SelectAddrModeUnscaled(N, 16, Base, OffImm);
     124             :   }
     125             : 
     126             :   template<int Width>
     127             :   bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
     128             :                          SDValue &SignExtend, SDValue &DoShift) {
     129        8719 :     return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
     130             :   }
     131             : 
     132             :   template<int Width>
     133             :   bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
     134             :                          SDValue &SignExtend, SDValue &DoShift) {
     135        8622 :     return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
     136             :   }
     137             : 
     138             : 
     139             :   /// Form sequences of consecutive 64/128-bit registers for use in NEON
     140             :   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
      141             :   /// between 1 and 4 elements. If it contains a single element, that element
      142             :   /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
     143             :   SDValue createDTuple(ArrayRef<SDValue> Vecs);
     144             :   SDValue createQTuple(ArrayRef<SDValue> Vecs);
     145             : 
     146             :   /// Generic helper for the createDTuple/createQTuple
     147             :   /// functions. Those should almost always be called instead.
     148             :   SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
     149             :                       const unsigned SubRegs[]);
     150             : 
     151             :   void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
     152             : 
     153             :   bool tryIndexedLoad(SDNode *N);
     154             : 
     155             :   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
     156             :                      unsigned SubRegIdx);
     157             :   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
     158             :                          unsigned SubRegIdx);
     159             :   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
     160             :   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
     161             : 
     162             :   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
     163             :   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
     164             :   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
     165             :   void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
     166             : 
     167             :   bool tryBitfieldExtractOp(SDNode *N);
     168             :   bool tryBitfieldExtractOpFromSExt(SDNode *N);
     169             :   bool tryBitfieldInsertOp(SDNode *N);
     170             :   bool tryBitfieldInsertInZeroOp(SDNode *N);
     171             : 
     172             :   bool tryReadRegister(SDNode *N);
     173             :   bool tryWriteRegister(SDNode *N);
     174             : 
     175             : // Include the pieces autogenerated from the target description.
     176             : #include "AArch64GenDAGISel.inc"
     177             : 
     178             : private:
     179             :   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
     180             :                              SDValue &Shift);
     181             :   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
     182             :                                SDValue &OffImm);
     183             :   bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
     184             :                              SDValue &OffImm);
     185             :   bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
     186             :                               SDValue &OffImm);
     187             :   bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
     188             :                          SDValue &Offset, SDValue &SignExtend,
     189             :                          SDValue &DoShift);
     190             :   bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
     191             :                          SDValue &Offset, SDValue &SignExtend,
     192             :                          SDValue &DoShift);
     193             :   bool isWorthFolding(SDValue V) const;
     194             :   bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
     195             :                          SDValue &Offset, SDValue &SignExtend);
     196             : 
     197             :   template<unsigned RegWidth>
     198             :   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
     199          32 :     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
     200             :   }
     201             : 
     202             :   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
     203             : 
     204             :   bool SelectCMP_SWAP(SDNode *N);
     205             : 
     206             : };
     207             : } // end anonymous namespace
     208             : 
     209             : /// isIntImmediate - This method tests to see if the node is a constant
      210             : /// operand. If so, Imm receives the zero-extended value.
     211             : static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
     212        1396 :   if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
     213         658 :     Imm = C->getZExtValue();
     214             :     return true;
     215             :   }
     216             :   return false;
     217             : }
     218             : 
      219             : // isIntImmediate - This method tests to see if the value is a constant
      220             : // operand. If so, Imm receives the value.
     221             : static bool isIntImmediate(SDValue N, uint64_t &Imm) {
     222          58 :   return isIntImmediate(N.getNode(), Imm);
     223             : }
     224             : 
     225             : // isOpcWithIntImmediate - This method tests to see if the node is a specific
      226             : // opcode and that it has an immediate integer right operand.
      227             : // If so, Imm receives the value.
     228             : static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
     229             :                                   uint64_t &Imm) {
     230        4077 :   return N->getOpcode() == Opc &&
     231        3800 :          isIntImmediate(N->getOperand(1).getNode(), Imm);
     232             : }
     233             : 
     234           2 : bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
     235             :     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
     236             :   switch(ConstraintID) {
     237           0 :   default:
     238           0 :     llvm_unreachable("Unexpected asm memory constraint");
     239           2 :   case InlineAsm::Constraint_i:
     240             :   case InlineAsm::Constraint_m:
     241             :   case InlineAsm::Constraint_Q:
      242             :     // We need to make sure that this one operand does not end up in XZR, so
      243             :     // we require the address to be in a PointerRegClass register.
     244           4 :     const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
     245           2 :     const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
     246           4 :     SDLoc dl(Op);
     247           8 :     SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
     248             :     SDValue NewOp =
     249           4 :         SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
     250             :                                        dl, Op.getValueType(),
     251           2 :                                        Op, RC), 0);
     252           2 :     OutOps.push_back(NewOp);
     253           2 :     return false;
     254             :   }
     255             :   return true;
     256             : }
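                      : // A hedged illustration of what this services (not from the report itself):
                      : // an inline-asm memory constraint such as
                      : //   asm volatile("ldxr %0, %1" : "=r"(V) : "Q"(*Ptr));
                      : // must keep the "Q" address in a pointer register class, never in XZR.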
     257             : 
     258             : /// SelectArithImmed - Select an immediate value that can be represented as
     259             : /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
     260             : /// Val set to the 12-bit value and Shift set to the shifter operand.
     261        3320 : bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
     262             :                                            SDValue &Shift) {
     263             :   // This function is called from the addsub_shifted_imm ComplexPattern,
      264             :   // which lists [imm] as the list of opcodes it's interested in; however,
     265             :   // we still need to check whether the operand is actually an immediate
     266             :   // here because the ComplexPattern opcode list is only used in
     267             :   // root-level opcode matching.
     268        3320 :   if (!isa<ConstantSDNode>(N.getNode()))
     269             :     return false;
     270             : 
     271        3666 :   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
     272             :   unsigned ShiftAmt;
     273             : 
     274        1222 :   if (Immed >> 12 == 0) {
     275             :     ShiftAmt = 0;
     276         226 :   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
     277             :     ShiftAmt = 12;
     278             :     Immed = Immed >> 12;
     279             :   } else
     280             :     return false;
     281             : 
     282        1016 :   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
     283        1016 :   SDLoc dl(N);
     284        3048 :   Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
     285        3048 :   Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
     286        1016 :   return true;
     287             : }
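                      : // Illustrative encodings (assumed, not taken from this report):
                      : //   0x000ABC -> Val = 0xABC, Shift = LSL #0   ("add x0, x1, #0xabc")
                      : //   0xABC000 -> Val = 0xABC, Shift = LSL #12  ("add x0, x1, #0xabc, lsl #12")
                      : //   0xABC123 -> rejected; no single shifted 12-bit form exists.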
     288             : 
     289             : /// SelectNegArithImmed - As above, but negates the value before trying to
     290             : /// select it.
     291        2333 : bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
     292             :                                               SDValue &Shift) {
     293             :   // This function is called from the addsub_shifted_imm ComplexPattern,
      294             :   // which lists [imm] as the list of opcodes it's interested in; however,
     295             :   // we still need to check whether the operand is actually an immediate
     296             :   // here because the ComplexPattern opcode list is only used in
     297             :   // root-level opcode matching.
     298        2333 :   if (!isa<ConstantSDNode>(N.getNode()))
     299             :     return false;
     300             : 
     301             :   // The immediate operand must be a 24-bit zero-extended immediate.
     302        1698 :   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
     303             : 
     304             :   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
     305             :   // have the opposite effect on the C flag, so this pattern mustn't match under
     306             :   // those circumstances.
     307         566 :   if (Immed == 0)
     308             :     return false;
     309             : 
     310        1383 :   if (N.getValueType() == MVT::i32)
     311         285 :     Immed = ~((uint32_t)Immed) + 1;
     312             :   else
     313         176 :     Immed = ~Immed + 1ULL;
     314         461 :   if (Immed & 0xFFFFFFFFFF000000ULL)
     315             :     return false;
     316             : 
     317         201 :   Immed &= 0xFFFFFFULL;
     318         804 :   return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
     319         201 :                           Shift);
     320             : }
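                      : // Hedged sketch of the effect: an ADD of #-16 can select as "sub ..., #16"
                      : // and (cmp w0, #-5) as "cmn w0, #5"; #0 is rejected above because
                      : // "cmp wN, #0" and "cmn wN, #0" disagree on the C flag.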
     321             : 
     322             : /// getShiftTypeForNode - Translate a shift node to the corresponding
     323             : /// ShiftType value.
     324             : static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
     325             :   switch (N.getOpcode()) {
     326             :   default:
     327             :     return AArch64_AM::InvalidShiftExtend;
     328             :   case ISD::SHL:
     329             :     return AArch64_AM::LSL;
     330             :   case ISD::SRL:
     331             :     return AArch64_AM::LSR;
     332             :   case ISD::SRA:
     333             :     return AArch64_AM::ASR;
     334             :   case ISD::ROTR:
     335             :     return AArch64_AM::ROR;
     336             :   }
     337             : }
     338             : 
     339             : /// \brief Determine whether it is worth it to fold SHL into the addressing
     340             : /// mode.
     341          10 : static bool isWorthFoldingSHL(SDValue V) {
     342             :   assert(V.getOpcode() == ISD::SHL && "invalid opcode");
      343             :   // It is worth folding a logical shift of up to three places.
     344          30 :   auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
     345             :   if (!CSD)
     346             :     return false;
     347          10 :   unsigned ShiftVal = CSD->getZExtValue();
     348          10 :   if (ShiftVal > 3)
     349             :     return false;
     350             : 
     351             :   // Check if this particular node is reused in any non-memory related
     352             :   // operation.  If yes, do not try to fold this node into the address
     353             :   // computation, since the computation will be kept.
     354          10 :   const SDNode *Node = V.getNode();
     355          61 :   for (SDNode *UI : Node->uses())
     356          16 :     if (!isa<MemSDNode>(*UI))
     357          91 :       for (SDNode *UII : UI->uses())
     358          22 :         if (!isa<MemSDNode>(*UII))
     359             :           return false;
     360             :   return true;
     361             : }
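                      : // For instance (illustrative): in "ldr x0, [x1, x2, lsl #3]" the shift by 3
                      : // is free inside the addressing mode, so folding (shl x2, 3) pays off as
                      : // long as the shifted value feeds only memory operations.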
     362             : 
      363             : /// \brief Determine whether it is worth folding V into an extended register.
     364        1252 : bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
     365             :   // Trivial if we are optimizing for code size or if there is only
     366             :   // one use of the value.
     367        2215 :   if (ForCodeSize || V.hasOneUse())
     368             :     return true;
     369             :   // If a subtarget has a fastpath LSL we can fold a logical shift into
     370             :   // the addressing mode and save a cycle.
     371         116 :   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
     372           4 :       isWorthFoldingSHL(V))
     373             :     return true;
     374         106 :   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
     375          12 :     const SDValue LHS = V.getOperand(0);
     376          12 :     const SDValue RHS = V.getOperand(1);
     377          12 :     if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
     378           6 :       return true;
     379          12 :     if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
     380             :       return true;
     381             :   }
     382             : 
     383             :   // It hurts otherwise, since the value will be reused.
     384             :   return false;
     385             : }
     386             : 
     387             : /// SelectShiftedRegister - Select a "shifted register" operand.  If the value
     388             : /// is not shifted, set the Shift operand to default of "LSL 0".  The logical
     389             : /// instructions allow the shifted register to be rotated, but the arithmetic
     390             : /// instructions do not.  The AllowROR parameter specifies whether ROR is
     391             : /// supported.
     392        2132 : bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
     393             :                                                 SDValue &Reg, SDValue &Shift) {
     394        2355 :   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
     395         223 :   if (ShType == AArch64_AM::InvalidShiftExtend)
     396             :     return false;
     397         223 :   if (!AllowROR && ShType == AArch64_AM::ROR)
     398             :     return false;
     399             : 
     400         649 :   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     401         203 :     unsigned BitSize = N.getValueSizeInBits();
     402         203 :     unsigned Val = RHS->getZExtValue() & (BitSize - 1);
     403         203 :     unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
     404             : 
     405         406 :     Reg = N.getOperand(0);
     406        1015 :     Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
     407         203 :     return isWorthFolding(N);
     408             :   }
     409             : 
     410             :   return false;
     411             : }
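                      : // e.g. (illustrative): (add x2, x0, (shl x1, 4)) can select as
                      : // "add x2, x0, x1, lsl #4"; ROR is only accepted when AllowROR is set,
                      : // i.e. for logical instructions such as AND/ORR/EOR.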
     412             : 
     413             : /// getExtendTypeForNode - Translate an extend node to the corresponding
     414             : /// ExtendType value.
     415             : static AArch64_AM::ShiftExtendType
     416        3259 : getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
     417        9663 :   if (N.getOpcode() == ISD::SIGN_EXTEND ||
     418        3145 :       N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
     419         166 :     EVT SrcVT;
     420         332 :     if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
     421         156 :       SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
     422             :     else
     423         342 :       SrcVT = N.getOperand(0).getValueType();
     424             : 
     425         272 :     if (!IsLoadStore && SrcVT == MVT::i8)
     426             :       return AArch64_AM::SXTB;
     427         234 :     else if (!IsLoadStore && SrcVT == MVT::i16)
     428             :       return AArch64_AM::SXTH;
     429         236 :     else if (SrcVT == MVT::i32)
     430             :       return AArch64_AM::SXTW;
     431             :     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
     432             : 
     433           0 :     return AArch64_AM::InvalidShiftExtend;
     434        9221 :   } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
     435        3035 :              N.getOpcode() == ISD::ANY_EXTEND) {
     436         174 :     EVT SrcVT = N.getOperand(0).getValueType();
     437          82 :     if (!IsLoadStore && SrcVT == MVT::i8)
     438             :       return AArch64_AM::UXTB;
     439          82 :     else if (!IsLoadStore && SrcVT == MVT::i16)
     440             :       return AArch64_AM::UXTH;
     441         116 :     else if (SrcVT == MVT::i32)
     442             :       return AArch64_AM::UXTW;
     443             :     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
     444             : 
     445           0 :     return AArch64_AM::InvalidShiftExtend;
     446        6070 :   } else if (N.getOpcode() == ISD::AND) {
     447         364 :     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
     448             :     if (!CSD)
     449             :       return AArch64_AM::InvalidShiftExtend;
     450         120 :     uint64_t AndMask = CSD->getZExtValue();
     451             : 
     452         120 :     switch (AndMask) {
     453             :     default:
     454             :       return AArch64_AM::InvalidShiftExtend;
     455          28 :     case 0xFF:
     456          28 :       return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
     457          19 :     case 0xFFFF:
     458          19 :       return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
     459          12 :     case 0xFFFFFFFF:
     460          12 :       return AArch64_AM::UXTW;
     461             :     }
     462             :   }
     463             : 
     464             :   return AArch64_AM::InvalidShiftExtend;
     465             : }
     466             : 
     467             : // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
     468         438 : static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
     469        1276 :   if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
     470         400 :       DL->getOpcode() != AArch64ISD::DUPLANE32)
     471             :     return false;
     472             : 
     473         154 :   SDValue SV = DL->getOperand(0);
     474         154 :   if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
     475             :     return false;
     476             : 
     477          72 :   SDValue EV = SV.getOperand(1);
     478          72 :   if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
     479             :     return false;
     480             : 
     481           0 :   ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
     482           0 :   ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
     483           0 :   LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
     484           0 :   LaneOp = EV.getOperand(0);
     485             : 
     486           0 :   return true;
     487             : }
     488             : 
     489             : // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
     490             : // high lane extract.
     491         219 : static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
     492             :                              SDValue &LaneOp, int &LaneIdx) {
     493             : 
     494         219 :   if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
     495         219 :     std::swap(Op0, Op1);
     496         219 :     if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
     497             :       return false;
     498             :   }
     499           0 :   StdOp = Op1;
     500             :   return true;
     501             : }
     502             : 
     503             : /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
     504             : /// is a lane in the upper half of a 128-bit vector.  Recognize and select this
     505             : /// so that we don't emit unnecessary lane extracts.
     506        2282 : bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
     507        4564 :   SDLoc dl(N);
     508        4564 :   SDValue Op0 = N->getOperand(0);
     509        4564 :   SDValue Op1 = N->getOperand(1);
     510        2282 :   SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
     511        2282 :   SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
     512        2282 :   int LaneIdx = -1; // Will hold the lane index.
     513             : 
     514        4617 :   if (Op1.getOpcode() != ISD::MUL ||
     515         159 :       !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
     516             :                         LaneIdx)) {
     517        2282 :     std::swap(Op0, Op1);
     518        4616 :     if (Op1.getOpcode() != ISD::MUL ||
     519         156 :         !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
     520             :                           LaneIdx))
     521             :       return false;
     522             :   }
     523             : 
     524           0 :   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
     525             : 
     526           0 :   SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
     527             : 
     528           0 :   unsigned MLAOpc = ~0U;
     529             : 
     530           0 :   switch (N->getSimpleValueType(0).SimpleTy) {
     531           0 :   default:
     532           0 :     llvm_unreachable("Unrecognized MLA.");
     533             :   case MVT::v4i16:
     534             :     MLAOpc = AArch64::MLAv4i16_indexed;
     535             :     break;
     536           0 :   case MVT::v8i16:
     537           0 :     MLAOpc = AArch64::MLAv8i16_indexed;
     538           0 :     break;
     539           0 :   case MVT::v2i32:
     540           0 :     MLAOpc = AArch64::MLAv2i32_indexed;
     541           0 :     break;
     542           0 :   case MVT::v4i32:
     543           0 :     MLAOpc = AArch64::MLAv4i32_indexed;
     544           0 :     break;
     545             :   }
     546             : 
     547           0 :   ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
     548           0 :   return true;
     549             : }
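                      : // Illustrative outcome (assumed mnemonic): an add of (mul x, high-lane-dup)
                      : // can select directly as "mla v0.4h, v1.4h, v2.h[5]" rather than first
                      : // extracting the high half of v2.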
     550             : 
     551         114 : bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
     552         228 :   SDLoc dl(N);
     553         114 :   SDValue SMULLOp0;
     554         114 :   SDValue SMULLOp1;
     555             :   int LaneIdx;
     556             : 
     557         342 :   if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
     558             :                         LaneIdx))
     559             :     return false;
     560             : 
     561           0 :   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
     562             : 
     563           0 :   SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
     564             : 
     565           0 :   unsigned SMULLOpc = ~0U;
     566             : 
     567           0 :   if (IntNo == Intrinsic::aarch64_neon_smull) {
     568           0 :     switch (N->getSimpleValueType(0).SimpleTy) {
     569           0 :     default:
     570           0 :       llvm_unreachable("Unrecognized SMULL.");
     571             :     case MVT::v4i32:
     572             :       SMULLOpc = AArch64::SMULLv4i16_indexed;
     573             :       break;
     574           0 :     case MVT::v2i64:
     575           0 :       SMULLOpc = AArch64::SMULLv2i32_indexed;
     576           0 :       break;
     577             :     }
     578           0 :   } else if (IntNo == Intrinsic::aarch64_neon_umull) {
     579           0 :     switch (N->getSimpleValueType(0).SimpleTy) {
     580           0 :     default:
     581           0 :       llvm_unreachable("Unrecognized SMULL.");
     582             :     case MVT::v4i32:
     583             :       SMULLOpc = AArch64::UMULLv4i16_indexed;
     584             :       break;
     585           0 :     case MVT::v2i64:
     586           0 :       SMULLOpc = AArch64::UMULLv2i32_indexed;
     587           0 :       break;
     588             :     }
     589             :   } else
     590           0 :     llvm_unreachable("Unrecognized intrinsic.");
     591             : 
     592           0 :   ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
     593           0 :   return true;
     594             : }
     595             : 
     596             : /// Instructions that accept extend modifiers like UXTW expect the register
     597             : /// being extended to be a GPR32, but the incoming DAG might be acting on a
     598             : /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
     599             : /// this is the case.
     600         263 : static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
     601         789 :   if (N.getValueType() == MVT::i32)
     602         227 :     return N;
     603             : 
     604          36 :   SDLoc dl(N);
     605          72 :   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
     606          36 :   MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     607          36 :                                                dl, MVT::i32, N, SubReg);
     608          36 :   return SDValue(Node, 0);
     609             : }
     610             : 
     611             : 
      612             : /// SelectArithExtendedRegister - Select an "extended register" operand.  This
     613             : /// operand folds in an extend followed by an optional left shift.
     614        2474 : bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
     615             :                                                       SDValue &Shift) {
     616        2474 :   unsigned ShiftVal = 0;
     617             :   AArch64_AM::ShiftExtendType Ext;
     618             : 
     619        4948 :   if (N.getOpcode() == ISD::SHL) {
     620         410 :     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
     621             :     if (!CSD)
     622             :       return false;
     623         134 :     ShiftVal = CSD->getZExtValue();
     624         134 :     if (ShiftVal > 4)
     625             :       return false;
     626             : 
     627         224 :     Ext = getExtendTypeForNode(N.getOperand(0));
     628         112 :     if (Ext == AArch64_AM::InvalidShiftExtend)
     629             :       return false;
     630             : 
     631         174 :     Reg = N.getOperand(0).getOperand(0);
     632             :   } else {
     633        2336 :     Ext = getExtendTypeForNode(N);
     634        2336 :     if (Ext == AArch64_AM::InvalidShiftExtend)
     635             :       return false;
     636             : 
     637         212 :     Reg = N.getOperand(0);
     638             : 
     639             :     // Don't match if free 32-bit -> 64-bit zext can be used instead.
     640          21 :     if (Ext == AArch64_AM::UXTW &&
     641         247 :         Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
     642             :       return false;
     643             :   }
     644             : 
     645             :   // AArch64 mandates that the RHS of the operation must use the smallest
     646             :   // register class that could contain the size being extended from.  Thus,
     647             :   // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
     648             :   // there might not be an actual 32-bit value in the program.  We can
      649             :   // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
     650             :   assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
     651         158 :   Reg = narrowIfNeeded(CurDAG, Reg);
     652         790 :   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
     653         316 :                                     MVT::i32);
     654         158 :   return isWorthFolding(N);
     655             : }
     656             : 
     657             : /// If there's a use of this ADDlow that's not itself a load/store then we'll
     658             : /// need to create a real ADD instruction from it anyway and there's no point in
     659             : /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
     660             : /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
     661             : /// leads to duplicated ADRP instructions.
     662        1413 : static bool isWorthFoldingADDlow(SDValue N) {
     663       15193 :   for (auto Use : N->uses()) {
     664        2232 :     if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
     665        4884 :         Use->getOpcode() != ISD::ATOMIC_LOAD &&
     666             :         Use->getOpcode() != ISD::ATOMIC_STORE)
     667             :       return false;
     668             : 
     669             :     // ldar and stlr have much more restrictive addressing modes (just a
     670             :     // register).
     671       14202 :     if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
     672             :       return false;
     673             :   }
     674             : 
     675             :   return true;
     676             : }
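                      : // Sketch of the trade-off (illustrative):
                      : //   adrp x8, var ; add x8, x8, :lo12:var ; ldr w0, [x8]
                      : // folds to
                      : //   adrp x8, var ; ldr w0, [x8, :lo12:var]
                      : // but only when every user is a plain (at most monotonic) load or store.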
     677             : 
     678             : /// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
     679             : /// immediate" address.  The "Size" argument is the size in bytes of the memory
     680             : /// reference, which determines the scale.
     681          33 : bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
     682             :                                                   SDValue &Base,
     683             :                                                   SDValue &OffImm) {
     684          66 :   SDLoc dl(N);
     685          66 :   const DataLayout &DL = CurDAG->getDataLayout();
     686          33 :   const TargetLowering *TLI = getTargetLowering();
     687          66 :   if (N.getOpcode() == ISD::FrameIndex) {
     688           1 :     int FI = cast<FrameIndexSDNode>(N)->getIndex();
     689           3 :     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
     690           3 :     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
     691             :     return true;
     692             :   }
     693             : 
     694             :   // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
     695             :   // selected here doesn't support labels/immediates, only base+offset.
     696             : 
     697          32 :   if (CurDAG->isBaseWithConstantOffset(N)) {
     698          57 :     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     699          19 :       int64_t RHSC = RHS->getSExtValue();
     700          19 :       unsigned Scale = Log2_32(Size);
     701          33 :       if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
     702          14 :           RHSC < (0x40 << Scale)) {
     703          24 :         Base = N.getOperand(0);
     704          24 :         if (Base.getOpcode() == ISD::FrameIndex) {
     705           1 :           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
     706           3 :           Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
     707             :         }
     708          36 :         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
     709             :         return true;
     710             :       }
     711             :     }
     712             :   }
     713             : 
     714             :   // Base only. The address will be materialized into a register before
     715             :   // the memory is accessed.
     716             :   //    add x0, Xbase, #offset
     717             :   //    stp x1, x2, [x0]
     718          20 :   Base = N;
     719          60 :   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
     720             :   return true;
     721             : }
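                      : // Hedged example: for Size == 8 the signed 7-bit scaled window is
                      : // [-512, 504] in steps of 8, so "stp x1, x2, [x0, #-512]" fits, while
                      : // #-520 or a misaligned #4 falls back to the base-only form above.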
     722             : 
     723             : /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
     724             : /// immediate" address.  The "Size" argument is the size in bytes of the memory
     725             : /// reference, which determines the scale.
     726        8306 : bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     727             :                                               SDValue &Base, SDValue &OffImm) {
     728       16612 :   SDLoc dl(N);
     729       16612 :   const DataLayout &DL = CurDAG->getDataLayout();
     730        8306 :   const TargetLowering *TLI = getTargetLowering();
     731       16612 :   if (N.getOpcode() == ISD::FrameIndex) {
     732         750 :     int FI = cast<FrameIndexSDNode>(N)->getIndex();
     733        2250 :     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
     734        2250 :     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
     735         750 :     return true;
     736             :   }
     737             : 
     738       15112 :   if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
     739             :     GlobalAddressSDNode *GAN =
     740        4020 :         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
     741        2680 :     Base = N.getOperand(0);
     742        2680 :     OffImm = N.getOperand(1);
     743        1340 :     if (!GAN)
     744             :       return true;
     745             : 
     746        1243 :     const GlobalValue *GV = GAN->getGlobal();
     747        1243 :     unsigned Alignment = GV->getAlignment();
     748        1243 :     Type *Ty = GV->getValueType();
     749        1243 :     if (Alignment == 0 && Ty->isSized())
     750        1026 :       Alignment = DL.getABITypeAlignment(Ty);
     751             : 
     752        1243 :     if (Alignment >= Size)
     753             :       return true;
     754             :   }
     755             : 
     756        6235 :   if (CurDAG->isBaseWithConstantOffset(N)) {
     757        7641 :     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     758        2547 :       int64_t RHSC = (int64_t)RHS->getZExtValue();
     759        2547 :       unsigned Scale = Log2_32(Size);
     760        2547 :       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
     761        4308 :         Base = N.getOperand(0);
     762        4308 :         if (Base.getOpcode() == ISD::FrameIndex) {
     763         302 :           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
     764         906 :           Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
     765             :         }
     766        6462 :         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
     767        2154 :         return true;
     768             :       }
     769             :     }
     770             :   }
     771             : 
     772             :   // Before falling back to our general case, check if the unscaled
     773             :   // instructions can handle this. If so, that's preferable.
     774        4081 :   if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
     775             :     return false;
     776             : 
     777             :   // Base only. The address will be materialized into a register before
     778             :   // the memory is accessed.
     779             :   //    add x0, Xbase, #offset
     780             :   //    ldr x0, [x0]
     781        3707 :   Base = N;
     782       11121 :   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
     783        3707 :   return true;
     784             : }
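                      : // Hedged example: for Size == 8 the unsigned scaled window is [0, 32760]
                      : // in steps of 8 ("ldr x0, [x1, #32760]"); offsets such as #-8 or #12 are
                      : // left to the unscaled (LDUR-style) form checked just above.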
     785             : 
     786             : /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
     787             : /// immediate" address.  This should only match when there is an offset that
     788             : /// is not valid for a scaled immediate addressing mode.  The "Size" argument
     789             : /// is the size in bytes of the memory reference, which is needed here to know
     790             : /// what is valid for a scaled immediate.
     791        4455 : bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
     792             :                                                  SDValue &Base,
     793             :                                                  SDValue &OffImm) {
     794        4455 :   if (!CurDAG->isBaseWithConstantOffset(N))
     795             :     return false;
     796        2301 :   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     797         767 :     int64_t RHSC = RHS->getSExtValue();
     798             :     // If the offset is valid as a scaled immediate, don't match here.
     799         776 :     if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
     800           9 :         RHSC < (0x1000 << Log2_32(Size)))
     801             :       return false;
     802         767 :     if (RHSC >= -256 && RHSC < 256) {
     803        1496 :       Base = N.getOperand(0);
     804        1496 :       if (Base.getOpcode() == ISD::FrameIndex) {
     805           6 :         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
     806           6 :         const TargetLowering *TLI = getTargetLowering();
     807          12 :         Base = CurDAG->getTargetFrameIndex(
     808          18 :             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
     809             :       }
     810        3740 :       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
     811             :       return true;
     812             :     }
     813             :   }
     814             :   return false;
     815             : }
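                      : // Hedged example: "ldur x0, [x1, #-8]" and "ldur x0, [x1, #13]" use this
                      : // unscaled 9-bit window of [-256, 255]; offsets the scaled form can encode
                      : // are deliberately rejected here so the scaled form wins.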
     816             : 
     817          11 : static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
     818          22 :   SDLoc dl(N);
     819          22 :   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
     820             :   SDValue ImpDef = SDValue(
     821          22 :       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
     822          11 :   MachineSDNode *Node = CurDAG->getMachineNode(
     823          11 :       TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
     824          22 :   return SDValue(Node, 0);
     825             : }
     826             : 
     827             : /// \brief Check if the given SHL node (\p N), can be used to form an
      828             : /// \brief Check if the given SHL node (\p N) can be used to form an
     829         369 : bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
     830             :                                             bool WantExtend, SDValue &Offset,
     831             :                                             SDValue &SignExtend) {
     832             :   assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
     833        1107 :   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
     834         738 :   if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
     835             :     return false;
     836             : 
     837         369 :   SDLoc dl(N);
     838         369 :   if (WantExtend) {
     839             :     AArch64_AM::ShiftExtendType Ext =
     840         424 :         getExtendTypeForNode(N.getOperand(0), true);
     841         212 :     if (Ext == AArch64_AM::InvalidShiftExtend)
     842             :       return false;
     843             : 
     844         168 :     Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
     845         112 :     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
     846          56 :                                            MVT::i32);
     847             :   } else {
     848         314 :     Offset = N.getOperand(0);
     849         471 :     SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
     850             :   }
     851             : 
     852         213 :   unsigned LegalShiftVal = Log2_32(Size);
     853         213 :   unsigned ShiftVal = CSD->getZExtValue();
     854             : 
     855         213 :   if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
     856             :     return false;
     857             : 
     858         207 :   return isWorthFolding(N);
     859             : }
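                      : // e.g. (illustrative): (shl (sext w2 to i64), 3) with Size == 8 can become
                      : // the extended-register offset of "ldr x0, [x1, w2, sxtw #3]".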
     860             : 
     861        8719 : bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
     862             :                                             SDValue &Base, SDValue &Offset,
     863             :                                             SDValue &SignExtend,
     864             :                                             SDValue &DoShift) {
     865       17438 :   if (N.getOpcode() != ISD::ADD)
     866             :     return false;
     867        5922 :   SDValue LHS = N.getOperand(0);
     868        5922 :   SDValue RHS = N.getOperand(1);
     869        2961 :   SDLoc dl(N);
     870             : 
     871             :   // We don't want to match immediate adds here, because they are better lowered
     872             :   // to the register-immediate addressing modes.
     873        3356 :   if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
     874             :     return false;
     875             : 
     876             :   // Check if this particular node is reused in any non-memory related
     877             :   // operation.  If yes, do not try to fold this node into the address
     878             :   // computation, since the computation will be kept.
     879         398 :   const SDNode *Node = N.getNode();
     880        1962 :   for (SDNode *UI : Node->uses()) {
     881         406 :     if (!isa<MemSDNode>(*UI))
     882             :       return false;
     883             :   }
     884             : 
     885             :   // Remember if it is worth folding N when it produces extended register.
     886         354 :   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
     887             : 
     888             :   // Try to match a shifted extend on the RHS.
     889         881 :   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
     890         174 :       SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
     891          46 :     Base = LHS;
     892         138 :     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
     893             :     return true;
     894             :   }
     895             : 
     896             :   // Try to match a shifted extend on the LHS.
     897         960 :   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
     898          38 :       SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
     899           7 :     Base = RHS;
     900          21 :     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
     901             :     return true;
     902             :   }
     903             : 
     904             :   // There was no shift, whatever else we find.
     905         903 :   DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
     906             : 
     907         301 :   AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
     908             :   // Try to match an unshifted extend on the LHS.
     909         601 :   if (IsExtendedRegisterWorthFolding &&
     910             :       (Ext = getExtendTypeForNode(LHS, true)) !=
     911             :           AArch64_AM::InvalidShiftExtend) {
     912           1 :     Base = RHS;
     913           2 :     Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
     914           2 :     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
     915           1 :                                            MVT::i32);
     916           1 :     if (isWorthFolding(LHS))
     917             :       return true;
     918             :   }
     919             : 
     920             :   // Try to match an unshifted extend on the RHS.
     921         300 :   if (IsExtendedRegisterWorthFolding &&
     922             :       (Ext = getExtendTypeForNode(RHS, true)) !=
     923             :           AArch64_AM::InvalidShiftExtend) {
     924          48 :     Base = LHS;
     925          96 :     Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
     926          96 :     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
     927          48 :                                            MVT::i32);
     928          48 :     if (isWorthFolding(RHS))
     929             :       return true;
     930             :   }
     931             : 
     932             :   return false;
     933             : }
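                      : // The unshifted fallback (illustrative): (add x1, (sext w2 to i64)) whose
                      : // only users are memory ops can select as "ldr x0, [x1, w2, sxtw]" with
                      : // DoShift = false.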
     934             : 
     935             : // Check if the given immediate is preferred by ADD. If an immediate can be
      936             : // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
     937             : // encoded by one MOVZ, return true.
     938             : static bool isPreferredADD(int64_t ImmOff) {
     939             :   // Constant in [0x0, 0xfff] can be encoded in ADD.
     940         745 :   if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
     941             :     return true;
     942             :   // Check if it can be encoded in an "ADD LSL #12".
     943         349 :   if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
      944             :     // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
     945          30 :     return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
     946          15 :            (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
     947             :   return false;
     948             : }
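
A few worked values for isPreferredADD (the inputs are mine, chosen to exercise each branch):

      isPreferredADD(0x0abc)   == true   // fits a plain ADD #imm12
      isPreferredADD(0x123000) == true   // fits "ADD #imm12, LSL #12" and spans
                                         // bits [15:12] and [23:16], so no
                                         // single MOVZ can materialize it
      isPreferredADD(0xf000)   == false  // a single MOVZ #0xf000 is faster
      isPreferredADD(0x123456) == false  // fits neither ADD form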
     949             : 
     950        8622 : bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
     951             :                                             SDValue &Base, SDValue &Offset,
     952             :                                             SDValue &SignExtend,
     953             :                                             SDValue &DoShift) {
     954       17244 :   if (N.getOpcode() != ISD::ADD)
     955             :     return false;
     956        5728 :   SDValue LHS = N.getOperand(0);
     957        5728 :   SDValue RHS = N.getOperand(1);
     958        2864 :   SDLoc DL(N);
     959             : 
     960             :   // Check if this particular node is reused in any non-memory related
     961             :   // operation.  If yes, do not try to fold this node into the address
     962             :   // computation, since the computation will be kept.
     963        2864 :   const SDNode *Node = N.getNode();
     964       14721 :   for (SDNode *UI : Node->uses()) {
     965        3089 :     if (!isa<MemSDNode>(*UI))
     966             :       return false;
     967             :   }
     968             : 
      969             :   // Watch out if RHS is a wide immediate: it cannot be selected into the
      970             :   // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
      971             :   // either. Instead it will use the [BaseReg + 0] address mode and generate
     972             :   // instructions like:
     973             :   //     MOV  X0, WideImmediate
     974             :   //     ADD  X1, BaseReg, X0
     975             :   //     LDR  X2, [X1, 0]
     976             :   // For such situation, using [BaseReg, XReg] addressing mode can save one
     977             :   // ADD/SUB:
     978             :   //     MOV  X0, WideImmediate
     979             :   //     LDR  X2, [BaseReg, X0]
     980             :   if (isa<ConstantSDNode>(RHS)) {
     981        5116 :     int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
     982        2558 :     unsigned Scale = Log2_32(Size);
     983             :     // Skip the immediate can be selected by load/store addressing mode.
     984             :     // Also skip the immediate can be encoded by a single ADD (SUB is also
     985             :     // checked by using -ImmOff).
     986        4986 :     if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
     987        2873 :         isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
     988        2534 :       return false;
     989             : 
     990          24 :     SDValue Ops[] = { RHS };
     991             :     SDNode *MOVI =
     992          72 :         CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
     993          24 :     SDValue MOVIV = SDValue(MOVI, 0);
      994             :     // This ADD of two X registers will be selected into [Reg+Reg] mode.
     995          72 :     N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
     996             :   }
     997             : 
      998             :   // Remember if it is worth folding N when it produces an extended register.
     999         281 :   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
    1000             : 
    1001             :   // Try to match a shifted extend on the RHS.
    1002         665 :   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
    1003         128 :       SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    1004          91 :     Base = LHS;
    1005         273 :     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    1006             :     return true;
    1007             :   }
    1008             : 
    1009             :   // Try to match a shifted extend on the LHS.
    1010         549 :   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
    1011          29 :       SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    1012          29 :     Base = RHS;
    1013          87 :     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    1014             :     return true;
    1015             :   }
    1016             : 
    1017             :   // Match any non-shifted, non-extend, non-immediate add expression.
    1018         161 :   Base = LHS;
    1019         161 :   Offset = RHS;
    1020         483 :   SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
    1021         483 :   DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
    1022             :   // Reg1 + Reg2 is free: no check needed.
    1023             :   return true;
    1024             : }
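
As a concrete instance of the rewrite above (the offset value is mine): for a load at base + 0x123456, the offset is neither a size-aligned value in the unsigned-scaled range nor preferred by ADD (in either sign), so the wide immediate is materialized once and the register-offset form is used:

      mov x0, #0x123456    // MOVi64imm (expanded later)
      ldr x2, [x1, x0]     // selected via the [Reg+Reg] fallback at the end
                           // of the function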
    1025             : 
    1026             : SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
    1027             :   static const unsigned RegClassIDs[] = {
    1028             :       AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
    1029             :   static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
    1030             :                                      AArch64::dsub2, AArch64::dsub3};
    1031             : 
    1032         119 :   return createTuple(Regs, RegClassIDs, SubRegs);
    1033             : }
    1034             : 
    1035             : SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
    1036             :   static const unsigned RegClassIDs[] = {
    1037             :       AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
    1038             :   static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
    1039             :                                      AArch64::qsub2, AArch64::qsub3};
    1040             : 
    1041         343 :   return createTuple(Regs, RegClassIDs, SubRegs);
    1042             : }
    1043             : 
    1044         462 : SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
    1045             :                                          const unsigned RegClassIDs[],
    1046             :                                          const unsigned SubRegs[]) {
    1047             :   // There's no special register-class for a vector-list of 1 element: it's just
    1048             :   // a vector.
    1049         462 :   if (Regs.size() == 1)
    1050          22 :     return Regs[0];
    1051             : 
    1052             :   assert(Regs.size() >= 2 && Regs.size() <= 4);
    1053             : 
    1054         880 :   SDLoc DL(Regs[0]);
    1055             : 
    1056         880 :   SmallVector<SDValue, 4> Ops;
    1057             : 
    1058             :   // First operand of REG_SEQUENCE is the desired RegClass.
    1059         440 :   Ops.push_back(
    1060        1760 :       CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
    1061             : 
    1062             :   // Then we get pairs of source & subregister-position for the components.
    1063        1745 :   for (unsigned i = 0; i < Regs.size(); ++i) {
    1064        2610 :     Ops.push_back(Regs[i]);
    1065        3915 :     Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
    1066             :   }
    1067             : 
    1068             :   SDNode *N =
    1069        1320 :       CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
    1070         440 :   return SDValue(N, 0);
    1071             : }
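
A sketch of the node createTuple builds for a three-element D-register list (virtual register names assumed):

      createDTuple({%v0, %v1, %v2})
        -> %tuple:Untyped = REG_SEQUENCE DDDRegClassID,
                              %v0, dsub0, %v1, dsub1, %v2, dsub2

which forces the register allocator to place the three values in consecutive D registers.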
    1072             : 
    1073          14 : void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
    1074             :                                       bool isExt) {
    1075          28 :   SDLoc dl(N);
    1076          28 :   EVT VT = N->getValueType(0);
    1077             : 
    1078          14 :   unsigned ExtOff = isExt;
    1079             : 
    1080             :   // Form a REG_SEQUENCE to force register allocation.
    1081          14 :   unsigned Vec0Off = ExtOff + 1;
    1082          14 :   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
    1083          56 :                                N->op_begin() + Vec0Off + NumVecs);
    1084          28 :   SDValue RegSeq = createQTuple(Regs);
    1085             : 
    1086          28 :   SmallVector<SDValue, 6> Ops;
    1087          14 :   if (isExt)
    1088          12 :     Ops.push_back(N->getOperand(1));
    1089          14 :   Ops.push_back(RegSeq);
    1090          28 :   Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
    1091          42 :   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
    1092          14 : }
    1093             : 
    1094        5398 : bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
    1095        5398 :   LoadSDNode *LD = cast<LoadSDNode>(N);
    1096       10796 :   if (LD->isUnindexed())
    1097             :     return false;
    1098          44 :   EVT VT = LD->getMemoryVT();
    1099          88 :   EVT DstVT = N->getValueType(0);
    1100          88 :   ISD::MemIndexedMode AM = LD->getAddressingMode();
    1101          44 :   bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
    1102             : 
    1103             :   // We're not doing validity checking here. That was done when checking
    1104             :   // if we should mark the load as indexed or not. We're just selecting
    1105             :   // the right instruction.
    1106          44 :   unsigned Opcode = 0;
    1107             : 
    1108          44 :   ISD::LoadExtType ExtType = LD->getExtensionType();
    1109          44 :   bool InsertTo64 = false;
    1110          88 :   if (VT == MVT::i64)
    1111           3 :     Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
    1112          82 :   else if (VT == MVT::i32) {
    1113           5 :     if (ExtType == ISD::NON_EXTLOAD)
    1114           4 :       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    1115           1 :     else if (ExtType == ISD::SEXTLOAD)
    1116           1 :       Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    1117             :     else {
    1118           0 :       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    1119           0 :       InsertTo64 = true;
    1120             :       // The result of the load is only i32. It's the subreg_to_reg that makes
    1121             :       // it into an i64.
    1122           0 :       DstVT = MVT::i32;
    1123             :     }
    1124          72 :   } else if (VT == MVT::i16) {
    1125           4 :     if (ExtType == ISD::SEXTLOAD) {
    1126           4 :       if (DstVT == MVT::i64)
    1127           1 :         Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
    1128             :       else
    1129           1 :         Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    1130             :     } else {
    1131           2 :       Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
    1132           4 :       InsertTo64 = DstVT == MVT::i64;
    1133             :       // The result of the load is only i32. It's the subreg_to_reg that makes
    1134             :       // it into an i64.
    1135           2 :       DstVT = MVT::i32;
    1136             :     }
    1137          64 :   } else if (VT == MVT::i8) {
    1138           5 :     if (ExtType == ISD::SEXTLOAD) {
    1139           4 :       if (DstVT == MVT::i64)
    1140           1 :         Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
    1141             :       else
    1142           1 :         Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    1143             :     } else {
    1144           3 :       Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
    1145           6 :       InsertTo64 = DstVT == MVT::i64;
    1146             :       // The result of the load is only i32. It's the subreg_to_reg that makes
    1147             :       // it into an i64.
    1148           3 :       DstVT = MVT::i32;
    1149             :     }
    1150          54 :   } else if (VT == MVT::f16) {
    1151           1 :     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
    1152          52 :   } else if (VT == MVT::f32) {
    1153           1 :     Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
    1154          48 :   } else if (VT == MVT::f64 || VT.is64BitVector()) {
    1155          12 :     Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
    1156          13 :   } else if (VT.is128BitVector()) {
    1157          13 :     Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
    1158             :   } else
    1159             :     return false;
    1160          88 :   SDValue Chain = LD->getChain();
    1161          44 :   SDValue Base = LD->getBasePtr();
    1162          88 :   ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
    1163          44 :   int OffsetVal = (int)OffsetOp->getZExtValue();
    1164          44 :   SDLoc dl(N);
    1165         132 :   SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
    1166          44 :   SDValue Ops[] = { Base, Offset, Chain };
    1167          88 :   SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
    1168         132 :                                        MVT::Other, Ops);
    1169             :   // Either way, we're replacing the node, so tell the caller that.
    1170          44 :   SDValue LoadedVal = SDValue(Res, 1);
    1171          44 :   if (InsertTo64) {
    1172           6 :     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    1173           2 :     LoadedVal =
    1174           8 :         SDValue(CurDAG->getMachineNode(
    1175             :                     AArch64::SUBREG_TO_REG, dl, MVT::i64,
    1176           4 :                     CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
    1177           2 :                     SubReg),
    1178             :                 0);
    1179             :   }
    1180             : 
    1181          44 :   ReplaceUses(SDValue(N, 0), LoadedVal);
    1182          44 :   ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
    1183          44 :   ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
    1184          44 :   CurDAG->RemoveDeadNode(N);
    1185          44 :   return true;
    1186             : }
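
To illustrate the result renumbering above (source and registers mine): a post-incremented i64 load such as

      v = *p++;

selects to LDRXpost, e.g. "ldr x0, [x1], #8". The machine node's results are (0: updated base, 1: loaded value, 2: chain), whereas the ISD indexed load's are (0: loaded value, 1: updated base, 2: chain); the three ReplaceUses calls perform exactly that permutation.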
    1187             : 
    1188         120 : void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
    1189             :                                      unsigned SubRegIdx) {
    1190         240 :   SDLoc dl(N);
    1191         240 :   EVT VT = N->getValueType(0);
    1192         240 :   SDValue Chain = N->getOperand(0);
    1193             : 
    1194         240 :   SDValue Ops[] = {N->getOperand(2), // Mem operand;
    1195         120 :                    Chain};
    1196             : 
    1197         120 :   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
    1198             : 
    1199         360 :   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
    1200         120 :   SDValue SuperReg = SDValue(Ld, 0);
    1201         475 :   for (unsigned i = 0; i < NumVecs; ++i)
    1202         710 :     ReplaceUses(SDValue(N, i),
    1203         355 :         CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
    1204             : 
    1205         120 :   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
    1206             : 
    1207             :   // Transfer memoperands.
    1208         120 :   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
    1209         120 :   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
    1210         240 :   cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
    1211             : 
    1212         120 :   CurDAG->RemoveDeadNode(N);
    1213         120 : }
    1214             : 
    1215         236 : void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
    1216             :                                          unsigned Opc, unsigned SubRegIdx) {
    1217         472 :   SDLoc dl(N);
    1218         472 :   EVT VT = N->getValueType(0);
    1219         472 :   SDValue Chain = N->getOperand(0);
    1220             : 
    1221         472 :   SDValue Ops[] = {N->getOperand(1), // Mem operand
    1222         472 :                    N->getOperand(2), // Incremental
    1223         472 :                    Chain};
    1224             : 
    1225         236 :   const EVT ResTys[] = {MVT::i64, // Type of the write back register
    1226             :                         MVT::Untyped, MVT::Other};
    1227             : 
    1228         708 :   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
    1229             : 
    1230             :   // Update uses of write back register
    1231         472 :   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
    1232             : 
    1233             :   // Update uses of vector list
    1234         236 :   SDValue SuperReg = SDValue(Ld, 1);
    1235         236 :   if (NumVecs == 1)
    1236          20 :     ReplaceUses(SDValue(N, 0), SuperReg);
    1237             :   else
    1238        1512 :     for (unsigned i = 0; i < NumVecs; ++i)
    1239        1296 :       ReplaceUses(SDValue(N, i),
    1240         648 :           CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
    1241             : 
    1242             :   // Update the chain
    1243         236 :   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
    1244         236 :   CurDAG->RemoveDeadNode(N);
    1245         236 : }
    1246             : 
    1247          89 : void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
    1248             :                                       unsigned Opc) {
    1249         178 :   SDLoc dl(N);
    1250         267 :   EVT VT = N->getOperand(2)->getValueType(0);
    1251             : 
    1252             :   // Form a REG_SEQUENCE to force register allocation.
    1253          89 :   bool Is128Bit = VT.getSizeInBits() == 128;
    1254         267 :   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
    1255         267 :   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
    1256             : 
    1257         267 :   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
    1258         267 :   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
    1259             : 
    1260             :   // Transfer memoperands.
    1261          89 :   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
    1262          89 :   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
    1263         178 :   cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
    1264             : 
    1265         178 :   ReplaceNode(N, St);
    1266          89 : }
    1267             : 
    1268         144 : void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
    1269             :                                           unsigned Opc) {
    1270         288 :   SDLoc dl(N);
    1271         432 :   EVT VT = N->getOperand(2)->getValueType(0);
    1272         144 :   const EVT ResTys[] = {MVT::i64,    // Type of the write back register
    1273             :                         MVT::Other}; // Type for the Chain
    1274             : 
    1275             :   // Form a REG_SEQUENCE to force register allocation.
    1276         144 :   bool Is128Bit = VT.getSizeInBits() == 128;
    1277         432 :   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
    1278         432 :   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
    1279             : 
    1280             :   SDValue Ops[] = {RegSeq,
    1281         288 :                    N->getOperand(NumVecs + 1), // base register
    1282         288 :                    N->getOperand(NumVecs + 2), // Incremental
    1283         576 :                    N->getOperand(0)};          // Chain
    1284         432 :   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
    1285             : 
    1286         288 :   ReplaceNode(N, St);
    1287         144 : }
    1288             : 
    1289             : namespace {
    1290             : /// WidenVector - Given a value in the V64 register class, produce the
    1291             : /// equivalent value in the V128 register class.
    1292             : class WidenVector {
    1293             :   SelectionDAG &DAG;
    1294             : 
    1295             : public:
    1296          85 :   WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
    1297             : 
    1298         250 :   SDValue operator()(SDValue V64Reg) {
    1299         500 :     EVT VT = V64Reg.getValueType();
    1300         250 :     unsigned NarrowSize = VT.getVectorNumElements();
    1301         250 :     MVT EltTy = VT.getVectorElementType().getSimpleVT();
    1302         250 :     MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    1303         500 :     SDLoc DL(V64Reg);
    1304             : 
    1305             :     SDValue Undef =
    1306         750 :         SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    1307         750 :     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
    1308             :   }
    1309             : };
    1310             : } // namespace
    1311             : 
    1312             : /// NarrowVector - Given a value in the V128 register class, produce the
    1313             : /// equivalent value in the V64 register class.
    1314         122 : static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
    1315         244 :   EVT VT = V128Reg.getValueType();
    1316         122 :   unsigned WideSize = VT.getVectorNumElements();
    1317         122 :   MVT EltTy = VT.getVectorElementType().getSimpleVT();
    1318         122 :   MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
    1319             : 
    1320         244 :   return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
    1321         366 :                                     V128Reg);
    1322             : }
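
A sketch of the two conversions (types assumed): WidenVector places a 64-bit vector into the low half of an undefined 128-bit vector, and NarrowVector extracts it back out:

      // widen v2f32 -> v4f32
      %u = IMPLICIT_DEF                    // v4f32, upper half undefined
      %w = INSERT_SUBREG %u, %v64, dsub
      // narrow v4f32 -> v2f32
      %n = EXTRACT_SUBREG %v128, dsub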
    1323             : 
    1324          26 : void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
    1325             :                                          unsigned Opc) {
    1326          52 :   SDLoc dl(N);
    1327          52 :   EVT VT = N->getValueType(0);
    1328          26 :   bool Narrow = VT.getSizeInBits() == 64;
    1329             : 
    1330             :   // Form a REG_SEQUENCE to force register allocation.
    1331          78 :   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
    1332             : 
    1333          26 :   if (Narrow)
    1334          10 :     transform(Regs, Regs.begin(),
    1335           5 :                    WidenVector(*CurDAG));
    1336             : 
    1337          52 :   SDValue RegSeq = createQTuple(Regs);
    1338             : 
    1339          26 :   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
    1340             : 
    1341             :   unsigned LaneNo =
    1342         104 :       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
    1343             : 
    1344          52 :   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
    1345         104 :                    N->getOperand(NumVecs + 3), N->getOperand(0)};
    1346          78 :   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
    1347          26 :   SDValue SuperReg = SDValue(Ld, 0);
    1348             : 
    1349          78 :   EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    1350             :   static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
    1351             :                                     AArch64::qsub2, AArch64::qsub3 };
    1352         102 :   for (unsigned i = 0; i < NumVecs; ++i) {
    1353          76 :     SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    1354          76 :     if (Narrow)
    1355          13 :       NV = NarrowVector(NV, *CurDAG);
    1356          76 :     ReplaceUses(SDValue(N, i), NV);
    1357             :   }
    1358             : 
    1359          26 :   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
    1360          26 :   CurDAG->RemoveDeadNode(N);
    1361          26 : }
    1362             : 
    1363          94 : void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
    1364             :                                              unsigned Opc) {
    1365         188 :   SDLoc dl(N);
    1366         188 :   EVT VT = N->getValueType(0);
    1367          94 :   bool Narrow = VT.getSizeInBits() == 64;
    1368             : 
    1369             :   // Form a REG_SEQUENCE to force register allocation.
    1370         282 :   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
    1371             : 
    1372          94 :   if (Narrow)
    1373          74 :     transform(Regs, Regs.begin(),
    1374          37 :                    WidenVector(*CurDAG));
    1375             : 
    1376         188 :   SDValue RegSeq = createQTuple(Regs);
    1377             : 
    1378             :   const EVT ResTys[] = {MVT::i64, // Type of the write back register
    1379         188 :                         RegSeq->getValueType(0), MVT::Other};
    1380             : 
    1381             :   unsigned LaneNo =
    1382         376 :       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
    1383             : 
    1384             :   SDValue Ops[] = {RegSeq,
    1385          94 :                    CurDAG->getTargetConstant(LaneNo, dl,
    1386          94 :                                              MVT::i64),         // Lane Number
    1387         188 :                    N->getOperand(NumVecs + 2),                  // Base register
    1388         188 :                    N->getOperand(NumVecs + 3),                  // Incremental
    1389         470 :                    N->getOperand(0)};
    1390         282 :   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
    1391             : 
    1392             :   // Update uses of the write back register
    1393         188 :   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
    1394             : 
    1395             :   // Update uses of the vector list
    1396          94 :   SDValue SuperReg = SDValue(Ld, 1);
    1397          94 :   if (NumVecs == 1) {
    1398          23 :     ReplaceUses(SDValue(N, 0),
    1399           1 :                 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
    1400             :   } else {
    1401         216 :     EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    1402             :     static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
    1403             :                                       AArch64::qsub2, AArch64::qsub3 };
    1404         288 :     for (unsigned i = 0; i < NumVecs; ++i) {
    1405         216 :       SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
    1406         216 :                                                   SuperReg);
    1407         216 :       if (Narrow)
    1408         108 :         NV = NarrowVector(NV, *CurDAG);
    1409         216 :       ReplaceUses(SDValue(N, i), NV);
    1410             :     }
    1411             :   }
    1412             : 
    1413             :   // Update the Chain
    1414          94 :   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
    1415          94 :   CurDAG->RemoveDeadNode(N);
    1416          94 : }
    1417             : 
    1418          23 : void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
    1419             :                                           unsigned Opc) {
    1420          46 :   SDLoc dl(N);
    1421          69 :   EVT VT = N->getOperand(2)->getValueType(0);
    1422          23 :   bool Narrow = VT.getSizeInBits() == 64;
    1423             : 
    1424             :   // Form a REG_SEQUENCE to force register allocation.
    1425          69 :   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
    1426             : 
    1427          23 :   if (Narrow)
    1428          14 :     transform(Regs, Regs.begin(),
    1429           7 :                    WidenVector(*CurDAG));
    1430             : 
    1431          46 :   SDValue RegSeq = createQTuple(Regs);
    1432             : 
    1433             :   unsigned LaneNo =
    1434          92 :       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
    1435             : 
    1436          46 :   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
    1437          92 :                    N->getOperand(NumVecs + 3), N->getOperand(0)};
    1438          69 :   SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    1439             : 
    1440             :   // Transfer memoperands.
    1441          23 :   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
    1442          23 :   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
    1443          46 :   cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
    1444             : 
    1445          46 :   ReplaceNode(N, St);
    1446          23 : }
    1447             : 
    1448          72 : void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
    1449             :                                               unsigned Opc) {
    1450         144 :   SDLoc dl(N);
    1451         216 :   EVT VT = N->getOperand(2)->getValueType(0);
    1452          72 :   bool Narrow = VT.getSizeInBits() == 64;
    1453             : 
    1454             :   // Form a REG_SEQUENCE to force register allocation.
    1455         216 :   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
    1456             : 
    1457          72 :   if (Narrow)
    1458          72 :     transform(Regs, Regs.begin(),
    1459          36 :                    WidenVector(*CurDAG));
    1460             : 
    1461         144 :   SDValue RegSeq = createQTuple(Regs);
    1462             : 
    1463          72 :   const EVT ResTys[] = {MVT::i64, // Type of the write back register
    1464             :                         MVT::Other};
    1465             : 
    1466             :   unsigned LaneNo =
    1467         288 :       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
    1468             : 
    1469         144 :   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
    1470         144 :                    N->getOperand(NumVecs + 2), // Base Register
    1471         144 :                    N->getOperand(NumVecs + 3), // Incremental
    1472         360 :                    N->getOperand(0)};
    1473         216 :   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
    1474             : 
    1475             :   // Transfer memoperands.
    1476          72 :   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
    1477          72 :   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
    1478         144 :   cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
    1479             : 
    1480         144 :   ReplaceNode(N, St);
    1481          72 : }
    1482             : 
    1483         651 : static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
    1484             :                                        unsigned &Opc, SDValue &Opd0,
    1485             :                                        unsigned &LSB, unsigned &MSB,
    1486             :                                        unsigned NumberOfIgnoredLowBits,
    1487             :                                        bool BiggerPattern) {
    1488             :   assert(N->getOpcode() == ISD::AND &&
    1489             :          "N must be a AND operation to call this function");
    1490             : 
    1491        1302 :   EVT VT = N->getValueType(0);
    1492             : 
    1493             :   // Here we can test the type of VT and return false when the type does not
     1494             :   // match, but since that check is done prior to this call in the current
     1495             :   // context, we turn it into an assert to avoid redundant code.
    1496             :   assert((VT == MVT::i32 || VT == MVT::i64) &&
    1497             :          "Type checking must have been done before calling this function");
    1498             : 
    1499             :   // FIXME: simplify-demanded-bits in DAGCombine will probably have
    1500             :   // changed the AND node to a 32-bit mask operation. We'll have to
    1501             :   // undo that as part of the transform here if we want to catch all
    1502             :   // the opportunities.
    1503             :   // Currently the NumberOfIgnoredLowBits argument helps to recover
     1504             :   // from these situations when matching the bigger pattern (bitfield insert).
    1505             : 
    1506             :   // For unsigned extracts, check for a shift right and mask
    1507         651 :   uint64_t AndImm = 0;
    1508         651 :   if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
    1509             :     return false;
    1510             : 
    1511        1178 :   const SDNode *Op0 = N->getOperand(0).getNode();
    1512             : 
    1513             :   // Because of simplify-demanded-bits in DAGCombine, the mask may have been
     1514             :   // simplified. Try to undo that.
    1515         589 :   AndImm |= (1 << NumberOfIgnoredLowBits) - 1;
    1516             : 
    1517             :   // The immediate is a mask of the low bits iff imm & (imm+1) == 0
    1518         589 :   if (AndImm & (AndImm + 1))
    1519             :     return false;
    1520             : 
    1521         386 :   bool ClampMSB = false;
    1522         386 :   uint64_t SrlImm = 0;
    1523             :   // Handle the SRL + ANY_EXTEND case.
    1524         445 :   if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
    1525          44 :       isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
    1526             :     // Extend the incoming operand of the SRL to 64-bit.
    1527          30 :     Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    1528             :     // Make sure to clamp the MSB so that we preserve the semantics of the
    1529             :     // original operations.
    1530          10 :     ClampMSB = true;
    1531         703 :   } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
    1532          11 :              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
    1533             :                                    SrlImm)) {
    1534             :     // If the shift result was truncated, we can still combine them.
    1535           9 :     Opd0 = Op0->getOperand(0).getOperand(0);
    1536             : 
    1537             :     // Use the type of SRL node.
    1538           6 :     VT = Opd0->getValueType(0);
    1539          25 :   } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
    1540          50 :     Opd0 = Op0->getOperand(0);
    1541         348 :   } else if (BiggerPattern) {
    1542             :     // Let's pretend a 0 shift right has been performed.
    1543             :     // The resulting code will be at least as good as the original one
     1544             :     // and may expose more opportunities for the bitfield insert pattern.
    1545             :     // FIXME: Currently we limit this to the bigger pattern, because
    1546             :     // some optimizations expect AND and not UBFM.
    1547          12 :     Opd0 = N->getOperand(0);
    1548             :   } else
    1549             :     return false;
    1550             : 
    1551             :   // Bail out on large immediates. This happens when no proper
    1552             :   // combining/constant folding was performed.
    1553          44 :   if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    1554             :     DEBUG((dbgs() << N
    1555             :            << ": Found large shift immediate, this should not happen\n"));
    1556             :     return false;
    1557             :   }
    1558             : 
    1559          44 :   LSB = SrlImm;
    1560         148 :   MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
    1561         100 :                                  : countTrailingOnes<uint64_t>(AndImm)) -
    1562             :         1;
    1563          44 :   if (ClampMSB)
    1564             :     // Since we're moving the extend before the right shift operation, we need
    1565             :     // to clamp the MSB to make sure we don't shift in undefined bits instead of
    1566             :     // the zeros which would get shifted in with the original right shift
    1567             :     // operation.
    1568          10 :     MSB = MSB > 31 ? 31 : MSB;
    1569             : 
    1570          88 :   Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
    1571          44 :   return true;
    1572             : }
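
A worked instance of the AND + SRL path (values mine): for the i32 expression (x >> 3) & 0x1f, AndImm = 0x1f is a low-bit mask and SrlImm = 3, so

      LSB = 3
      MSB = 3 + countTrailingOnes(0x1f) - 1 = 7

and the node selects to UBFMWri x, 3, 7, i.e. UBFX #3, width 5: bits [7:3].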
    1573             : 
    1574         239 : static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
    1575             :                                              SDValue &Opd0, unsigned &Immr,
    1576             :                                              unsigned &Imms) {
    1577             :   assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
    1578             : 
    1579         478 :   EVT VT = N->getValueType(0);
    1580         239 :   unsigned BitWidth = VT.getSizeInBits();
    1581             :   assert((VT == MVT::i32 || VT == MVT::i64) &&
    1582             :          "Type checking must have been done before calling this function");
    1583             : 
    1584         478 :   SDValue Op = N->getOperand(0);
    1585         239 :   if (Op->getOpcode() == ISD::TRUNCATE) {
    1586          22 :     Op = Op->getOperand(0);
    1587          22 :     VT = Op->getValueType(0);
    1588          11 :     BitWidth = VT.getSizeInBits();
    1589             :   }
    1590             : 
    1591             :   uint64_t ShiftImm;
    1592         710 :   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
    1593         464 :       !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    1594             :     return false;
    1595             : 
    1596          21 :   unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    1597           7 :   if (ShiftImm + Width > BitWidth)
    1598             :     return false;
    1599             : 
    1600          14 :   Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
    1601          14 :   Opd0 = Op.getOperand(0);
    1602           7 :   Immr = ShiftImm;
    1603           7 :   Imms = ShiftImm + Width - 1;
    1604           7 :   return true;
    1605             : }
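
Worked values (mine): for i32 sign_extend_inreg (srl x, 4), i8, we get ShiftImm = 4 and Width = 8 with 4 + 8 <= 32, hence

      Immr = 4, Imms = 4 + 8 - 1 = 11

selecting SBFMWri x, 4, 11, i.e. SBFX x, #4, #8.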
    1606             : 
    1607         210 : static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
    1608             :                                           SDValue &Opd0, unsigned &LSB,
    1609             :                                           unsigned &MSB) {
     1610             :   // We are looking for the following pattern, which extracts several
     1611             :   // contiguous bits from the source value and places them starting at the LSB
     1612             :   // of the destination value; all other destination bits are set to zero:
    1613             :   //
    1614             :   // Value2 = AND Value, MaskImm
    1615             :   // SRL Value2, ShiftImm
    1616             :   //
     1617             :   // where MaskImm >> ShiftImm determines the extracted bit width.
    1618             :   //
    1619             :   // This gets selected into a single UBFM:
    1620             :   //
     1621             :   // UBFM Value, ShiftImm, BitWide + SrlImm - 1
    1622             :   //
    1623             : 
    1624         210 :   if (N->getOpcode() != ISD::SRL)
    1625             :     return false;
    1626             : 
    1627         140 :   uint64_t AndMask = 0;
    1628         420 :   if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    1629             :     return false;
    1630             : 
    1631          27 :   Opd0 = N->getOperand(0).getOperand(0);
    1632             : 
    1633           9 :   uint64_t SrlImm = 0;
    1634          27 :   if (!isIntImmediate(N->getOperand(1), SrlImm))
    1635             :     return false;
    1636             : 
     1637             :   // Check whether we really have a several-bits extract here.
    1638          12 :   unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
    1639          11 :   if (BitWide && isMask_64(AndMask >> SrlImm)) {
    1640          15 :     if (N->getValueType(0) == MVT::i32)
    1641           5 :       Opc = AArch64::UBFMWri;
    1642             :     else
    1643           0 :       Opc = AArch64::UBFMXri;
    1644             : 
    1645           5 :     LSB = SrlImm;
    1646           5 :     MSB = BitWide + SrlImm - 1;
    1647           5 :     return true;
    1648             :   }
    1649             : 
    1650             :   return false;
    1651             : }
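
Worked values (mine) for the AND-under-SRL pattern: with i32 (x & 0xff0) >> 4, AndMask >> SrlImm = 0xff is a mask of width 8, so

      BitWide = 8, SrlImm = 4
      LSB = 4, MSB = 8 + 4 - 1 = 11

and the whole expression becomes UBFMWri x, 4, 11, extracting bits [11:4].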
    1652             : 
    1653         210 : static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
    1654             :                                        unsigned &Immr, unsigned &Imms,
    1655             :                                        bool BiggerPattern) {
    1656             :   assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
    1657             :          "N must be a SHR/SRA operation to call this function");
    1658             : 
    1659         420 :   EVT VT = N->getValueType(0);
    1660             : 
    1661             :   // Here we can test the type of VT and return false when the type does not
     1662             :   // match, but since that check is done prior to this call in the current
     1663             :   // context, we turn it into an assert to avoid redundant code.
    1664             :   assert((VT == MVT::i32 || VT == MVT::i64) &&
    1665             :          "Type checking must have been done before calling this function");
    1666             : 
    1667             :   // Check for AND + SRL doing several bits extract.
    1668         210 :   if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
    1669             :     return true;
    1670             : 
    1671             :   // We're looking for a shift of a shift.
    1672         205 :   uint64_t ShlImm = 0;
    1673         205 :   uint64_t TruncBits = 0;
    1674         424 :   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
    1675          42 :     Opd0 = N->getOperand(0).getOperand(0);
    1676         289 :   } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
    1677          74 :              N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
     1678             :     // We are looking for a shift of a truncate. Truncating from i64 to i32
     1679             :     // can be viewed as setting the high 32 bits to zero. Our strategy here
     1680             :     // is to always generate a 64-bit UBFM; this consistency helps the CSE
     1681             :     // pass find more redundancy later.
    1682           3 :     Opd0 = N->getOperand(0).getOperand(0);
    1683           2 :     TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    1684           2 :     VT = Opd0->getValueType(0);
    1685             :     assert(VT == MVT::i64 && "the promoted type should be i64");
    1686         190 :   } else if (BiggerPattern) {
    1687             :     // Let's pretend a 0 shift left has been performed.
    1688             :     // FIXME: Currently we limit this to the bigger pattern case,
    1689             :     // because some optimizations expect AND and not UBFM
    1690          10 :     Opd0 = N->getOperand(0);
    1691             :   } else
    1692             :     return false;
    1693             : 
    1694             :   // Missing combines/constant folding may have left us with strange
    1695             :   // constants.
    1696          20 :   if (ShlImm >= VT.getSizeInBits()) {
    1697             :     DEBUG((dbgs() << N
    1698             :            << ": Found large shift immediate, this should not happen\n"));
    1699             :     return false;
    1700             :   }
    1701             : 
    1702          20 :   uint64_t SrlImm = 0;
    1703          60 :   if (!isIntImmediate(N->getOperand(1), SrlImm))
    1704             :     return false;
    1705             : 
    1706             :   assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
    1707             :          "bad amount in shift node!");
    1708          17 :   int immr = SrlImm - ShlImm;
    1709          17 :   Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
    1710          17 :   Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
    1711             :   // SRA requires a signed extraction
    1712          34 :   if (VT == MVT::i32)
    1713           9 :     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
    1714             :   else
    1715          16 :     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
    1716             :   return true;
    1717             : }
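
A worked shift-of-shift case (values mine): for i64 (srl (shl x, 8), 16), ShlImm = 8, SrlImm = 16 and TruncBits = 0, giving

      Immr = 16 - 8 = 8
      Imms = 64 - 8 - 0 - 1 = 55

so the pair selects to UBFMXri x, 8, 55, i.e. UBFX x, #8, #48: bits [55:8] of x.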
    1718             : 
    1719          83 : bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
    1720             :   assert(N->getOpcode() == ISD::SIGN_EXTEND);
    1721             : 
    1722         166 :   EVT VT = N->getValueType(0);
    1723         249 :   EVT NarrowVT = N->getOperand(0)->getValueType(0);
    1724         113 :   if (VT != MVT::i64 || NarrowVT != MVT::i32)
    1725             :     return false;
    1726             : 
    1727             :   uint64_t ShiftImm;
    1728          60 :   SDValue Op = N->getOperand(0);
    1729          60 :   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    1730             :     return false;
    1731             : 
    1732           1 :   SDLoc dl(N);
    1733             :   // Extend the incoming operand of the shift to 64-bits.
    1734           2 :   SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
    1735           1 :   unsigned Immr = ShiftImm;
    1736           1 :   unsigned Imms = NarrowVT.getSizeInBits() - 1;
    1737           1 :   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
    1738           3 :                    CurDAG->getTargetConstant(Imms, dl, VT)};
    1739           2 :   CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
    1740           1 :   return true;
    1741             : }
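
For instance (values mine): i64 (sign_extend (i32 (sra x, 5))) widens the shift's operand to 64 bits and emits

      SBFMXri (Widen x), 5, 31

folding the arithmetic shift and the sign extension into a single SBFM.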
    1742             : 
    1743        1544 : static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
    1744             :                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
    1745             :                                 unsigned NumberOfIgnoredLowBits = 0,
    1746             :                                 bool BiggerPattern = false) {
    1747        3676 :   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
    1748             :     return false;
    1749             : 
    1750        2970 :   switch (N->getOpcode()) {
    1751         385 :   default:
    1752         385 :     if (!N->isMachineOpcode())
    1753             :       return false;
    1754             :     break;
    1755         651 :   case ISD::AND:
    1756         651 :     return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
    1757         651 :                                       NumberOfIgnoredLowBits, BiggerPattern);
    1758         210 :   case ISD::SRL:
    1759             :   case ISD::SRA:
    1760         210 :     return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
    1761             : 
    1762         239 :   case ISD::SIGN_EXTEND_INREG:
    1763         239 :     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
    1764             :   }
    1765             : 
    1766          12 :   unsigned NOpc = N->getMachineOpcode();
    1767           6 :   switch (NOpc) {
    1768             :   default:
    1769             :     return false;
    1770           0 :   case AArch64::SBFMWri:
    1771             :   case AArch64::UBFMWri:
    1772             :   case AArch64::SBFMXri:
    1773             :   case AArch64::UBFMXri:
    1774           0 :     Opc = NOpc;
    1775           0 :     Opd0 = N->getOperand(0);
    1776           0 :     Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
    1777           0 :     Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
    1778           0 :     return true;
    1779             :   }
    1780             :   // Unreachable
    1781             :   return false;
    1782             : }
    1783             : 
    1784        1010 : bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
    1785             :   unsigned Opc, Immr, Imms;
    1786        1010 :   SDValue Opd0;
    1787        1010 :   if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    1788             :     return false;
    1789             : 
    1790          90 :   EVT VT = N->getValueType(0);
    1791          45 :   SDLoc dl(N);
    1792             : 
    1793             :   // If the bit extract operation is 64bit but the original type is 32bit, we
    1794             :   // need to add one EXTRACT_SUBREG.
    1795          72 :   if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    1796          10 :     SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
    1797          20 :                        CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
    1798             : 
    1799          15 :     SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    1800          15 :     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    1801          20 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
    1802           5 :                                           MVT::i32, SDValue(BFM, 0), SubReg));
    1803             :     return true;
    1804             :   }
    1805             : 
    1806          40 :   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
    1807         120 :                    CurDAG->getTargetConstant(Imms, dl, VT)};
    1808          80 :   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    1809          40 :   return true;
    1810             : }
    1811             : 
    1812             : /// Does DstMask form a complementary pair with the mask provided by
     1813             : /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
    1814             : /// this asks whether DstMask zeroes precisely those bits that will be set by
    1815             : /// the other half.
    1816          33 : static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
    1817             :                               unsigned NumberOfIgnoredHighBits, EVT VT) {
    1818             :   assert((VT == MVT::i32 || VT == MVT::i64) &&
    1819             :          "i32 or i64 mask type expected!");
    1820          33 :   unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
    1821             : 
    1822          66 :   APInt SignificantDstMask = APInt(BitWidth, DstMask);
    1823          66 :   APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
    1824             : 
    1825         231 :   return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
    1826         231 :          (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
    1827             : }
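
A worked check (values mine), with VT = i32 and no ignored high bits:

      DstMask          = 0xffffff00
      BitsToBeInserted = 0x000000ff
      AND == 0, OR == 0xffffffff  ->  complementary; BFI applies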
    1828             : 
    1829             : // Look for bits that will be useful for later uses.
     1830             : // A bit is considered useless as soon as it is dropped and never used
     1831             : // before it has been dropped.
     1832             : // E.g., looking for the useful bits of x:
    1833             : // 1. y = x & 0x7
    1834             : // 2. z = y >> 2
     1835             : // After #1, the useful bits of x are 0x7, and they live on through
     1836             : // y.
    1837             : // After #2, the useful bits of x are 0x4.
     1838             : // However, if x is used in an unpredictable instruction, then all its bits
    1839             : // are useful.
    1840             : // E.g.
    1841             : // 1. y = x & 0x7
    1842             : // 2. z = y >> 2
    1843             : // 3. str x, [@x]
    1844             : static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
    1845             : 
    1846           7 : static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
    1847             :                                               unsigned Depth) {
    1848             :   uint64_t Imm =
    1849          28 :       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
    1850           7 :   Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
    1851          28 :   UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
    1852           7 :   getUsefulBits(Op, UsefulBits, Depth + 1);
    1853           7 : }
    1854             : 
    1855          20 : static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
    1856             :                                              uint64_t Imm, uint64_t MSB,
    1857             :                                              unsigned Depth) {
    1858             :   // inherit the bitwidth value
    1859          40 :   APInt OpUsefulBits(UsefulBits);
    1860          20 :   OpUsefulBits = 1;
    1861             : 
    1862          20 :   if (MSB >= Imm) {
    1863           8 :     OpUsefulBits <<= MSB - Imm + 1;
    1864           8 :     --OpUsefulBits;
    1865             :     // The interesting part will be in the lower part of the result
    1866           8 :     getUsefulBits(Op, OpUsefulBits, Depth + 1);
    1867             :     // The interesting part was starting at Imm in the argument
    1868           8 :     OpUsefulBits <<= Imm;
    1869             :   } else {
    1870          12 :     OpUsefulBits <<= MSB + 1;
    1871          12 :     --OpUsefulBits;
    1872             :     // The interesting part will be shifted in the result
    1873          12 :     OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    1874          12 :     getUsefulBits(Op, OpUsefulBits, Depth + 1);
    1875             :     // The interesting part was at zero in the argument
    1876          12 :     OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
    1877             :   }
    1878             : 
    1879          20 :   UsefulBits &= OpUsefulBits;
    1880          20 : }
    1881             : 
    1882          20 : static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
    1883             :                                   unsigned Depth) {
    1884             :   uint64_t Imm =
    1885          80 :       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
    1886             :   uint64_t MSB =
    1887          80 :       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    1888             : 
    1889          20 :   getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
    1890          20 : }
    1891             : 
    1892           0 : static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
    1893             :                                               unsigned Depth) {
    1894             :   uint64_t ShiftTypeAndValue =
    1895           0 :       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    1896           0 :   APInt Mask(UsefulBits);
    1897           0 :   Mask.clearAllBits();
    1898           0 :   Mask.flipAllBits();
    1899             : 
    1900           0 :   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    1901             :     // Shift Left
    1902           0 :     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    1903           0 :     Mask <<= ShiftAmt;
    1904           0 :     getUsefulBits(Op, Mask, Depth + 1);
    1905           0 :     Mask.lshrInPlace(ShiftAmt);
    1906           0 :   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    1907             :     // Shift Right
    1908             :     // We do not handle AArch64_AM::ASR, because the sign will change the
    1909             :     // number of useful bits
    1910           0 :     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    1911           0 :     Mask.lshrInPlace(ShiftAmt);
    1912           0 :     getUsefulBits(Op, Mask, Depth + 1);
    1913           0 :     Mask <<= ShiftAmt;
    1914             :   } else
    1915           0 :     return;
    1916             : 
    1917           0 :   UsefulBits &= Mask;
    1918             : }
    1919             : 
    1920          15 : static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
    1921             :                                  unsigned Depth) {
    1922             :   uint64_t Imm =
    1923          60 :       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    1924             :   uint64_t MSB =
    1925          60 :       cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
    1926             : 
    1927          30 :   APInt OpUsefulBits(UsefulBits);
    1928          15 :   OpUsefulBits = 1;
    1929             : 
    1930          45 :   APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
    1931          15 :   ResultUsefulBits.flipAllBits();
    1932          45 :   APInt Mask(UsefulBits.getBitWidth(), 0);
    1933             : 
    1934          15 :   getUsefulBits(Op, ResultUsefulBits, Depth + 1);
    1935             : 
    1936          15 :   if (MSB >= Imm) {
    1937             :     // The instruction is a BFXIL.
    1938           2 :     uint64_t Width = MSB - Imm + 1;
    1939           2 :     uint64_t LSB = Imm;
    1940             : 
    1941           2 :     OpUsefulBits <<= Width;
    1942           2 :     --OpUsefulBits;
    1943             : 
    1944           4 :     if (Op.getOperand(1) == Orig) {
    1945             :       // Copy the low bits from the result to bits starting from LSB.
    1946           0 :       Mask = ResultUsefulBits & OpUsefulBits;
    1947           0 :       Mask <<= LSB;
    1948             :     }
    1949             : 
    1950           6 :     if (Op.getOperand(0) == Orig)
    1951             :       // Bits starting from LSB in the input contribute to the result.
    1952          14 :       Mask |= (ResultUsefulBits & ~OpUsefulBits);
    1953             :   } else {
    1954             :     // The instruction is a BFI.
    1955          13 :     uint64_t Width = MSB + 1;
    1956          13 :     uint64_t LSB = UsefulBits.getBitWidth() - Imm;
    1957             : 
    1958          13 :     OpUsefulBits <<= Width;
    1959          13 :     --OpUsefulBits;
    1960          13 :     OpUsefulBits <<= LSB;
    1961             : 
    1962          36 :     if (Op.getOperand(1) == Orig) {
    1963             :       // Copy the bits from the result to the zero bits.
    1964          50 :       Mask = ResultUsefulBits & OpUsefulBits;
    1965          10 :       Mask.lshrInPlace(LSB);
    1966             :     }
    1967             : 
    1968          33 :     if (Op.getOperand(0) == Orig)
    1969          49 :       Mask |= (ResultUsefulBits & ~OpUsefulBits);
    1970             :   }
    1971             : 
    1972          15 :   UsefulBits &= Mask;
    1973          15 : }
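                     : 
                     : // A concrete reading of the MSB >= Imm split above (illustrative, using the
                     : // standard BFM aliases on 32-bit values): ImmR=8, ImmS=15 has MSB >= Imm
                     : // and is "bfxil wd, wn, #8, #8", copying bits [15:8] of wn into the low
                     : // bits of wd; ImmR=16, ImmS=15 has MSB < Imm and is "bfi wd, wn, #16, #16",
                     : // inserting the low 16 bits of wn at bit 16 of wd.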
    1974             : 
    1975         336 : static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
    1976             :                                 SDValue Orig, unsigned Depth) {
    1977             : 
    1978             :   // Users of this node should have already been instruction selected
    1979             :   // FIXME: Can we turn that into an assert?
    1980         336 :   if (!UserNode->isMachineOpcode())
    1981             :     return;
    1982             : 
    1983         352 :   switch (UserNode->getMachineOpcode()) {
    1984             :   default:
    1985             :     return;
    1986           7 :   case AArch64::ANDSWri:
    1987             :   case AArch64::ANDSXri:
    1988             :   case AArch64::ANDWri:
    1989             :   case AArch64::ANDXri:
    1990             :     // We increment Depth only when we call getUsefulBits
    1991           7 :     return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
    1992           7 :                                              Depth);
    1993          20 :   case AArch64::UBFMWri:
    1994             :   case AArch64::UBFMXri:
    1995          20 :     return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
    1996             : 
    1997           6 :   case AArch64::ORRWrs:
    1998             :   case AArch64::ORRXrs:
    1999          12 :     if (UserNode->getOperand(1) != Orig)
    2000             :       return;
    2001           0 :     return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
    2002           0 :                                              Depth);
    2003          15 :   case AArch64::BFMWri:
    2004             :   case AArch64::BFMXri:
    2005          15 :     return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
    2006             : 
    2007           3 :   case AArch64::STRBBui:
    2008             :   case AArch64::STURBBi:
    2009           9 :     if (UserNode->getOperand(0) != Orig)
    2010             :       return;
    2011          12 :     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
    2012           3 :     return;
    2013             : 
    2014           5 :   case AArch64::STRHHui:
    2015             :   case AArch64::STURHHi:
    2016          15 :     if (UserNode->getOperand(0) != Orig)
    2017             :       return;
    2018          20 :     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
    2019           5 :     return;
    2020             :   }
    2021             : }
    2022             : 
    2023         328 : static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
    2024         328 :   if (Depth >= 6)
    2025           0 :     return;
    2026             :   // Initialize UsefulBits
    2027         328 :   if (!Depth) {
    2028         286 :     unsigned Bitwidth = Op.getScalarValueSizeInBits();
    2029             :     // At the beginning, assume every produced bit is useful
    2030         858 :     UsefulBits = APInt(Bitwidth, 0);
    2031         286 :     UsefulBits.flipAllBits();
    2032             :   }
    2033         984 :   APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
    2034             : 
    2035        1984 :   for (SDNode *Node : Op.getNode()->uses()) {
    2036             :     // A use cannot produce useful bits
    2037         672 :     APInt UsefulBitsForUse = APInt(UsefulBits);
    2038         336 :     getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
    2039         336 :     UsersUsefulBits |= UsefulBitsForUse;
    2040             :   }
    2041             :   // UsefulBits contains the produced bits that are meaningful for the
    2042             :   // current definition; thus a user cannot make a bit meaningful at
    2043             :   // this point.
    2044         328 :   UsefulBits &= UsersUsefulBits;
    2045             : }
    2046             : 
    2047             : /// Create a machine node performing a notional SHL of Op by ShlAmount. If
    2048             : /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
    2049             : /// 0, return Op unchanged.
    2050          68 : static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
    2051          68 :   if (ShlAmount == 0)
    2052          67 :     return Op;
    2053             : 
    2054           2 :   EVT VT = Op.getValueType();
    2055           1 :   SDLoc dl(Op);
    2056           1 :   unsigned BitWidth = VT.getSizeInBits();
    2057           1 :   unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
    2058             : 
    2059             :   SDNode *ShiftNode;
    2060           1 :   if (ShlAmount > 0) {
    2061             :     // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
    2062           0 :     ShiftNode = CurDAG->getMachineNode(
    2063             :         UBFMOpc, dl, VT, Op,
    2064             :         CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
    2065           0 :         CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
    2066             :   } else {
    2067             :     // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
    2068             :     assert(ShlAmount < 0 && "expected right shift");
    2069           1 :     int ShrAmount = -ShlAmount;
    2070           3 :     ShiftNode = CurDAG->getMachineNode(
    2071             :         UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
    2072           1 :         CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
    2073             :   }
    2074             : 
    2075           1 :   return SDValue(ShiftNode, 0);
    2076             : }
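                     : 
                     : // For example (32-bit case): a notional left shift by 3 is emitted as
                     : // "ubfm wd, wn, #29, #28" (the LSL #3 alias), while ShlAmount = -4 is
                     : // emitted as "ubfm wd, wn, #4, #31" (the LSR #4 alias).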
    2077             : 
    2078             : /// Does this tree qualify as an attempt to move a bitfield into position,
    2079             : /// essentially "(and (shl VAL, N), Mask)".
    2080        1004 : static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
    2081             :                                     bool BiggerPattern,
    2082             :                                     SDValue &Src, int &ShiftAmount,
    2083             :                                     int &MaskWidth) {
    2084        2008 :   EVT VT = Op.getValueType();
    2085        1004 :   unsigned BitWidth = VT.getSizeInBits();
    2086             :   (void)BitWidth;
    2087             :   assert(BitWidth == 32 || BitWidth == 64);
    2088             : 
    2089        2008 :   KnownBits Known;
    2090        1004 :   CurDAG->computeKnownBits(Op, Known);
    2091             : 
    2092             :   // Non-zero in the sense that they're not provably zero, which is the key
    2093             :   // point if we want to use this value
    2094        5020 :   uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
    2095             : 
    2096             :   // Discard a constant AND mask if present. It's safe because the node will
    2097             :   // already have been factored into the computeKnownBits calculation above.
    2098             :   uint64_t AndImm;
    2099        1549 :   if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
    2100             :     assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
    2101        1090 :     Op = Op.getOperand(0);
    2102             :   }
    2103             : 
    2104             :   // Don't match if the SHL has more than one use, since then we'll end up
    2105             :   // generating SHL+UBFIZ instead of just keeping SHL+AND.
    2106        1795 :   if (!BiggerPattern && !Op.hasOneUse())
    2107             :     return false;
    2108             : 
    2109             :   uint64_t ShlImm;
    2110        1842 :   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
    2111             :     return false;
    2112         150 :   Op = Op.getOperand(0);
    2113             : 
    2114          69 :   if (!isShiftedMask_64(NonZeroBits))
    2115             :     return false;
    2116             : 
    2117          69 :   ShiftAmount = countTrailingZeros(NonZeroBits);
    2118         138 :   MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
    2119             : 
    2120             :   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
    2121             :   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
    2122             :   // amount.  BiggerPattern is true when this pattern is being matched for BFI,
    2123             :   // BiggerPattern is false when this pattern is being matched for UBFIZ, in
    2124             :   // which case it is not profitable to insert an extra shift.
    2125          69 :   if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
    2126             :     return false;
    2127          68 :   Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
    2128             : 
    2129          68 :   return true;
    2130             : }
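                     : 
                     : // A worked instance of the pattern (values illustrative): for
                     : // "(and (shl x, 4), 0xff0)" the provably non-zero bits are 0xff0, a shifted
                     : // mask, so the match yields Src = x, ShiftAmount = 4 and MaskWidth = 8;
                     : // no extra shift is needed because ShlImm == ShiftAmount.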
    2131             : 
    2132             : static bool isShiftedMask(uint64_t Mask, EVT VT) {
    2133             :   assert(VT == MVT::i32 || VT == MVT::i64);
    2134          32 :   if (VT == MVT::i32)
    2135          11 :     return isShiftedMask_32(Mask);
    2136             :   return isShiftedMask_64(Mask);
    2137             : }
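                     : 
                     : // E.g., 0x0ff0 and 0xff000000 are shifted masks (a single contiguous run of
                     : // ones, possibly shifted), while 0x0ff00ff0 is not.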
    2138             : 
    2139             : // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
    2140             : // inserted only sets known zero bits.
    2141         218 : static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
    2142             :   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
    2143             : 
    2144         436 :   EVT VT = N->getValueType(0);
    2145         385 :   if (VT != MVT::i32 && VT != MVT::i64)
    2146             :     return false;
    2147             : 
    2148         102 :   unsigned BitWidth = VT.getSizeInBits();
    2149             : 
    2150             :   uint64_t OrImm;
    2151         102 :   if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
    2152             :     return false;
    2153             : 
    2154             :   // Skip this transformation if the ORR immediate can be encoded in the ORR.
    2155             :   // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
    2156             :   // performance neutral.
    2157          68 :   if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
    2158             :     return false;
    2159             : 
    2160             :   uint64_t MaskImm;
    2161          18 :   SDValue And = N->getOperand(0);
    2162             :   // Must be a single use AND with an immediate operand.
    2163          18 :   if (!And.hasOneUse() ||
    2164          15 :       !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
    2165             :     return false;
    2166             : 
    2167             :   // Compute the Known Zero for the AND as this allows us to catch more general
    2168             :   // cases than just looking for AND with imm.
    2169          14 :   KnownBits Known;
    2170           7 :   CurDAG->computeKnownBits(And, Known);
    2171             : 
    2172             :   // Non-zero in the sense that they're not provably zero, which is the key
    2173             :   // point if we want to use this value.
    2174          35 :   uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
    2175             : 
    2176             :   // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
    2177           7 :   if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
    2178             :     return false;
    2179             : 
    2180             :   // The bits being inserted must only set those bits that are known to be zero.
    2181           7 :   if ((OrImm & NotKnownZero) != 0) {
    2182             :     // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
    2183             :     // currently handle this case.
    2184             :     return false;
    2185             :   }
    2186             : 
    2187             :   // BFI/BFXIL dst, src, #lsb, #width.
    2188           7 :   int LSB = countTrailingOnes(NotKnownZero);
    2189          21 :   int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
    2190             : 
    2191             :   // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
    2192           7 :   unsigned ImmR = (BitWidth - LSB) % BitWidth;
    2193           7 :   unsigned ImmS = Width - 1;
    2194             : 
    2195             :   // If we're creating a BFI instruction, avoid cases where we need more
    2196             :   // instructions to materialize the BFI constant as compared to the original
    2197             :   // ORR.  A BFXIL will use the same constant as the original ORR, so the code
    2198             :   // should be no worse in this case.
    2199           7 :   bool IsBFI = LSB != 0;
    2200           7 :   uint64_t BFIImm = OrImm >> LSB;
    2201          11 :   if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
    2202             :     // We have a BFI instruction and we know the constant can't be materialized
    2203             :     // with a ORR-immediate with the zero register.
    2204             :     unsigned OrChunks = 0, BFIChunks = 0;
    2205          28 :     for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
    2206          12 :       if (((OrImm >> Shift) & 0xFFFF) != 0)
    2207           6 :         ++OrChunks;
    2208          12 :       if (((BFIImm >> Shift) & 0xFFFF) != 0)
    2209           7 :         ++BFIChunks;
    2210             :     }
    2211           4 :     if (BFIChunks > OrChunks)
    2212             :       return false;
    2213             :   }
    2214             : 
    2215             :   // Materialize the constant to be inserted.
    2216           6 :   SDLoc DL(N);
    2217          12 :   unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
    2218           6 :   SDNode *MOVI = CurDAG->getMachineNode(
    2219           6 :       MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
    2220             : 
    2221             :   // Create the BFI/BFXIL instruction.
    2222          12 :   SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
    2223           6 :                    CurDAG->getTargetConstant(ImmR, DL, VT),
    2224          24 :                    CurDAG->getTargetConstant(ImmS, DL, VT)};
    2225          12 :   unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    2226           6 :   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    2227           6 :   return true;
    2228             : }
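                     : 
                     : // A sketch of the transformation (register numbers illustrative): for
                     : // "(x & 0xffffff00) | 0x5a" on i32, 0x5a is not a valid logical immediate,
                     : // the AND's known-zero bits 0xff form a shifted mask, and LSB = 0,
                     : // Width = 8 (BFM ImmR = 0, ImmS = 7), so the selection is roughly:
                     : //   mov   w8, #0x5a
                     : //   bfxil w0, w8, #0, #8    // w0 holds x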
    2229             : 
    2230         285 : static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
    2231             :                                       SelectionDAG *CurDAG) {
    2232             :   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
    2233             : 
    2234         570 :   EVT VT = N->getValueType(0);
    2235         474 :   if (VT != MVT::i32 && VT != MVT::i64)
    2236             :     return false;
    2237             : 
    2238         169 :   unsigned BitWidth = VT.getSizeInBits();
    2239             : 
    2240             :   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
    2241             :   // have the expected shape. Try to undo that.
    2242             : 
    2243         169 :   unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
    2244         169 :   unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
    2245             : 
    2246             :   // Given an OR operation, check if we have the following pattern
    2247             :   // ubfm c, b, imm, imm2 (or something that does the same job, see
    2248             :   //                       isBitfieldExtractOp)
    2249             :   // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
    2250             :   //                 countTrailingZeros(mask2) == imm2 - imm + 1
    2251             :   // f = d | c
    2252             :   // if yes, replace the OR instruction with:
    2253             :   // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
    2254             : 
    2255             :   // OR is commutative, check all combinations of operand order and values of
    2256             :   // BiggerPattern, i.e.
    2257             :   //     Opd0, Opd1, BiggerPattern=false
    2258             :   //     Opd1, Opd0, BiggerPattern=false
    2259             :   //     Opd0, Opd1, BiggerPattern=true
    2260             :   //     Opd1, Opd0, BiggerPattern=true
    2261             :   // Several of these combinations may match, so check with BiggerPattern=false
    2262             :   // first since that will produce better results by matching more instructions
    2263             :   // and/or inserting fewer extra instructions.
    2264         640 :   for (int I = 0; I < 4; ++I) {
    2265             : 
    2266         534 :     SDValue Dst, Src;
    2267             :     unsigned ImmR, ImmS;
    2268         534 :     bool BiggerPattern = I / 2;
    2269        1068 :     SDValue OrOpd0Val = N->getOperand(I % 2);
    2270         534 :     SDNode *OrOpd0 = OrOpd0Val.getNode();
    2271        1068 :     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    2272         534 :     SDNode *OrOpd1 = OrOpd1Val.getNode();
    2273             : 
    2274             :     unsigned BFXOpc;
    2275             :     int DstLSB, Width;
    2276         534 :     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
    2277             :                             NumberOfIgnoredLowBits, BiggerPattern)) {
    2278             :       // Check that the returned opcode is compatible with the pattern,
    2279             :       // i.e., same type and zero extended (U and not S)
    2280          74 :       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
    2281          12 :           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
    2282         473 :         continue;
    2283             : 
    2284             :       // Compute the width of the bitfield insertion
    2285          26 :       DstLSB = 0;
    2286          26 :       Width = ImmS - ImmR + 1;
    2287             :       // FIXME: This constraint is to catch bitfield insertion; we may
    2288             :       // want to widen the pattern if we want to grab the general bitfield
    2289             :       // move case.
    2290          26 :       if (Width <= 0)
    2291           0 :         continue;
    2292             : 
    2293             :       // If the mask on the insertee is correct, we have a BFXIL operation. We
    2294             :       // can share the ImmR and ImmS values from the already-computed UBFM.
    2295         946 :     } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
    2296             :                                        BiggerPattern,
    2297             :                                        Src, DstLSB, Width)) {
    2298          66 :       ImmR = (BitWidth - DstLSB) % BitWidth;
    2299          66 :       ImmS = Width - 1;
    2300             :     } else
    2301         440 :       continue;
    2302             : 
    2303             :     // Check the second part of the pattern
    2304         184 :     EVT VT = OrOpd1->getValueType(0);
    2305             :     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
    2306             : 
    2307             :     // Compute the Known Zero for the candidate of the first operand.
    2308             :     // This allows us to catch more general cases than just looking for
    2309             :     // AND with imm. Indeed, simplify-demanded-bits may have removed
    2310             :     // the AND instruction because it proved to be useless.
    2311          92 :     KnownBits Known;
    2312          92 :     CurDAG->computeKnownBits(OrOpd1Val, Known);
    2313             : 
    2314             :     // Check if there is enough room for the second operand to appear
    2315             :     // in the first one
    2316             :     APInt BitsToBeInserted =
    2317         184 :         APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
    2318             : 
    2319         644 :     if ((BitsToBeInserted & ~Known.Zero) != 0)
    2320          29 :       continue;
    2321             : 
    2322             :     // Set the first operand
    2323             :     uint64_t Imm;
    2324          66 :     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
    2325          33 :         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
    2326             :       // In that case, we can eliminate the AND
    2327          46 :       Dst = OrOpd1->getOperand(0);
    2328             :     else
    2329             :       // Maybe the AND has been removed by simplify-demanded-bits
    2330             :       // or is useful because it discards more bits
    2331          40 :       Dst = OrOpd1Val;
    2332             : 
    2333             :     // both parts match
    2334         126 :     SDLoc DL(N);
    2335          63 :     SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
    2336         189 :                      CurDAG->getTargetConstant(ImmS, DL, VT)};
    2337         126 :     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    2338          63 :     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    2339          63 :     return true;
    2340             :   }
    2341             : 
    2342             :   // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
    2343             :   // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
    2344             :   // mask (e.g., 0x000ffff0).
    2345             :   uint64_t Mask0Imm, Mask1Imm;
    2346         212 :   SDValue And0 = N->getOperand(0);
    2347         212 :   SDValue And1 = N->getOperand(1);
    2348         360 :   if (And0.hasOneUse() && And1.hasOneUse() &&
    2349          88 :       isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
    2350          33 :       isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
    2351         560 :       APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
    2352           5 :       (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
    2353             : 
    2354             :     // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    2355             :     // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    2356             :     // bits to be inserted.
    2357           4 :     if (isShiftedMask(Mask0Imm, VT)) {
    2358           3 :       std::swap(And0, And1);
    2359             :       std::swap(Mask0Imm, Mask1Imm);
    2360             :     }
    2361             : 
    2362           8 :     SDValue Src = And1->getOperand(0);
    2363           8 :     SDValue Dst = And0->getOperand(0);
    2364           8 :     unsigned LSB = countTrailingZeros(Mask1Imm);
    2365          16 :     int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
    2366             : 
    2367             :     // The BFXIL inserts the low-order bits from a source register, so right
    2368             :     // shift the needed bits into place.
    2369           8 :     SDLoc DL(N);
    2370           8 :     unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    2371          12 :     SDNode *LSR = CurDAG->getMachineNode(
    2372             :         ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
    2373           8 :         CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
    2374             : 
    2375             :     // BFXIL is an alias of BFM, so translate to BFM operands.
    2376           4 :     unsigned ImmR = (BitWidth - LSB) % BitWidth;
    2377           4 :     unsigned ImmS = Width - 1;
    2378             : 
    2379             :     // Create the BFXIL instruction.
    2380             :     SDValue Ops[] = {Dst, SDValue(LSR, 0),
    2381           4 :                      CurDAG->getTargetConstant(ImmR, DL, VT),
    2382          16 :                      CurDAG->getTargetConstant(ImmS, DL, VT)};
    2383           8 :     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    2384           4 :     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    2385           4 :     return true;
    2386             :   }
    2387             : 
    2388             :   return false;
    2389             : }
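                     : 
                     : // A sketch of the final two-AND case (register numbers illustrative): for
                     : // "(x & 0xffff0000) | (y & 0x0000ffff)" on i32 the masks are complementary
                     : // and 0xffff0000 is a shifted mask, giving LSB = 16, Width = 16
                     : // (BFM ImmR = 16, ImmS = 15), so the selection is roughly:
                     : //   lsr w8, w0, #16         // w0 holds x
                     : //   bfi w1, w8, #16, #16    // w1 holds y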
    2390             : 
    2391         286 : bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
    2392         286 :   if (N->getOpcode() != ISD::OR)
    2393             :     return false;
    2394             : 
    2395         286 :   APInt NUsefulBits;
    2396         286 :   getUsefulBits(SDValue(N, 0), NUsefulBits);
    2397             : 
    2398             :   // If all bits are not useful, just return UNDEF.
    2399         286 :   if (!NUsefulBits) {
    2400           2 :     CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
    2401             :     return true;
    2402             :   }
    2403             : 
    2404         285 :   if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
    2405             :     return true;
    2406             : 
    2407         218 :   return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
    2408             : }
    2409             : 
    2410             : /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
    2411             : /// equivalent of a left shift by a constant amount followed by an and masking
    2412             : /// out a contiguous set of bits.
    2413         965 : bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
    2414         965 :   if (N->getOpcode() != ISD::AND)
    2415             :     return false;
    2416             : 
    2417        1110 :   EVT VT = N->getValueType(0);
    2418         732 :   if (VT != MVT::i32 && VT != MVT::i64)
    2419             :     return false;
    2420             : 
    2421         498 :   SDValue Op0;
    2422             :   int DstLSB, Width;
    2423         498 :   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
    2424             :                                Op0, DstLSB, Width))
    2425             :     return false;
    2426             : 
    2427             :   // ImmR is the rotate right amount.
    2428           2 :   unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
    2429             :   // ImmS is the most significant bit of the source to be moved.
    2430           2 :   unsigned ImmS = Width - 1;
    2431             : 
    2432           2 :   SDLoc DL(N);
    2433           2 :   SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
    2434           6 :                    CurDAG->getTargetConstant(ImmS, DL, VT)};
    2435           4 :   unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    2436           4 :   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    2437           2 :   return true;
    2438             : }
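                     : 
                     : // For instance (illustrative): "(and (shl x, 3), 0x78)" on i32 positions a
                     : // 4-bit field at bit 3, so DstLSB = 3 and Width = 4, giving ImmR = 29 and
                     : // ImmS = 3, i.e. the single instruction "ubfiz wd, wn, #3, #4".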
    2439             : 
    2440             : bool
    2441          32 : AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
    2442             :                                               unsigned RegWidth) {
    2443          64 :   APFloat FVal(0.0);
    2444           0 :   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    2445           0 :     FVal = CN->getValueAPF();
    2446          32 :   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
    2447             :     // Some otherwise illegal constants are allowed in this case.
    2448          96 :     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
    2449          96 :         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
    2450             :       return false;
    2451             : 
    2452             :     ConstantPoolSDNode *CN =
    2453         128 :         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
    2454          64 :     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
    2455             :   } else
    2456             :     return false;
    2457             : 
    2458             :   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
    2459             :   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
    2460             :   // x-register.
    2461             :   //
    2462             :   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
    2463             :   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
    2464             :   // integers.
    2465             :   bool IsExact;
    2466             : 
    2467             :   // fbits is between 1 and 64 in the worst-case, which means the fmul
    2468             :   // could have 2^64 as an actual operand. Need 65 bits of precision.
    2469          32 :   APSInt IntVal(65, true);
    2470          32 :   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
    2471             : 
    2472             :   // N.b. isPowerOf2 also checks for > 0.
    2473          32 :   if (!IsExact || !IntVal.isPowerOf2()) return false;
    2474          32 :   unsigned FBits = IntVal.logBase2();
    2475             : 
    2476             :   // Checks above should have guaranteed that we haven't lost information in
    2477             :   // finding FBits, but it must still be in range.
    2478          32 :   if (FBits == 0 || FBits > RegWidth) return false;
    2479             : 
    2480         160 :   FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
    2481             :   return true;
    2482             : }
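                     : 
                     : // By way of example (illustrative): if the fmul constant is 16.0, IntVal
                     : // converts exactly to 16, FBits = log2(16) = 4, and the surrounding pattern
                     : // can select "(fp_to_sint (fmul x, 16.0))" as a fixed-point convert such as
                     : // "fcvtzs w0, s0, #4".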
    2483             : 
    2484             : // Inspects a register string of the form op0:op1:CRn:CRm:op2, extracts the
    2485             : // integer value of each field, and combines these into a single value to be
    2486             : // used in the MRS/MSR instruction.
    2487          17 : static int getIntOperandFromRegisterString(StringRef RegString) {
    2488          34 :   SmallVector<StringRef, 5> Fields;
    2489          17 :   RegString.split(Fields, ':');
    2490             : 
    2491          17 :   if (Fields.size() == 1)
    2492             :     return -1;
    2493             : 
    2494             :   assert(Fields.size() == 5
    2495             :             && "Invalid number of fields in read register string");
    2496             : 
    2497           2 :   SmallVector<int, 5> Ops;
    2498           2 :   bool AllIntFields = true;
    2499             : 
    2500          12 :   for (StringRef Field : Fields) {
    2501             :     unsigned IntField;
    2502          10 :     AllIntFields &= !Field.getAsInteger(10, IntField);
    2503          10 :     Ops.push_back(IntField);
    2504             :   }
    2505             : 
    2506             :   assert(AllIntFields &&
    2507             :           "Unexpected non-integer value in special register string.");
    2508             : 
    2509             :   // Need to combine the integer fields of the string into a single value
    2510             :   // based on the bit encoding of MRS/MSR instruction.
    2511             :   // based on the bit encoding of the MRS/MSR instructions.
    2512           4 :          (Ops[3] << 3) | (Ops[4]);
    2513             : }
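                     : 
                     : // For example (illustrative): the string "1:2:7:4:5" yields
                     : // (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 = 0x53a5.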
    2514             : 
    2515             : // Lower the read_register intrinsic to an MRS instruction node if the special
    2516             : // register string argument is either of the form detailed in the ACLE (the
    2517             : // form described in getIntOperandFromRegisterString) or is a named register
    2518             : // known by the MRS SysReg mapper.
    2519           7 : bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
    2520          21 :   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
    2521          21 :   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
    2522          14 :   SDLoc DL(N);
    2523             : 
    2524           7 :   int Reg = getIntOperandFromRegisterString(RegString->getString());
    2525           7 :   if (Reg != -1) {
    2526           6 :     ReplaceNode(N, CurDAG->getMachineNode(
    2527             :                        AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
    2528           2 :                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
    2529           3 :                        N->getOperand(0)));
    2530           1 :     return true;
    2531             :   }
    2532             : 
    2533             :   // Use the sysreg mapper to map the remaining possible strings to the
    2534             :   // value for the register to be used for the instruction operand.
    2535           6 :   auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
    2536           7 :   if (TheReg && TheReg->Readable &&
    2537           2 :       TheReg->haveFeatures(Subtarget->getFeatureBits()))
    2538           1 :     Reg = TheReg->Encoding;
    2539             :   else
    2540           5 :     Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
    2541             : 
    2542           6 :   if (Reg != -1) {
    2543           6 :     ReplaceNode(N, CurDAG->getMachineNode(
    2544             :                        AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
    2545           2 :                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
    2546           3 :                        N->getOperand(0)));
    2547           1 :     return true;
    2548             :   }
    2549             : 
    2550             :   return false;
    2551             : }
    2552             : 
    2553             : // Lower the write_register intrinsic to an MSR instruction node if the special
    2554             : // register string argument is either of the form detailed in the ACLE (the
    2555             : // form described in getIntOperandFromRegisterString) or is a named register
    2556             : // known by the MSR SysReg mapper.
    2557          10 : bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
    2558          30 :   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
    2559          30 :   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
    2560          20 :   SDLoc DL(N);
    2561             : 
    2562          10 :   int Reg = getIntOperandFromRegisterString(RegString->getString());
    2563          10 :   if (Reg != -1) {
    2564           3 :     ReplaceNode(
    2565           5 :         N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
    2566           2 :                                   CurDAG->getTargetConstant(Reg, DL, MVT::i32),
    2567           4 :                                   N->getOperand(2), N->getOperand(0)));
    2568           1 :     return true;
    2569             :   }
    2570             : 
    2571             :   // Check if the register was one of those allowed as the pstatefield value in
    2572             :   // the MSR (immediate) instruction. To accept the values allowed in the
    2573             :   // pstatefield for the MSR (immediate) instruction, we also require that an
    2574             : // immediate value has been provided as an argument; we know that this is
    2575             : // the case, as it has been ensured by semantic checking.
    2576           9 :   auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
    2577           9 :   if (PMapper) {
    2578             :     assert (isa<ConstantSDNode>(N->getOperand(2))
    2579             :               && "Expected a constant integer expression.");
    2580           1 :     unsigned Reg = PMapper->Encoding;
    2581           4 :     uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
    2582             :     unsigned State;
    2583           1 :     if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
    2584             :       assert(Immed < 2 && "Bad imm");
    2585             :       State = AArch64::MSRpstateImm1;
    2586             :     } else {
    2587             :       assert(Immed < 16 && "Bad imm");
    2588           1 :       State = AArch64::MSRpstateImm4;
    2589             :     }
    2590           6 :     ReplaceNode(N, CurDAG->getMachineNode(
    2591             :                        State, DL, MVT::Other,
    2592           2 :                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
    2593           2 :                        CurDAG->getTargetConstant(Immed, DL, MVT::i16),
    2594           3 :                        N->getOperand(0)));
    2595           1 :     return true;
    2596             :   }
    2597             : 
    2598             :   // Use the sysreg mapper to attempt to map the remaining possible strings
    2599             :   // to the value for the register to be used for the MSR (register)
    2600             :   // instruction operand.
    2601           8 :   auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
    2602           9 :   if (TheReg && TheReg->Writeable &&
    2603           2 :       TheReg->haveFeatures(Subtarget->getFeatureBits()))
    2604           1 :     Reg = TheReg->Encoding;
    2605             :   else
    2606           7 :     Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
    2607           8 :   if (Reg != -1) {
    2608           5 :     ReplaceNode(N, CurDAG->getMachineNode(
    2609             :                        AArch64::MSR, DL, MVT::Other,
    2610           2 :                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
    2611           4 :                        N->getOperand(2), N->getOperand(0)));
    2612           1 :     return true;
    2613             :   }
    2614             : 
    2615             :   return false;
    2616             : }
    2617             : 
    2618             : /// We have special pseudo-instructions for these compare-and-swap operations.
    2619          36 : bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
    2620             :   unsigned Opcode;
    2621          36 :   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
    2622             : 
    2623             :   // Leave IR for LSE if subtarget supports it.
    2624          36 :   if (Subtarget->hasLSE()) return false;
    2625             : 
    2626           7 :   if (MemTy == MVT::i8)
    2627             :     Opcode = AArch64::CMP_SWAP_8;
    2628           5 :   else if (MemTy == MVT::i16)
    2629             :     Opcode = AArch64::CMP_SWAP_16;
    2630           3 :   else if (MemTy == MVT::i32)
    2631             :     Opcode = AArch64::CMP_SWAP_32;
    2632           1 :   else if (MemTy == MVT::i64)
    2633             :     Opcode = AArch64::CMP_SWAP_64;
    2634             :   else
    2635           0 :     llvm_unreachable("Unknown AtomicCmpSwap type");
    2636             : 
    2637           8 :   MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
    2638          24 :   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
    2639          16 :                    N->getOperand(0)};
    2640          12 :   SDNode *CmpSwap = CurDAG->getMachineNode(
    2641           8 :       Opcode, SDLoc(N),
    2642          16 :       CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
    2643             : 
    2644           4 :   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
    2645           4 :   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
    2646           8 :   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
    2647             : 
    2648           4 :   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
    2649           4 :   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
    2650           4 :   CurDAG->RemoveDeadNode(N);
    2651             : 
    2652           4 :   return true;
    2653             : }
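                     : 
                     : // A sketch of what this selects (IR illustrative): without LSE,
                     : //   %pair = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
                     : // becomes a CMP_SWAP_32 pseudo here, which a later pseudo-expansion pass
                     : // turns into a load-exclusive/store-exclusive compare-and-swap loop.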
    2654             : 
    2655      156190 : void AArch64DAGToDAGISel::Select(SDNode *Node) {
    2656             :   // Dump information about the Node being selected
    2657             :   DEBUG(errs() << "Selecting: ");
    2658             :   DEBUG(Node->dump(CurDAG));
    2659             :   DEBUG(errs() << "\n");
    2660             : 
    2661             :   // If we have a custom node, we already have selected!
    2662      156190 :   if (Node->isMachineOpcode()) {
    2663             :     DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    2664         456 :     Node->setNodeId(-1);
    2665        2635 :     return;
    2666             :   }
    2667             : 
    2668             :   // A few custom selection cases.
    2669      311468 :   EVT VT = Node->getValueType(0);
    2670             : 
    2671      311468 :   switch (Node->getOpcode()) {
    2672             :   default:
    2673             :     break;
    2674             : 
    2675          36 :   case ISD::ATOMIC_CMP_SWAP:
    2676          36 :     if (SelectCMP_SWAP(Node))
    2677             :       return;
    2678             :     break;
    2679             : 
    2680           7 :   case ISD::READ_REGISTER:
    2681           7 :     if (tryReadRegister(Node))
    2682             :       return;
    2683             :     break;
    2684             : 
    2685          10 :   case ISD::WRITE_REGISTER:
    2686          10 :     if (tryWriteRegister(Node))
    2687             :       return;
    2688             :     break;
    2689             : 
    2690        2282 :   case ISD::ADD:
    2691        2282 :     if (tryMLAV64LaneV128(Node))
    2692             :       return;
    2693             :     break;
    2694             : 
    2695        5398 :   case ISD::LOAD: {
    2696             :     // Try to select as an indexed load. Fall through to normal processing
    2697             :     // if we can't.
    2698        5398 :     if (tryIndexedLoad(Node))
    2699             :       return;
    2700             :     break;
    2701             :   }
    2702             : 
    2703        1010 :   case ISD::SRL:
    2704             :   case ISD::AND:
    2705             :   case ISD::SRA:
    2706             :   case ISD::SIGN_EXTEND_INREG:
    2707        1010 :     if (tryBitfieldExtractOp(Node))
    2708             :       return;
    2709         965 :     if (tryBitfieldInsertInZeroOp(Node))
    2710             :       return;
    2711             :     break;
    2712             : 
    2713          83 :   case ISD::SIGN_EXTEND:
    2714          83 :     if (tryBitfieldExtractOpFromSExt(Node))
    2715             :       return;
    2716             :     break;
    2717             : 
    2718         286 :   case ISD::OR:
    2719         286 :     if (tryBitfieldInsertOp(Node))
    2720             :       return;
    2721             :     break;
    2722             : 
    2723        1230 :   case ISD::EXTRACT_VECTOR_ELT: {
    2724             :     // Extracting lane zero is a special case where we can just use a plain
    2725             :     // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
    2726             :     // the rest of the compiler, especially the register allocator and copy
    2727             :     // propagation, to reason about, so is preferred when it's possible to
    2728             :     // use it.
    2729        3690 :     ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
    2730             :     // Bail and use the default Select() for non-zero lanes.
    2731        1230 :     if (LaneNode->getZExtValue() != 0)
    2732             :       break;
    2733             :     // If the element type is not the same as the result type, likewise
    2734             :     // bail and use the default Select(), as there's more to do than just
    2735             :     // a cross-class COPY. This catches extracts of i8 and i16 elements
    2736             :     // since they will need an explicit zext.
    2737        2140 :     if (VT != Node->getOperand(0).getValueType().getVectorElementType())
    2738             :       break;
    2739             :     unsigned SubReg;
    2740        1260 :     switch (Node->getOperand(0)
    2741         840 :                 .getValueType()
    2742         840 :                 .getVectorElementType()
    2743         420 :                 .getSizeInBits()) {
    2744           0 :     default:
    2745           0 :       llvm_unreachable("Unexpected vector element type!");
    2746             :     case 64:
    2747             :       SubReg = AArch64::dsub;
    2748             :       break;
    2749         148 :     case 32:
    2750         148 :       SubReg = AArch64::ssub;
    2751         148 :       break;
    2752         107 :     case 16:
    2753         107 :       SubReg = AArch64::hsub;
    2754         107 :       break;
    2755           0 :     case 8:
    2756           0 :       llvm_unreachable("unexpected zext-requiring extract element!");
    2757             :     }
    2758        1260 :     SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
    2759        1260 :                                                      Node->getOperand(0));
    2760             :     DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
    2761             :     DEBUG(Extract->dumpr(CurDAG));
    2762             :     DEBUG(dbgs() << "\n");
    2763         840 :     ReplaceNode(Node, Extract.getNode());
    2764         420 :     return;
    2765             :   }
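                     : 
                     : // For example (illustrative): extracting lane 0 of a v4f32 vector becomes a
                     : // plain ssub subregister copy of the source Q register, which later passes
                     : // can often fold away entirely.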
    2766        1689 :   case ISD::Constant: {
    2767             :     // Materialize zero constants as copies from WZR/XZR.  This allows
    2768             :     // the coalescer to propagate these into other instructions.
    2769        1689 :     ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
    2770        1689 :     if (ConstNode->isNullValue()) {
    2771        1202 :       if (VT == MVT::i32) {
    2772         455 :         SDValue New = CurDAG->getCopyFromReg(
    2773        1820 :             CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
    2774         910 :         ReplaceNode(Node, New.getNode());
    2775             :         return;
    2776         292 :       } else if (VT == MVT::i64) {
    2777         146 :         SDValue New = CurDAG->getCopyFromReg(
    2778         584 :             CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
    2779         292 :         ReplaceNode(Node, New.getNode());
    2780             :         return;
    2781             :       }
    2782             :     }
    2783             :     break;
    2784             :   }
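                     : 
                     : // For instance (illustrative): an i32 zero is selected as a CopyFromReg of
                     : // WZR, so later passes can fold the zero register straight into users,
                     : // e.g. "str wzr, [sp, #8]" instead of materializing 0 into a GPR first.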
    2785             : 
    2786         133 :   case ISD::FrameIndex: {
    2787             :     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    2788         133 :     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    2789         133 :     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
    2790         133 :     const TargetLowering *TLI = getTargetLowering();
    2791         133 :     SDValue TFI = CurDAG->getTargetFrameIndex(
    2792         532 :         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    2793         266 :     SDLoc DL(Node);
    2794         266 :     SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
    2795         532 :                       CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
    2796         399 :     CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
    2797             :     return;
    2798             :   }
    2799         398 :   case ISD::INTRINSIC_W_CHAIN: {
    2800        1592 :     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    2801         398 :     switch (IntNo) {
    2802             :     default:
    2803             :       break;
    2804          16 :     case Intrinsic::aarch64_ldaxp:
    2805             :     case Intrinsic::aarch64_ldxp: {
    2806          16 :       unsigned Op =
    2807          16 :           IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
    2808          32 :       SDValue MemAddr = Node->getOperand(2);
    2809          32 :       SDLoc DL(Node);
    2810          32 :       SDValue Chain = Node->getOperand(0);
    2811             : 
    2812          32 :       SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
    2813          48 :                                           MVT::Other, MemAddr, Chain);
    2814             : 
    2815             :       // Transfer memoperands.
    2816          16 :       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
    2817          16 :       MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
    2818          32 :       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
    2819          32 :       ReplaceNode(Node, Ld);
    2820             :       return;
    2821             :     }
    2822          16 :     case Intrinsic::aarch64_stlxp:
    2823             :     case Intrinsic::aarch64_stxp: {
    2824          16 :       unsigned Op =
    2825          16 :           IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
    2826          32 :       SDLoc DL(Node);
    2827          32 :       SDValue Chain = Node->getOperand(0);
    2828          32 :       SDValue ValLo = Node->getOperand(2);
    2829          32 :       SDValue ValHi = Node->getOperand(3);
    2830          32 :       SDValue MemAddr = Node->getOperand(4);
    2831             : 
    2832             :       // Place arguments in the right order.
    2833          16 :       SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
    2834             : 
    2835          64 :       SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
    2836             :       // Transfer memoperands.
    2837          16 :       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
    2838          16 :       MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
    2839          32 :       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
    2840             : 
    2841          32 :       ReplaceNode(Node, St);
    2842             :       return;
    2843             :     }
    2844          14 :     case Intrinsic::aarch64_neon_ld1x2:
    2845          28 :       if (VT == MVT::v8i8) {
    2846           1 :         SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
    2847           1 :         return;
    2848          26 :       } else if (VT == MVT::v16i8) {
    2849           1 :         SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
    2850           1 :         return;
    2851          23 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    2852           2 :         SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
    2853           2 :         return;
    2854          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    2855           2 :         SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
    2856           2 :         return;
    2857          15 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    2858           2 :         SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
    2859           2 :         return;
    2860          11 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    2861           2 :         SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
    2862           2 :         return;
    2863           7 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    2864           2 :         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
    2865           2 :         return;
    2866           3 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    2867           2 :         SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
    2868           2 :         return;
    2869             :       }
    2870             :       break;
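                     :     // Illustrative note: ld1x2 above and ld1x3/ld1x4 below all follow the
                     :     // same shape; each intrinsic overload selects the matching LD1
                     :     // multi-register form, e.g. (names assumed):
                     :     //   %r = call { <8 x i8>, <8 x i8> }
                     :     //        @llvm.aarch64.neon.ld1x2.v8i8.p0v8i8(<8 x i8>* %p)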
    2871          14 :     case Intrinsic::aarch64_neon_ld1x3:
    2872          28 :       if (VT == MVT::v8i8) {
    2873           1 :         SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
    2874           1 :         return;
    2875          26 :       } else if (VT == MVT::v16i8) {
    2876           1 :         SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
    2877           1 :         return;
    2878          23 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    2879           2 :         SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
    2880           2 :         return;
    2881          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    2882           2 :         SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
    2883           2 :         return;
    2884          15 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    2885           2 :         SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
    2886           2 :         return;
    2887          11 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    2888           2 :         SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
    2889           2 :         return;
    2890           7 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    2891           2 :         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
    2892           2 :         return;
    2893           3 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    2894           2 :         SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
    2895           2 :         return;
    2896             :       }
    2897             :       break;
    2898          14 :     case Intrinsic::aarch64_neon_ld1x4:
    2899          28 :       if (VT == MVT::v8i8) {
    2900           1 :         SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
    2901           1 :         return;
    2902          26 :       } else if (VT == MVT::v16i8) {
    2903           1 :         SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
    2904           1 :         return;
    2905          23 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    2906           2 :         SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
    2907           2 :         return;
    2908          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    2909           2 :         SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
    2910           2 :         return;
    2911          15 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    2912           2 :         SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
    2913           2 :         return;
    2914          11 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    2915           2 :         SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
    2916           2 :         return;
    2917           7 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    2918           2 :         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
    2919           2 :         return;
    2920           3 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    2921           2 :         SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
    2922           2 :         return;
    2923             :       }
    2924             :       break;
    2925          19 :     case Intrinsic::aarch64_neon_ld2:
    2926          38 :       if (VT == MVT::v8i8) {
    2927           5 :         SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
    2928           5 :         return;
    2929          28 :       } else if (VT == MVT::v16i8) {
    2930           1 :         SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
    2931           1 :         return;
    2932          25 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    2933           2 :         SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
    2934           2 :         return;
    2935          21 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    2936           2 :         SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
    2937           2 :         return;
    2938          16 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    2939           2 :         SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
    2940           2 :         return;
    2941          12 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    2942           4 :         SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
    2943           4 :         return;
    2944           5 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    2945           2 :         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
    2946           2 :         return;
    2947           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    2948           1 :         SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
    2949           1 :         return;
    2950             :       }
    2951             :       break;
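                     :     // Example (illustrative; names assumed): the interleaved ld2/ld3/ld4
                     :     // loads pick one instruction per element type, e.g.
                     :     //   %r = call { <16 x i8>, <16 x i8> }
                     :     //        @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* %p)
                     :     // selects LD2Twov16b. The v1i64/v1f64 arms reuse the LD1
                     :     // multi-register forms, since there is no 1d variant of LD2/LD3/LD4.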
    2952          15 :     case Intrinsic::aarch64_neon_ld3:
    2953          30 :       if (VT == MVT::v8i8) {
    2954           2 :         SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
    2955           2 :         return;
    2956          26 :       } else if (VT == MVT::v16i8) {
    2957           2 :         SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
    2958           2 :         return;
    2959          20 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    2960           3 :         SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
    2961           3 :         return;
    2962          15 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    2963           2 :         SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
    2964           2 :         return;
    2965          11 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    2966           1 :         SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
    2967           1 :         return;
    2968           9 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    2969           2 :         SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
    2970           2 :         return;
    2971           5 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    2972           2 :         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
    2973           2 :         return;
    2974           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    2975           1 :         SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
    2976           1 :         return;
    2977             :       }
    2978             :       break;
    2979          14 :     case Intrinsic::aarch64_neon_ld4:
    2980          28 :       if (VT == MVT::v8i8) {
    2981           1 :         SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
    2982           1 :         return;
    2983          26 :       } else if (VT == MVT::v16i8) {
    2984           3 :         SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
    2985           3 :         return;
    2986          18 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    2987           3 :         SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
    2988           3 :         return;
    2989          13 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    2990           2 :         SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
    2991           2 :         return;
    2992           9 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    2993           1 :         SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
    2994           1 :         return;
    2995           7 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    2996           1 :         SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
    2997           1 :         return;
    2998           5 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    2999           2 :         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
    3000           2 :         return;
    3001           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3002           1 :         SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
    3003           1 :         return;
    3004             :       }
    3005             :       break;
    3006          10 :     case Intrinsic::aarch64_neon_ld2r:
    3007          20 :       if (VT == MVT::v8i8) {
    3008           1 :         SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
    3009           1 :         return;
    3010          18 :       } else if (VT == MVT::v16i8) {
    3011           1 :         SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
    3012           1 :         return;
    3013          15 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3014           2 :         SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
    3015           2 :         return;
    3016          11 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3017           2 :         SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
    3018           2 :         return;
    3019           7 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3020           1 :         SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
    3021           1 :         return;
    3022           5 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3023           1 :         SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
    3024           1 :         return;
    3025           3 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3026           1 :         SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
    3027           1 :         return;
    3028           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3029           1 :         SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
    3030           1 :         return;
    3031             :       }
    3032             :       break;
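                     :     // Example (illustrative; names assumed): ld2r/ld3r/ld4r load one
                     :     // element per result register and replicate it across all lanes, e.g.
                     :     //   %r = call { <8 x i8>, <8 x i8> }
                     :     //        @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %p)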
    3033          10 :     case Intrinsic::aarch64_neon_ld3r:
    3034          20 :       if (VT == MVT::v8i8) {
    3035           1 :         SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
    3036           1 :         return;
    3037          18 :       } else if (VT == MVT::v16i8) {
    3038           1 :         SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
    3039           1 :         return;
    3040          15 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3041           2 :         SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
    3042           2 :         return;
    3043          11 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3044           2 :         SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
    3045           2 :         return;
    3046           7 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3047           1 :         SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
    3048           1 :         return;
    3049           5 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3050           1 :         SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
    3051           1 :         return;
    3052           3 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3053           1 :         SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
    3054           1 :         return;
    3055           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3056           1 :         SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
    3057           1 :         return;
    3058             :       }
    3059             :       break;
    3060          10 :     case Intrinsic::aarch64_neon_ld4r:
    3061          20 :       if (VT == MVT::v8i8) {
    3062           1 :         SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
    3063           1 :         return;
    3064          18 :       } else if (VT == MVT::v16i8) {
    3065           1 :         SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
    3066           1 :         return;
    3067          15 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3068           2 :         SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
    3069           2 :         return;
    3070          11 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3071           2 :         SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
    3072           2 :         return;
    3073           7 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3074           1 :         SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
    3075           1 :         return;
    3076           5 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3077           1 :         SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
    3078           1 :         return;
    3079           3 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3080           1 :         SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
    3081           1 :         return;
    3082           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3083           1 :         SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
    3084           1 :         return;
    3085             :       }
    3086             :       break;
    3087          10 :     case Intrinsic::aarch64_neon_ld2lane:
    3088          19 :       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3089           1 :         SelectLoadLane(Node, 2, AArch64::LD2i8);
    3090           1 :         return;
    3091          25 :       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3092           7 :                  VT == MVT::v8f16) {
    3093           3 :         SelectLoadLane(Node, 2, AArch64::LD2i16);
    3094           3 :         return;
    3095          12 :       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3096           3 :                  VT == MVT::v2f32) {
    3097           3 :         SelectLoadLane(Node, 2, AArch64::LD2i32);
    3098           3 :         return;
    3099           5 :       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3100           0 :                  VT == MVT::v1f64) {
    3101           3 :         SelectLoadLane(Node, 2, AArch64::LD2i64);
    3102           3 :         return;
    3103             :       }
    3104             :       break;
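                     :     // Example (illustrative; names assumed): the lane loads take the
                     :     // vectors being updated, the lane index and the pointer, e.g.
                     :     //   %r = call { <16 x i8>, <16 x i8> }
                     :     //        @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %a,
                     :     //                                              <16 x i8> %b,
                     :     //                                              i64 1, i8* %p)
                     :     // Only the element width matters here, so 64- and 128-bit vector
                     :     // types share each branch.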
    3105           8 :     case Intrinsic::aarch64_neon_ld3lane:
    3106          15 :       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3107           1 :         SelectLoadLane(Node, 3, AArch64::LD3i8);
    3108           1 :         return;
    3109          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3110           5 :                  VT == MVT::v8f16) {
    3111           3 :         SelectLoadLane(Node, 3, AArch64::LD3i16);
    3112           3 :         return;
    3113           6 :       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3114           1 :                  VT == MVT::v2f32) {
    3115           3 :         SelectLoadLane(Node, 3, AArch64::LD3i32);
    3116           3 :         return;
    3117           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3118           0 :                  VT == MVT::v1f64) {
    3119           1 :         SelectLoadLane(Node, 3, AArch64::LD3i64);
    3120           1 :         return;
    3121             :       }
    3122             :       break;
    3123           8 :     case Intrinsic::aarch64_neon_ld4lane:
    3124          15 :       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3125           1 :         SelectLoadLane(Node, 4, AArch64::LD4i8);
    3126           1 :         return;
    3127          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3128           5 :                  VT == MVT::v8f16) {
    3129           3 :         SelectLoadLane(Node, 4, AArch64::LD4i16);
    3130           3 :         return;
    3131           6 :       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3132           1 :                  VT == MVT::v2f32) {
    3133           3 :         SelectLoadLane(Node, 4, AArch64::LD4i32);
    3134           3 :         return;
    3135           1 :       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3136           0 :                  VT == MVT::v1f64) {
    3137           1 :         SelectLoadLane(Node, 4, AArch64::LD4i64);
    3138           1 :         return;
    3139             :       }
    3140             :       break;
    3141             :     }
    3142             :   } break;
    3143        1522 :   case ISD::INTRINSIC_WO_CHAIN: {
    3144        6088 :     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    3145        1522 :     switch (IntNo) {
    3146             :     default:
    3147             :       break;
    3148           4 :     case Intrinsic::aarch64_neon_tbl2:
    3149           4 :       SelectTable(Node, 2,
    3150           4 :                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
    3151             :                   false);
    3152           4 :       return;
    3153           2 :     case Intrinsic::aarch64_neon_tbl3:
    3154           4 :       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
    3155             :                                            : AArch64::TBLv16i8Three,
    3156             :                   false);
    3157           2 :       return;
    3158           2 :     case Intrinsic::aarch64_neon_tbl4:
    3159           4 :       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
    3160             :                                            : AArch64::TBLv16i8Four,
    3161             :                   false);
    3162           2 :       return;
    3163           2 :     case Intrinsic::aarch64_neon_tbx2:
    3164           2 :       SelectTable(Node, 2,
    3165           2 :                   VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
    3166             :                   true);
    3167           2 :       return;
    3168           2 :     case Intrinsic::aarch64_neon_tbx3:
    3169           4 :       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
    3170             :                                            : AArch64::TBXv16i8Three,
    3171             :                   true);
    3172           2 :       return;
    3173           2 :     case Intrinsic::aarch64_neon_tbx4:
    3174           4 :       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
    3175             :                                            : AArch64::TBXv16i8Four,
    3176             :                   true);
    3177           2 :       return;
    3178         114 :     case Intrinsic::aarch64_neon_smull:
    3179             :     case Intrinsic::aarch64_neon_umull:
    3180         114 :       if (tryMULLV64LaneV128(IntNo, Node))
    3181             :         return;
    3182             :       break;
    3183             :     }
    3184             :     break;
    3185             :   }
    3186         144 :   case ISD::INTRINSIC_VOID: {
    3187         576 :     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    3188         144 :     if (Node->getNumOperands() >= 3)
    3189         381 :       VT = Node->getOperand(2)->getValueType(0);
    3190         144 :     switch (IntNo) {
    3191             :     default:
    3192             :       break;
    3193          14 :     case Intrinsic::aarch64_neon_st1x2: {
    3194          28 :       if (VT == MVT::v8i8) {
    3195           1 :         SelectStore(Node, 2, AArch64::ST1Twov8b);
    3196           1 :         return;
    3197          26 :       } else if (VT == MVT::v16i8) {
    3198           1 :         SelectStore(Node, 2, AArch64::ST1Twov16b);
    3199           1 :         return;
    3200          23 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3201           2 :         SelectStore(Node, 2, AArch64::ST1Twov4h);
    3202           2 :         return;
    3203          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3204           2 :         SelectStore(Node, 2, AArch64::ST1Twov8h);
    3205           2 :         return;
    3206          15 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3207           2 :         SelectStore(Node, 2, AArch64::ST1Twov2s);
    3208           2 :         return;
    3209          11 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3210           2 :         SelectStore(Node, 2, AArch64::ST1Twov4s);
    3211           2 :         return;
    3212           7 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3213           2 :         SelectStore(Node, 2, AArch64::ST1Twov2d);
    3214           2 :         return;
    3215           3 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3216           2 :         SelectStore(Node, 2, AArch64::ST1Twov1d);
    3217           2 :         return;
    3218             :       }
    3219             :       break;
    3220             :     }
    3221          14 :     case Intrinsic::aarch64_neon_st1x3: {
    3222          28 :       if (VT == MVT::v8i8) {
    3223           1 :         SelectStore(Node, 3, AArch64::ST1Threev8b);
    3224           1 :         return;
    3225          26 :       } else if (VT == MVT::v16i8) {
    3226           1 :         SelectStore(Node, 3, AArch64::ST1Threev16b);
    3227           1 :         return;
    3228          23 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3229           2 :         SelectStore(Node, 3, AArch64::ST1Threev4h);
    3230           2 :         return;
    3231          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3232           2 :         SelectStore(Node, 3, AArch64::ST1Threev8h);
    3233           2 :         return;
    3234          15 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3235           2 :         SelectStore(Node, 3, AArch64::ST1Threev2s);
    3236           2 :         return;
    3237          11 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3238           2 :         SelectStore(Node, 3, AArch64::ST1Threev4s);
    3239           2 :         return;
    3240           7 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3241           2 :         SelectStore(Node, 3, AArch64::ST1Threev2d);
    3242           2 :         return;
    3243           3 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3244           2 :         SelectStore(Node, 3, AArch64::ST1Threev1d);
    3245           2 :         return;
    3246             :       }
    3247             :       break;
    3248             :     }
    3249          14 :     case Intrinsic::aarch64_neon_st1x4: {
    3250          28 :       if (VT == MVT::v8i8) {
    3251           1 :         SelectStore(Node, 4, AArch64::ST1Fourv8b);
    3252           1 :         return;
    3253          26 :       } else if (VT == MVT::v16i8) {
    3254           1 :         SelectStore(Node, 4, AArch64::ST1Fourv16b);
    3255           1 :         return;
    3256          23 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3257           2 :         SelectStore(Node, 4, AArch64::ST1Fourv4h);
    3258           2 :         return;
    3259          19 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3260           2 :         SelectStore(Node, 4, AArch64::ST1Fourv8h);
    3261           2 :         return;
    3262          15 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3263           2 :         SelectStore(Node, 4, AArch64::ST1Fourv2s);
    3264           2 :         return;
    3265          11 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3266           2 :         SelectStore(Node, 4, AArch64::ST1Fourv4s);
    3267           2 :         return;
    3268           7 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3269           2 :         SelectStore(Node, 4, AArch64::ST1Fourv2d);
    3270           2 :         return;
    3271           3 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3272           2 :         SelectStore(Node, 4, AArch64::ST1Fourv1d);
    3273           2 :         return;
    3274             :       }
    3275             :       break;
    3276             :     }
    3277          21 :     case Intrinsic::aarch64_neon_st2: {
    3278          42 :       if (VT == MVT::v8i8) {
    3279           9 :         SelectStore(Node, 2, AArch64::ST2Twov8b);
    3280           9 :         return;
    3281          24 :       } else if (VT == MVT::v16i8) {
    3282           1 :         SelectStore(Node, 2, AArch64::ST2Twov16b);
    3283           1 :         return;
    3284          21 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3285           2 :         SelectStore(Node, 2, AArch64::ST2Twov4h);
    3286           2 :         return;
    3287          17 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3288           2 :         SelectStore(Node, 2, AArch64::ST2Twov8h);
    3289           2 :         return;
    3290          13 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3291           1 :         SelectStore(Node, 2, AArch64::ST2Twov2s);
    3292           1 :         return;
    3293          11 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3294           3 :         SelectStore(Node, 2, AArch64::ST2Twov4s);
    3295           3 :         return;
    3296           5 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3297           1 :         SelectStore(Node, 2, AArch64::ST2Twov2d);
    3298           1 :         return;
    3299           2 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3300           2 :         SelectStore(Node, 2, AArch64::ST1Twov1d);
    3301           2 :         return;
    3302             :       }
    3303             :       break;
    3304             :     }
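                     :     // Example (illustrative; names assumed): the interleaving stores
                     :     // mirror the ld2/ld3/ld4 loads, e.g.
                     :     //   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %a,
                     :     //                                              <8 x i8> %b, i8* %p)
                     :     // selects ST2Twov8b; the v1i64/v1f64 arms again fall back to the ST1
                     :     // multi-register forms.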
    3305          14 :     case Intrinsic::aarch64_neon_st3: {
    3306          28 :       if (VT == MVT::v8i8) {
    3307           3 :         SelectStore(Node, 3, AArch64::ST3Threev8b);
    3308           3 :         return;
    3309          22 :       } else if (VT == MVT::v16i8) {
    3310           3 :         SelectStore(Node, 3, AArch64::ST3Threev16b);
    3311           3 :         return;
    3312          15 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3313           2 :         SelectStore(Node, 3, AArch64::ST3Threev4h);
    3314           2 :         return;
    3315          11 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3316           2 :         SelectStore(Node, 3, AArch64::ST3Threev8h);
    3317           2 :         return;
    3318           7 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3319           1 :         SelectStore(Node, 3, AArch64::ST3Threev2s);
    3320           1 :         return;
    3321           5 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3322           1 :         SelectStore(Node, 3, AArch64::ST3Threev4s);
    3323           1 :         return;
    3324           3 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3325           1 :         SelectStore(Node, 3, AArch64::ST3Threev2d);
    3326           1 :         return;
    3327           1 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3328           1 :         SelectStore(Node, 3, AArch64::ST1Threev1d);
    3329           1 :         return;
    3330             :       }
    3331             :       break;
    3332             :     }
    3333          12 :     case Intrinsic::aarch64_neon_st4: {
    3334          24 :       if (VT == MVT::v8i8) {
    3335           1 :         SelectStore(Node, 4, AArch64::ST4Fourv8b);
    3336           1 :         return;
    3337          22 :       } else if (VT == MVT::v16i8) {
    3338           3 :         SelectStore(Node, 4, AArch64::ST4Fourv16b);
    3339           3 :         return;
    3340          15 :       } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3341           2 :         SelectStore(Node, 4, AArch64::ST4Fourv4h);
    3342           2 :         return;
    3343          11 :       } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3344           2 :         SelectStore(Node, 4, AArch64::ST4Fourv8h);
    3345           2 :         return;
    3346           7 :       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3347           1 :         SelectStore(Node, 4, AArch64::ST4Fourv2s);
    3348           1 :         return;
    3349           5 :       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3350           1 :         SelectStore(Node, 4, AArch64::ST4Fourv4s);
    3351           1 :         return;
    3352           3 :       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3353           1 :         SelectStore(Node, 4, AArch64::ST4Fourv2d);
    3354           1 :         return;
    3355           1 :       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3356           1 :         SelectStore(Node, 4, AArch64::ST1Fourv1d);
    3357           1 :         return;
    3358             :       }
    3359             :       break;
    3360             :     }
    3361           9 :     case Intrinsic::aarch64_neon_st2lane: {
    3362          17 :       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3363           1 :         SelectStoreLane(Node, 2, AArch64::ST2i8);
    3364           1 :         return;
    3365          22 :       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3366           6 :                  VT == MVT::v8f16) {
    3367           3 :         SelectStoreLane(Node, 2, AArch64::ST2i16);
    3368           3 :         return;
    3369          13 :       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3370           4 :                  VT == MVT::v2f32) {
    3371           1 :         SelectStoreLane(Node, 2, AArch64::ST2i32);
    3372           1 :         return;
    3373           6 :       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3374           0 :                  VT == MVT::v1f64) {
    3375           4 :         SelectStoreLane(Node, 2, AArch64::ST2i64);
    3376           4 :         return;
    3377             :       }
    3378             :       break;
    3379             :     }
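                     :     // Example (illustrative; names assumed): single-lane stores, e.g.
                     :     //   call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %a,
                     :     //                                                   <16 x i8> %b,
                     :     //                                                   i64 1, i8* %p)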
    3380           7 :     case Intrinsic::aarch64_neon_st3lane: {
    3381          13 :       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3382           1 :         SelectStoreLane(Node, 3, AArch64::ST3i8);
    3383           1 :         return;
    3384          16 :       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3385           4 :                  VT == MVT::v8f16) {
    3386           3 :         SelectStoreLane(Node, 3, AArch64::ST3i16);
    3387           3 :         return;
    3388           7 :       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3389           2 :                  VT == MVT::v2f32) {
    3390           1 :         SelectStoreLane(Node, 3, AArch64::ST3i32);
    3391           1 :         return;
    3392           3 :       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3393           0 :                  VT == MVT::v1f64) {
    3394           2 :         SelectStoreLane(Node, 3, AArch64::ST3i64);
    3395           2 :         return;
    3396             :       }
    3397             :       break;
    3398             :     }
    3399           7 :     case Intrinsic::aarch64_neon_st4lane: {
    3400          13 :       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3401           1 :         SelectStoreLane(Node, 4, AArch64::ST4i8);
    3402           1 :         return;
    3403          16 :       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3404           4 :                  VT == MVT::v8f16) {
    3405           3 :         SelectStoreLane(Node, 4, AArch64::ST4i16);
    3406           3 :         return;
    3407           7 :       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3408           2 :                  VT == MVT::v2f32) {
    3409           1 :         SelectStoreLane(Node, 4, AArch64::ST4i32);
    3410           1 :         return;
    3411           3 :       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3412           0 :                  VT == MVT::v1f64) {
    3413           2 :         SelectStoreLane(Node, 4, AArch64::ST4i64);
    3414           2 :         return;
    3415             :       }
    3416             :       break;
    3417             :     }
    3418             :     }
    3419             :     break;
    3420             :   }
    3421          24 :   case AArch64ISD::LD2post: {
    3422          48 :     if (VT == MVT::v8i8) {
    3423           2 :       SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
    3424           2 :       return;
    3425          44 :     } else if (VT == MVT::v16i8) {
    3426           2 :       SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
    3427           2 :       return;
    3428          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3429           2 :       SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
    3430           2 :       return;
    3431          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3432           2 :       SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
    3433           2 :       return;
    3434          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3435           4 :       SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
    3436           4 :       return;
    3437          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3438           4 :       SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
    3439           4 :       return;
    3440          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3441           4 :       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
    3442           4 :       return;
    3443           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3444           4 :       SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
    3445           4 :       return;
    3446             :     }
    3447             :     break;
    3448             :   }
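                     :   // Illustrative sketch (names assumed): the AArch64ISD::*post opcodes are
                     :   // formed before selection, when a structured access is combined with a
                     :   // following increment of its base pointer, roughly:
                     :   //   %r = call { <16 x i8>, <16 x i8> }
                     :   //        @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* %p)
                     :   //   %p.next = getelementptr <16 x i8>, <16 x i8>* %p, i64 2
                     :   // The *_POST instructions selected here do the access and write the
                     :   // updated base register back in one instruction.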
    3449          24 :   case AArch64ISD::LD3post: {
    3450          48 :     if (VT == MVT::v8i8) {
    3451           2 :       SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
    3452           2 :       return;
    3453          44 :     } else if (VT == MVT::v16i8) {
    3454           2 :       SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
    3455           2 :       return;
    3456          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3457           2 :       SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
    3458           2 :       return;
    3459          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3460           2 :       SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
    3461           2 :       return;
    3462          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3463           4 :       SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
    3464           4 :       return;
    3465          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3466           4 :       SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
    3467           4 :       return;
    3468          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3469           4 :       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
    3470           4 :       return;
    3471           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3472           4 :       SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
    3473           4 :       return;
    3474             :     }
    3475             :     break;
    3476             :   }
    3477          24 :   case AArch64ISD::LD4post: {
    3478          48 :     if (VT == MVT::v8i8) {
    3479           2 :       SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
    3480           2 :       return;
    3481          44 :     } else if (VT == MVT::v16i8) {
    3482           2 :       SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
    3483           2 :       return;
    3484          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3485           2 :       SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
    3486           2 :       return;
    3487          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3488           2 :       SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
    3489           2 :       return;
    3490          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3491           4 :       SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
    3492           4 :       return;
    3493          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3494           4 :       SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
    3495           4 :       return;
    3496          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3497           4 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
    3498           4 :       return;
    3499           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3500           4 :       SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
    3501           4 :       return;
    3502             :     }
    3503             :     break;
    3504             :   }
    3505          24 :   case AArch64ISD::LD1x2post: {
    3506          48 :     if (VT == MVT::v8i8) {
    3507           2 :       SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
    3508           2 :       return;
    3509          44 :     } else if (VT == MVT::v16i8) {
    3510           2 :       SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
    3511           2 :       return;
    3512          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3513           2 :       SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
    3514           2 :       return;
    3515          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3516           2 :       SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
    3517           2 :       return;
    3518          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3519           4 :       SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
    3520           4 :       return;
    3521          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3522           4 :       SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
    3523           4 :       return;
    3524          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3525           4 :       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
    3526           4 :       return;
    3527           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3528           4 :       SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
    3529           4 :       return;
    3530             :     }
    3531             :     break;
    3532             :   }
    3533          24 :   case AArch64ISD::LD1x3post: {
    3534          48 :     if (VT == MVT::v8i8) {
    3535           2 :       SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
    3536           2 :       return;
    3537          44 :     } else if (VT == MVT::v16i8) {
    3538           2 :       SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
    3539           2 :       return;
    3540          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3541           2 :       SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
    3542           2 :       return;
    3543          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3544           2 :       SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
    3545           2 :       return;
    3546          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3547           4 :       SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
    3548           4 :       return;
    3549          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3550           4 :       SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
    3551           4 :       return;
    3552          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3553           4 :       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
    3554           4 :       return;
    3555           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3556           4 :       SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
    3557           4 :       return;
    3558             :     }
    3559             :     break;
    3560             :   }
    3561          24 :   case AArch64ISD::LD1x4post: {
    3562          48 :     if (VT == MVT::v8i8) {
    3563           2 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
    3564           2 :       return;
    3565          44 :     } else if (VT == MVT::v16i8) {
    3566           2 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
    3567           2 :       return;
    3568          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3569           2 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
    3570           2 :       return;
    3571          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3572           2 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
    3573           2 :       return;
    3574          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3575           4 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
    3576           4 :       return;
    3577          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3578           4 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
    3579           4 :       return;
    3580          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3581           4 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
    3582           4 :       return;
    3583           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3584           4 :       SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
    3585           4 :       return;
    3586             :     }
    3587             :     break;
    3588             :   }
    3589          20 :   case AArch64ISD::LD1DUPpost: {
    3590          40 :     if (VT == MVT::v8i8) {
    3591           2 :       SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
    3592           2 :       return;
    3593          36 :     } else if (VT == MVT::v16i8) {
    3594           2 :       SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
    3595           2 :       return;
    3596          30 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3597           2 :       SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
    3598           2 :       return;
    3599          26 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3600           2 :       SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
    3601           2 :       return;
    3602          22 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3603           4 :       SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
    3604           4 :       return;
    3605          14 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3606           4 :       SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
    3607           4 :       return;
    3608           8 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3609           0 :       SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
    3610           0 :       return;
    3611           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3612           4 :       SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
    3613           4 :       return;
    3614             :     }
    3615             :     break;
    3616             :   }
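                     :   // Illustrative note: LD1DUPpost is the post-incrementing load-and-splat;
                     :   // it covers a scalar load feeding a vector duplicate, and maps onto the
                     :   // LD1R*_POST (load-one-and-replicate) instructions above.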
    3617          24 :   case AArch64ISD::LD2DUPpost: {
    3618          48 :     if (VT == MVT::v8i8) {
    3619           2 :       SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
    3620           2 :       return;
    3621          44 :     } else if (VT == MVT::v16i8) {
    3622           2 :       SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
    3623           2 :       return;
    3624          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3625           2 :       SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
    3626           2 :       return;
    3627          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3628           2 :       SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
    3629           2 :       return;
    3630          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3631           4 :       SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
    3632           4 :       return;
    3633          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3634           4 :       SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
    3635           4 :       return;
    3636          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3637           4 :       SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
    3638           4 :       return;
    3639           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3640           4 :       SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
    3641           4 :       return;
    3642             :     }
    3643             :     break;
    3644             :   }
    3645          24 :   case AArch64ISD::LD3DUPpost: {
    3646          48 :     if (VT == MVT::v8i8) {
    3647           2 :       SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
    3648           2 :       return;
    3649          44 :     } else if (VT == MVT::v16i8) {
    3650           2 :       SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
    3651           2 :       return;
    3652          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3653           2 :       SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
    3654           2 :       return;
    3655          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3656           2 :       SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
    3657           2 :       return;
    3658          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3659           4 :       SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
    3660           4 :       return;
    3661          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3662           4 :       SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
    3663           4 :       return;
    3664          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3665           4 :       SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
    3666           4 :       return;
    3667           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3668           4 :       SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
    3669           4 :       return;
    3670             :     }
    3671             :     break;
    3672             :   }
    3673          24 :   case AArch64ISD::LD4DUPpost: {
    3674          48 :     if (VT == MVT::v8i8) {
    3675           2 :       SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
    3676           2 :       return;
    3677          44 :     } else if (VT == MVT::v16i8) {
    3678           2 :       SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
    3679           2 :       return;
    3680          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3681           2 :       SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
    3682           2 :       return;
    3683          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3684           2 :       SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
    3685           2 :       return;
    3686          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3687           4 :       SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
    3688           4 :       return;
    3689          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3690           4 :       SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
    3691           4 :       return;
    3692          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3693           4 :       SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
    3694           4 :       return;
    3695           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3696           4 :       SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
    3697           4 :       return;
    3698             :     }
    3699             :     break;
    3700             :   }
    3701          22 :   case AArch64ISD::LD1LANEpost: {
    3702          40 :     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3703           4 :       SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
    3704           4 :       return;
    3705          45 :     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3706          13 :                VT == MVT::v8f16) {
    3707           5 :       SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
    3708           5 :       return;
    3709          31 :     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3710           5 :                VT == MVT::v2f32) {
    3711           8 :       SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
    3712           8 :       return;
    3713          11 :     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3714           0 :                VT == MVT::v1f64) {
    3715           5 :       SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
    3716           5 :       return;
    3717             :     }
    3718             :     break;
    3719             :   }
    3720          24 :   case AArch64ISD::LD2LANEpost: {
    3721          46 :     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3722           4 :       SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
    3723           4 :       return;
    3724          54 :     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3725          16 :                VT == MVT::v8f16) {
    3726           4 :       SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
    3727           4 :       return;
    3728          42 :     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3729          10 :                VT == MVT::v2f32) {
    3730           8 :       SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
    3731           8 :       return;
    3732          18 :     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3733           2 :                VT == MVT::v1f64) {
    3734           8 :       SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
    3735           8 :       return;
    3736             :     }
    3737             :     break;
    3738             :   }
    3739          24 :   case AArch64ISD::LD3LANEpost: {
    3740          46 :     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3741           4 :       SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
    3742           4 :       return;
    3743          54 :     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3744          16 :                VT == MVT::v8f16) {
    3745           4 :       SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
    3746           4 :       return;
    3747          42 :     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3748          10 :                VT == MVT::v2f32) {
    3749           8 :       SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
    3750           8 :       return;
    3751          18 :     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3752           2 :                VT == MVT::v1f64) {
    3753           8 :       SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
    3754           8 :       return;
    3755             :     }
    3756             :     break;
    3757             :   }
    3758          24 :   case AArch64ISD::LD4LANEpost: {
    3759          46 :     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3760           4 :       SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
    3761           4 :       return;
    3762          54 :     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3763          16 :                VT == MVT::v8f16) {
    3764           4 :       SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
    3765           4 :       return;
    3766          42 :     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3767          10 :                VT == MVT::v2f32) {
    3768           8 :       SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
    3769           8 :       return;
    3770          18 :     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3771           2 :                VT == MVT::v1f64) {
    3772           8 :       SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
    3773           8 :       return;
    3774             :     }
    3775             :     break;
    3776             :   }
    3777          24 :   case AArch64ISD::ST2post: {
    3778          72 :     VT = Node->getOperand(1).getValueType();
    3779          48 :     if (VT == MVT::v8i8) {
    3780           2 :       SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
    3781           2 :       return;
    3782          44 :     } else if (VT == MVT::v16i8) {
    3783           2 :       SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
    3784           2 :       return;
    3785          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3786           2 :       SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
    3787           2 :       return;
    3788          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3789           2 :       SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
    3790           2 :       return;
    3791          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3792           4 :       SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
    3793           4 :       return;
    3794          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3795           4 :       SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
    3796           4 :       return;
    3797          14 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3798           4 :       SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
    3799           4 :       return;
    3800           6 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3801           4 :       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    3802           4 :       return;
    3803             :     }
    3804             :     break;
    3805             :   }
    3806          24 :   case AArch64ISD::ST3post: {
    3807          72 :     VT = Node->getOperand(1).getValueType();
    3808          48 :     if (VT == MVT::v8i8) {
    3809           2 :       SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
    3810           2 :       return;
    3811          44 :     } else if (VT == MVT::v16i8) {
    3812           2 :       SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
    3813           2 :       return;
    3814          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3815           2 :       SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
    3816           2 :       return;
    3817          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3818           2 :       SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
    3819           2 :       return;
    3820          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3821           4 :       SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
    3822           4 :       return;
    3823          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3824           4 :       SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
    3825           4 :       return;
    3826          14 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3827           4 :       SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
    3828           4 :       return;
    3829           6 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3830           4 :       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    3831           4 :       return;
    3832             :     }
    3833             :     break;
    3834             :   }
    3835          24 :   case AArch64ISD::ST4post: {
    3836          72 :     VT = Node->getOperand(1).getValueType();
    3837          48 :     if (VT == MVT::v8i8) {
    3838           2 :       SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
    3839           2 :       return;
    3840          44 :     } else if (VT == MVT::v16i8) {
    3841           2 :       SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
    3842           2 :       return;
    3843          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3844           2 :       SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
    3845           2 :       return;
    3846          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3847           2 :       SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
    3848           2 :       return;
    3849          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3850           4 :       SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
    3851           4 :       return;
    3852          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3853           4 :       SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
    3854           4 :       return;
    3855          14 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3856           4 :       SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
    3857           4 :       return;
    3858           6 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3859           4 :       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    3860           4 :       return;
    3861             :     }
    3862             :     break;
    3863             :   }
    3864          24 :   case AArch64ISD::ST1x2post: {
    3865          72 :     VT = Node->getOperand(1).getValueType();
    3866          48 :     if (VT == MVT::v8i8) {
    3867           2 :       SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
    3868           2 :       return;
    3869          44 :     } else if (VT == MVT::v16i8) {
    3870           2 :       SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
    3871           2 :       return;
    3872          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3873           2 :       SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
    3874           2 :       return;
    3875          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3876           2 :       SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
    3877           2 :       return;
    3878          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3879           4 :       SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
    3880           4 :       return;
    3881          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3882           4 :       SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
    3883           4 :       return;
    3884          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3885           4 :       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    3886           4 :       return;
    3887           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3888           4 :       SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
    3889           4 :       return;
    3890             :     }
    3891             :     break;
    3892             :   }
    3893          24 :   case AArch64ISD::ST1x3post: {
    3894          72 :     VT = Node->getOperand(1).getValueType();
    3895          48 :     if (VT == MVT::v8i8) {
    3896           2 :       SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
    3897           2 :       return;
    3898          44 :     } else if (VT == MVT::v16i8) {
    3899           2 :       SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
    3900           2 :       return;
    3901          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3902           2 :       SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
    3903           2 :       return;
    3904          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3905           2 :       SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
    3906           2 :       return;
    3907          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3908           4 :       SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
    3909           4 :       return;
    3910          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3911           4 :       SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
    3912           4 :       return;
    3913          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3914           4 :       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    3915           4 :       return;
    3916           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3917           4 :       SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
    3918           4 :       return;
    3919             :     }
    3920             :     break;
    3921             :   }
    3922          24 :   case AArch64ISD::ST1x4post: {
    3923          72 :     VT = Node->getOperand(1).getValueType();
    3924          48 :     if (VT == MVT::v8i8) {
    3925           2 :       SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
    3926           2 :       return;
    3927          44 :     } else if (VT == MVT::v16i8) {
    3928           2 :       SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
    3929           2 :       return;
    3930          38 :     } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
    3931           2 :       SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
    3932           2 :       return;
    3933          34 :     } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
    3934           2 :       SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
    3935           2 :       return;
    3936          30 :     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
    3937           4 :       SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
    3938           4 :       return;
    3939          22 :     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
    3940           4 :       SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
    3941           4 :       return;
    3942          14 :     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
    3943           4 :       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    3944           4 :       return;
    3945           6 :     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    3946           4 :       SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
    3947           4 :       return;
    3948             :     }
    3949             :     break;
    3950             :   }
    3951          24 :   case AArch64ISD::ST2LANEpost: {
    3952          72 :     VT = Node->getOperand(1).getValueType();
    3953          46 :     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3954           4 :       SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
    3955           4 :       return;
    3956          54 :     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3957          16 :                VT == MVT::v8f16) {
    3958           4 :       SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
    3959           4 :       return;
    3960          42 :     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3961          10 :                VT == MVT::v2f32) {
    3962           8 :       SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
    3963           8 :       return;
    3964          18 :     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3965           2 :                VT == MVT::v1f64) {
    3966           8 :       SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
    3967           8 :       return;
    3968             :     }
    3969             :     break;
    3970             :   }
    3971          24 :   case AArch64ISD::ST3LANEpost: {
    3972          72 :     VT = Node->getOperand(1).getValueType();
    3973          46 :     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3974           4 :       SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
    3975           4 :       return;
    3976          54 :     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3977          16 :                VT == MVT::v8f16) {
    3978           4 :       SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
    3979           4 :       return;
    3980          42 :     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    3981          10 :                VT == MVT::v2f32) {
    3982           8 :       SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
    3983           8 :       return;
    3984          18 :     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    3985           2 :                VT == MVT::v1f64) {
    3986           8 :       SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
    3987           8 :       return;
    3988             :     }
    3989             :     break;
    3990             :   }
    3991          24 :   case AArch64ISD::ST4LANEpost: {
    3992          72 :     VT = Node->getOperand(1).getValueType();
    3993          46 :     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
    3994           4 :       SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
    3995           4 :       return;
    3996          54 :     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
    3997          16 :                VT == MVT::v8f16) {
    3998           4 :       SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
    3999           4 :       return;
    4000          42 :     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
    4001          10 :                VT == MVT::v2f32) {
    4002           8 :       SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
    4003           8 :       return;
    4004          18 :     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
    4005           2 :                VT == MVT::v1f64) {
    4006           8 :       SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    4007           8 :       return;
    4008             :     }
    4009             :     break;
    4010             :   }
    4011             :   }
    4012             : 
    4013             :   // No custom case above matched: fall back to the TableGen-generated matcher.
    4014      153549 :   SelectCode(Node);
    4015             : }
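
Each AArch64ISD::LDnLANEpost / STnpost / ST1xNpost / STnLANEpost case above has the same shape: read the value type, map it to a post-increment NEON opcode, and hand off to a shared emitter (SelectPostLoadLane, SelectPostStore, or SelectPostStoreLane). The one asymmetry is the v1i64/v1f64 rows: a one-element vector has nothing to interleave, so STn degenerates to a plain n-register ST1 (e.g. ST1Twov1d_POST in the ST2post case). A minimal sketch of one such mapping in isolation, assuming a hypothetical helper chooseST2PostOpcode that is not part of this file:

    static unsigned chooseST2PostOpcode(EVT VT) {
      if (VT == MVT::v8i8)                      return AArch64::ST2Twov8b_POST;
      if (VT == MVT::v16i8)                     return AArch64::ST2Twov16b_POST;
      if (VT == MVT::v4i16 || VT == MVT::v4f16) return AArch64::ST2Twov4h_POST;
      if (VT == MVT::v8i16 || VT == MVT::v8f16) return AArch64::ST2Twov8h_POST;
      if (VT == MVT::v2i32 || VT == MVT::v2f32) return AArch64::ST2Twov2s_POST;
      if (VT == MVT::v4i32 || VT == MVT::v4f32) return AArch64::ST2Twov4s_POST;
      if (VT == MVT::v2i64 || VT == MVT::v2f64) return AArch64::ST2Twov2d_POST;
      // One-element vectors: ST2 collapses to a two-register ST1.
      if (VT == MVT::v1i64 || VT == MVT::v1f64) return AArch64::ST1Twov1d_POST;
      return 0; // unhandled VT: Select() breaks out and calls SelectCode(Node)
    }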
    4016             : 
    4017             : /// createAArch64ISelDag - This pass converts a legalized DAG into an
    4018             : /// AArch64-specific DAG, ready for instruction scheduling.
    4019         986 : FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
    4020             :                                          CodeGenOpt::Level OptLevel) {
    4021        1972 :   return new AArch64DAGToDAGISel(TM, OptLevel);
    4022             : }

Generated by: LCOV version 1.13
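
For context on how the factory above enters the codegen pipeline: the target's pass configuration installs the pass during instruction-selection setup. A minimal sketch, adapted from AArch64TargetMachine.cpp of roughly this LLVM revision (details approximate; the ELF local-dynamic TLS cleanup pass the real method also adds is omitted):

    bool AArch64PassConfig::addInstSelector() {
      // Install the SelectionDAG instruction selector defined in this file.
      addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
      return false; // false: pipeline construction succeeded
    }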