LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64InstrInfo.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21

                  Hit    Total    Coverage
Lines:           1609     1907      84.4 %
Functions:         88       90      97.8 %

          Line data    Source code
       1             : //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains the AArch64 implementation of the TargetInstrInfo class.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64InstrInfo.h"
      15             : #include "AArch64MachineFunctionInfo.h"
      16             : #include "AArch64Subtarget.h"
      17             : #include "MCTargetDesc/AArch64AddressingModes.h"
      18             : #include "Utils/AArch64BaseInfo.h"
      19             : #include "llvm/ADT/ArrayRef.h"
      20             : #include "llvm/ADT/STLExtras.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/CodeGen/MachineBasicBlock.h"
      23             : #include "llvm/CodeGen/MachineFrameInfo.h"
      24             : #include "llvm/CodeGen/MachineFunction.h"
      25             : #include "llvm/CodeGen/MachineInstr.h"
      26             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      27             : #include "llvm/CodeGen/MachineMemOperand.h"
      28             : #include "llvm/CodeGen/MachineOperand.h"
      29             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      30             : #include "llvm/CodeGen/MachineModuleInfo.h"
      31             : #include "llvm/CodeGen/StackMaps.h"
      32             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      33             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      34             : #include "llvm/IR/DebugLoc.h"
      35             : #include "llvm/IR/GlobalValue.h"
      36             : #include "llvm/MC/MCInst.h"
      37             : #include "llvm/MC/MCInstrDesc.h"
      38             : #include "llvm/Support/Casting.h"
      39             : #include "llvm/Support/CodeGen.h"
      40             : #include "llvm/Support/CommandLine.h"
      41             : #include "llvm/Support/Compiler.h"
      42             : #include "llvm/Support/ErrorHandling.h"
      43             : #include "llvm/Support/MathExtras.h"
      44             : #include "llvm/Target/TargetMachine.h"
      45             : #include "llvm/Target/TargetOptions.h"
      46             : #include <cassert>
      47             : #include <cstdint>
      48             : #include <iterator>
      49             : #include <utility>
      50             : 
      51             : using namespace llvm;
      52             : 
      53             : #define GET_INSTRINFO_CTOR_DTOR
      54             : #include "AArch64GenInstrInfo.inc"
      55             : 
      56             : static cl::opt<unsigned> TBZDisplacementBits(
      57             :     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
      58             :     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
      59             : 
      60             : static cl::opt<unsigned> CBZDisplacementBits(
      61             :     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
      62             :     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
      63             : 
      64             : static cl::opt<unsigned>
      65             :     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
      66             :                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
      67             : 
      68        1573 : AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
      69             :     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      70        1573 :       RI(STI.getTargetTriple()), Subtarget(STI) {}
      71             : 
       72             : /// getInstSizeInBytes - Return the number of bytes of code the specified
       73             : /// instruction may occupy. This returns the maximum number of bytes.
      74       91199 : unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
      75       91199 :   const MachineBasicBlock &MBB = *MI.getParent();
      76       91199 :   const MachineFunction *MF = MBB.getParent();
      77       91199 :   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
      78             : 
      79      182398 :   if (MI.getOpcode() == AArch64::INLINEASM)
      80         216 :     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
      81             : 
      82             :   // FIXME: We currently only handle pseudoinstructions that don't get expanded
      83             :   //        before the assembly printer.
      84             :   unsigned NumBytes = 0;
      85             :   const MCInstrDesc &Desc = MI.getDesc();
      86       90983 :   switch (Desc.getOpcode()) {
      87             :   default:
      88             :     // Anything not explicitly designated otherwise is a normal 4-byte insn.
      89             :     NumBytes = 4;
      90             :     break;
      91        3311 :   case TargetOpcode::DBG_VALUE:
      92             :   case TargetOpcode::EH_LABEL:
      93             :   case TargetOpcode::IMPLICIT_DEF:
      94             :   case TargetOpcode::KILL:
      95             :     NumBytes = 0;
      96        3311 :     break;
      97          17 :   case TargetOpcode::STACKMAP:
      98             :     // The upper bound for a stackmap intrinsic is the full length of its shadow
      99          17 :     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
     100             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     101          17 :     break;
     102          48 :   case TargetOpcode::PATCHPOINT:
     103             :     // The size of the patchpoint intrinsic is the number of bytes requested
     104          48 :     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
     105             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     106          48 :     break;
     107          30 :   case AArch64::TLSDESC_CALLSEQ:
     108             :     // This gets lowered to an instruction sequence which takes 16 bytes
     109             :     NumBytes = 16;
     110          30 :     break;
     111             :   }
     112             : 
     113             :   return NumBytes;
     114             : }
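
A usage sketch (the helper below is hypothetical, not part of this file): clients
such as the branch-relaxation pass need a conservative byte size for a whole
block, which they can obtain by summing getInstSizeInBytes() over its
instructions.

    static unsigned getBlockSizeUpperBound(const AArch64InstrInfo &TII,
                                           const MachineBasicBlock &MBB) {
      unsigned Size = 0;
      for (const MachineInstr &MI : MBB)
        Size += TII.getInstSizeInBytes(MI); // Each term is a maximum, so the
                                            // sum is a safe upper bound.
      return Size;
    }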
     115             : 
     116       39354 : static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
     117             :                             SmallVectorImpl<MachineOperand> &Cond) {
     118             :   // Block ends with fall-through condbranch.
     119       78708 :   switch (LastInst->getOpcode()) {
     120           0 :   default:
     121           0 :     llvm_unreachable("Unknown branch instruction?");
     122       18644 :   case AArch64::Bcc:
     123       18644 :     Target = LastInst->getOperand(1).getMBB();
     124       18644 :     Cond.push_back(LastInst->getOperand(0));
     125       18644 :     break;
     126       12757 :   case AArch64::CBZW:
     127             :   case AArch64::CBZX:
     128             :   case AArch64::CBNZW:
     129             :   case AArch64::CBNZX:
     130       12757 :     Target = LastInst->getOperand(1).getMBB();
     131       25514 :     Cond.push_back(MachineOperand::CreateImm(-1));
     132       38271 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     133       12757 :     Cond.push_back(LastInst->getOperand(0));
     134       12757 :     break;
     135        7953 :   case AArch64::TBZW:
     136             :   case AArch64::TBZX:
     137             :   case AArch64::TBNZW:
     138             :   case AArch64::TBNZX:
     139        7953 :     Target = LastInst->getOperand(2).getMBB();
     140       15906 :     Cond.push_back(MachineOperand::CreateImm(-1));
     141       23859 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     142        7953 :     Cond.push_back(LastInst->getOperand(0));
     143       15906 :     Cond.push_back(LastInst->getOperand(1));
     144             :   }
     145       39354 : }
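
The switch above yields three Cond shapes; one worked example of each (block
and register names hypothetical):

    b.eq  %bb.1          ->  Cond = { Imm(AArch64CC::EQ) }
    cbnz  w8, %bb.1      ->  Cond = { Imm(-1), Imm(AArch64::CBNZW), Reg(w8) }
    tbz   w8, #3, %bb.1  ->  Cond = { Imm(-1), Imm(AArch64::TBZW), Reg(w8), Imm(3) }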
     146             : 
     147             : static unsigned getBranchDisplacementBits(unsigned Opc) {
     148        1370 :   switch (Opc) {
     149           0 :   default:
     150           0 :     llvm_unreachable("unexpected opcode!");
     151             :   case AArch64::B:
     152             :     return 64;
     153             :   case AArch64::TBNZW:
     154             :   case AArch64::TBZW:
     155             :   case AArch64::TBNZX:
     156             :   case AArch64::TBZX:
     157             :     return TBZDisplacementBits;
     158             :   case AArch64::CBNZW:
     159             :   case AArch64::CBZW:
     160             :   case AArch64::CBNZX:
     161             :   case AArch64::CBZX:
     162             :     return CBZDisplacementBits;
     163             :   case AArch64::Bcc:
     164             :     return BCCDisplacementBits;
     165             :   }
     166             : }
     167             : 
     168        1370 : bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
     169             :                                              int64_t BrOffset) const {
     170             :   unsigned Bits = getBranchDisplacementBits(BranchOp);
      171             :   assert(Bits >= 3 && "max branch displacement must be enough to jump "
      172             :                       "over conditional branch expansion");
     173        1126 :   return isIntN(Bits, BrOffset / 4);
     174             : }
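
With the default option values above, this works out as follows (branch
offsets are word-aligned, hence the division by 4):

    Bcc / CB[N]Z : 19 bits -> isIntN(19, Off / 4) -> bytes in [-2^20, 2^20 - 4]  (~1 MiB)
    TB[N]Z       : 14 bits -> bytes in [-2^15, 2^15 - 4]                         (~32 KiB)
    B            : 64 bits -> any offset is in range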
     175             : 
     176             : MachineBasicBlock *
     177        1448 : AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
     178        2896 :   switch (MI.getOpcode()) {
     179           0 :   default:
     180           0 :     llvm_unreachable("unexpected opcode!");
     181         244 :   case AArch64::B:
     182         244 :     return MI.getOperand(0).getMBB();
     183         211 :   case AArch64::TBZW:
     184             :   case AArch64::TBNZW:
     185             :   case AArch64::TBZX:
     186             :   case AArch64::TBNZX:
     187         211 :     return MI.getOperand(2).getMBB();
     188         993 :   case AArch64::CBZW:
     189             :   case AArch64::CBNZW:
     190             :   case AArch64::CBZX:
     191             :   case AArch64::CBNZX:
     192             :   case AArch64::Bcc:
     193         993 :     return MI.getOperand(1).getMBB();
     194             :   }
     195             : }
     196             : 
     197             : // Branch analysis.
     198      405272 : bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     199             :                                      MachineBasicBlock *&TBB,
     200             :                                      MachineBasicBlock *&FBB,
     201             :                                      SmallVectorImpl<MachineOperand> &Cond,
     202             :                                      bool AllowModify) const {
     203             :   // If the block has no terminators, it just falls into the block after it.
     204      405272 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     205      405272 :   if (I == MBB.end())
     206             :     return false;
     207             : 
     208      403815 :   if (!isUnpredicatedTerminator(*I))
     209             :     return false;
     210             : 
     211             :   // Get the last instruction in the block.
     212             :   MachineInstr *LastInst = &*I;
     213             : 
     214             :   // If there is only one terminator instruction, process it.
     215      379556 :   unsigned LastOpc = LastInst->getOpcode();
     216      743645 :   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     217      362063 :     if (isUncondBranchOpcode(LastOpc)) {
     218        7256 :       TBB = LastInst->getOperand(0).getMBB();
     219        7256 :       return false;
     220             :     }
     221             :     if (isCondBranchOpcode(LastOpc)) {
     222             :       // Block ends with fall-through condbranch.
     223       22181 :       parseCondBranch(LastInst, TBB, Cond);
     224       22181 :       return false;
     225             :     }
     226             :     return true; // Can't handle indirect branch.
     227             :   }
     228             : 
     229             :   // Get the instruction before it if it is a terminator.
     230             :   MachineInstr *SecondLastInst = &*I;
     231       17493 :   unsigned SecondLastOpc = SecondLastInst->getOpcode();
     232             : 
     233             :   // If AllowModify is true and the block ends with two or more unconditional
     234             :   // branches, delete all but the first unconditional branch.
     235       17493 :   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
     236        2394 :     while (isUncondBranchOpcode(SecondLastOpc)) {
     237           2 :       LastInst->eraseFromParent();
     238             :       LastInst = SecondLastInst;
     239           2 :       LastOpc = LastInst->getOpcode();
     240           4 :       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     241             :         // Return now the only terminator is an unconditional branch.
     242           2 :         TBB = LastInst->getOperand(0).getMBB();
     243           2 :         return false;
     244             :       } else {
     245             :         SecondLastInst = &*I;
     246           0 :         SecondLastOpc = SecondLastInst->getOpcode();
     247             :       }
     248             :     }
     249             :   }
     250             : 
     251             :   // If there are three terminators, we don't know what sort of block this is.
     252       33612 :   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
     253             :     return true;
     254             : 
     255             :   // If the block ends with a B and a Bcc, handle it.
     256       17193 :   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     257       17173 :     parseCondBranch(SecondLastInst, TBB, Cond);
     258       17173 :     FBB = LastInst->getOperand(0).getMBB();
     259       17173 :     return false;
     260             :   }
     261             : 
     262             :   // If the block ends with two unconditional branches, handle it.  The second
     263             :   // one is not executed, so remove it.
     264         132 :   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     265           0 :     TBB = SecondLastInst->getOperand(0).getMBB();
     266           0 :     I = LastInst;
     267           0 :     if (AllowModify)
     268           0 :       I->eraseFromParent();
     269           0 :     return false;
     270             :   }
     271             : 
     272             :   // ...likewise if it ends with an indirect branch followed by an unconditional
     273             :   // branch.
     274         132 :   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     275           0 :     I = LastInst;
     276           0 :     if (AllowModify)
     277           0 :       I->eraseFromParent();
     278           0 :     return true;
     279             :   }
     280             : 
     281             :   // Otherwise, can't handle this.
     282             :   return true;
     283             : }
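
A minimal sketch of how a client reads the out-parameters (hypothetical
helper): on a false return, empty Cond with TBB set means an unconditional
branch, non-empty Cond means a conditional branch to TBB, and FBB, when
non-null, is the explicit false destination.

    static bool endsInAnalyzableCondBranch(const AArch64InstrInfo &TII,
                                           MachineBasicBlock &MBB) {
      MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
      SmallVector<MachineOperand, 4> Cond;
      if (TII.analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
        return false; // Too complex to analyze (e.g. indirect branch).
      return !Cond.empty();
    }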
     284             : 
     285        2074 : bool AArch64InstrInfo::reverseBranchCondition(
     286             :     SmallVectorImpl<MachineOperand> &Cond) const {
     287        2074 :   if (Cond[0].getImm() != -1) {
     288             :     // Regular Bcc
     289        1351 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
     290        1351 :     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
     291             :   } else {
     292             :     // Folded compare-and-branch
     293         723 :     switch (Cond[1].getImm()) {
     294           0 :     default:
     295           0 :       llvm_unreachable("Unknown conditional branch!");
     296         181 :     case AArch64::CBZW:
     297             :       Cond[1].setImm(AArch64::CBNZW);
     298             :       break;
     299         174 :     case AArch64::CBNZW:
     300             :       Cond[1].setImm(AArch64::CBZW);
     301             :       break;
     302          52 :     case AArch64::CBZX:
     303             :       Cond[1].setImm(AArch64::CBNZX);
     304             :       break;
     305          51 :     case AArch64::CBNZX:
     306             :       Cond[1].setImm(AArch64::CBZX);
     307             :       break;
     308         112 :     case AArch64::TBZW:
     309             :       Cond[1].setImm(AArch64::TBNZW);
     310             :       break;
     311         111 :     case AArch64::TBNZW:
     312             :       Cond[1].setImm(AArch64::TBZW);
     313             :       break;
     314          24 :     case AArch64::TBZX:
     315             :       Cond[1].setImm(AArch64::TBNZX);
     316             :       break;
     317          18 :     case AArch64::TBNZX:
     318             :       Cond[1].setImm(AArch64::TBZX);
     319             :       break;
     320             :     }
     321             :   }
     322             : 
     323        2074 :   return false;
     324             : }
     325             : 
     326        3848 : unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
     327             :                                         int *BytesRemoved) const {
     328        3848 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     329        3848 :   if (I == MBB.end())
     330             :     return 0;
     331             : 
     332        7696 :   if (!isUncondBranchOpcode(I->getOpcode()) &&
     333             :       !isCondBranchOpcode(I->getOpcode()))
     334             :     return 0;
     335             : 
     336             :   // Remove the branch.
     337        3637 :   I->eraseFromParent();
     338             : 
     339        3637 :   I = MBB.end();
     340             : 
     341        3637 :   if (I == MBB.begin()) {
     342         395 :     if (BytesRemoved)
     343           2 :       *BytesRemoved = 4;
     344         395 :     return 1;
     345             :   }
     346             :   --I;
     347        3242 :   if (!isCondBranchOpcode(I->getOpcode())) {
     348        2132 :     if (BytesRemoved)
     349           6 :       *BytesRemoved = 4;
     350        2132 :     return 1;
     351             :   }
     352             : 
     353             :   // Remove the branch.
     354        1110 :   I->eraseFromParent();
     355        1110 :   if (BytesRemoved)
     356           2 :     *BytesRemoved = 8;
     357             : 
     358             :   return 2;
     359             : }
     360             : 
     361        2702 : void AArch64InstrInfo::instantiateCondBranch(
     362             :     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
     363             :     ArrayRef<MachineOperand> Cond) const {
     364        2702 :   if (Cond[0].getImm() != -1) {
     365             :     // Regular Bcc
     366        1662 :     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
     367             :   } else {
     368             :     // Folded compare-and-branch
     369             :     // Note that we use addOperand instead of addReg to keep the flags.
     370             :     const MachineInstrBuilder MIB =
     371        1040 :         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
     372        1040 :     if (Cond.size() > 3)
     373         357 :       MIB.addImm(Cond[3].getImm());
     374             :     MIB.addMBB(TBB);
     375             :   }
     376        2702 : }
     377             : 
     378        3647 : unsigned AArch64InstrInfo::insertBranch(
     379             :     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
     380             :     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
     381             :   // Shouldn't be a fall through.
     382             :   assert(TBB && "insertBranch must not be told to insert a fallthrough");
     383             : 
     384        3647 :   if (!FBB) {
     385        3578 :     if (Cond.empty()) // Unconditional branch?
     386         945 :       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
     387             :     else
     388        2633 :       instantiateCondBranch(MBB, DL, TBB, Cond);
     389             : 
     390        3578 :     if (BytesAdded)
     391           2 :       *BytesAdded = 4;
     392             : 
     393        3578 :     return 1;
     394             :   }
     395             : 
     396             :   // Two-way conditional branch.
     397          69 :   instantiateCondBranch(MBB, DL, TBB, Cond);
     398          69 :   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
     399             : 
     400          69 :   if (BytesAdded)
     401          10 :     *BytesAdded = 8;
     402             : 
     403             :   return 2;
     404             : }
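
Together with removeBranch() and reverseBranchCondition(), this supports the
usual retargeting idiom; a sketch (hypothetical helper) that inverts a
conditional branch so its former fall-through becomes the taken target:

    static void flipCondBranch(const AArch64InstrInfo &TII,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock *NewTarget, const DebugLoc &DL) {
      MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
      SmallVector<MachineOperand, 4> Cond;
      if (TII.analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false) ||
          Cond.empty())
        return; // Not a rewritable conditional branch.
      TII.removeBranch(MBB);
      TII.reverseBranchCondition(Cond); // Bcc/CB[N]Z/TB[N]Z inverted in place.
      TII.insertBranch(MBB, NewTarget, /*FBB=*/nullptr, Cond, DL);
    }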
     405             : 
     406             : // Find the original register that VReg is copied from.
     407         240 : static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
     408         486 :   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
     409         397 :     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     410             :     if (!DefMI->isFullCopy())
     411         151 :       return VReg;
     412         246 :     VReg = DefMI->getOperand(1).getReg();
     413             :   }
     414             :   return VReg;
     415             : }
     416             : 
     417             : // Determine if VReg is defined by an instruction that can be folded into a
     418             : // csel instruction. If so, return the folded opcode, and the replacement
     419             : // register.
     420         224 : static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     421             :                                 unsigned *NewVReg = nullptr) {
     422         224 :   VReg = removeCopies(MRI, VReg);
     423         224 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
     424             :     return 0;
     425             : 
     426         151 :   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
     427         151 :   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     428             :   unsigned Opc = 0;
     429             :   unsigned SrcOpNum = 0;
     430         302 :   switch (DefMI->getOpcode()) {
     431           0 :   case AArch64::ADDSXri:
     432             :   case AArch64::ADDSWri:
     433             :     // if NZCV is used, do not fold.
     434           0 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     435             :       return 0;
     436             :     // fall-through to ADDXri and ADDWri.
     437             :     LLVM_FALLTHROUGH;
     438             :   case AArch64::ADDXri:
     439             :   case AArch64::ADDWri:
     440             :     // add x, 1 -> csinc.
     441         186 :     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
     442          10 :         DefMI->getOperand(3).getImm() != 0)
     443             :       return 0;
     444             :     SrcOpNum = 1;
     445          10 :     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     446             :     break;
     447             : 
     448           8 :   case AArch64::ORNXrr:
     449             :   case AArch64::ORNWrr: {
     450             :     // not x -> csinv, represented as orn dst, xzr, src.
     451           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     452           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     453             :       return 0;
     454             :     SrcOpNum = 2;
     455           8 :     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     456             :     break;
     457             :   }
     458             : 
     459           8 :   case AArch64::SUBSXrr:
     460             :   case AArch64::SUBSWrr:
     461             :     // if NZCV is used, do not fold.
     462           8 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     463             :       return 0;
     464             :     // fall-through to SUBXrr and SUBWrr.
     465             :     LLVM_FALLTHROUGH;
     466             :   case AArch64::SUBXrr:
     467             :   case AArch64::SUBWrr: {
     468             :     // neg x -> csneg, represented as sub dst, xzr, src.
     469           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     470           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     471             :       return 0;
     472             :     SrcOpNum = 2;
     473           8 :     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     474             :     break;
     475             :   }
     476             :   default:
     477             :     return 0;
     478             :   }
     479             :   assert(Opc && SrcOpNum && "Missing parameters");
     480             : 
     481          26 :   if (NewVReg)
     482          26 :     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
     483             :   return Opc;
     484             : }
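
For illustration, the ADDri case above feeds the classic csinc fold (virtual
register names hypothetical; insertSelect() below performs the inversion):

    %t = ADDWri %x, 1, 0        ; t = x + 1
    %d = CSELWr %t, %f, eq      ; d = eq ? t : f
  becomes
    %d = CSINCWr %f, %x, ne     ; d = ne ? f : (x + 1)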
     485             : 
     486          97 : bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
     487             :                                        ArrayRef<MachineOperand> Cond,
     488             :                                        unsigned TrueReg, unsigned FalseReg,
     489             :                                        int &CondCycles, int &TrueCycles,
     490             :                                        int &FalseCycles) const {
     491             :   // Check register classes.
     492          97 :   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     493             :   const TargetRegisterClass *RC =
     494         194 :       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
     495          97 :   if (!RC)
     496             :     return false;
     497             : 
     498             :   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
     499          97 :   unsigned ExtraCondLat = Cond.size() != 1;
     500             : 
     501             :   // GPRs are handled by csel.
     502             :   // FIXME: Fold in x+1, -x, and ~x when applicable.
     503         194 :   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
     504          41 :       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
     505             :     // Single-cycle csel, csinc, csinv, and csneg.
     506          94 :     CondCycles = 1 + ExtraCondLat;
     507          94 :     TrueCycles = FalseCycles = 1;
     508          94 :     if (canFoldIntoCSel(MRI, TrueReg))
     509           6 :       TrueCycles = 0;
     510          88 :     else if (canFoldIntoCSel(MRI, FalseReg))
     511           7 :       FalseCycles = 0;
     512          94 :     return true;
     513             :   }
     514             : 
     515             :   // Scalar floating point is handled by fcsel.
     516             :   // FIXME: Form fabs, fmin, and fmax when applicable.
     517           6 :   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
     518           3 :       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
     519           0 :     CondCycles = 5 + ExtraCondLat;
     520           0 :     TrueCycles = FalseCycles = 2;
     521           0 :     return true;
     522             :   }
     523             : 
     524             :   // Can't do vectors.
     525             :   return false;
     526             : }
     527             : 
     528          24 : void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
     529             :                                     MachineBasicBlock::iterator I,
     530             :                                     const DebugLoc &DL, unsigned DstReg,
     531             :                                     ArrayRef<MachineOperand> Cond,
     532             :                                     unsigned TrueReg, unsigned FalseReg) const {
     533          24 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     534             : 
     535             :   // Parse the condition code, see parseCondBranch() above.
     536             :   AArch64CC::CondCode CC;
     537          24 :   switch (Cond.size()) {
     538           0 :   default:
     539           0 :     llvm_unreachable("Unknown condition opcode in Cond");
     540          15 :   case 1: // b.cc
     541          15 :     CC = AArch64CC::CondCode(Cond[0].getImm());
     542          15 :     break;
     543           5 :   case 3: { // cbz/cbnz
     544             :     // We must insert a compare against 0.
     545             :     bool Is64Bit;
     546           5 :     switch (Cond[1].getImm()) {
     547           0 :     default:
     548           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     549             :     case AArch64::CBZW:
     550             :       Is64Bit = false;
     551             :       CC = AArch64CC::EQ;
     552             :       break;
     553             :     case AArch64::CBZX:
     554             :       Is64Bit = true;
     555             :       CC = AArch64CC::EQ;
     556             :       break;
     557             :     case AArch64::CBNZW:
     558             :       Is64Bit = false;
     559             :       CC = AArch64CC::NE;
     560             :       break;
     561             :     case AArch64::CBNZX:
     562             :       Is64Bit = true;
     563             :       CC = AArch64CC::NE;
     564             :       break;
     565             :     }
     566           5 :     unsigned SrcReg = Cond[2].getReg();
     567           5 :     if (Is64Bit) {
     568             :       // cmp reg, #0 is actually subs xzr, reg, #0.
     569           2 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
     570           6 :       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
     571           2 :           .addReg(SrcReg)
     572             :           .addImm(0)
     573             :           .addImm(0);
     574             :     } else {
     575           3 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
     576           9 :       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
     577           3 :           .addReg(SrcReg)
     578             :           .addImm(0)
     579             :           .addImm(0);
     580             :     }
     581             :     break;
     582             :   }
     583           4 :   case 4: { // tbz/tbnz
     584             :     // We must insert a tst instruction.
     585           4 :     switch (Cond[1].getImm()) {
     586           0 :     default:
     587           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     588             :     case AArch64::TBZW:
     589             :     case AArch64::TBZX:
     590             :       CC = AArch64CC::EQ;
     591             :       break;
     592           2 :     case AArch64::TBNZW:
     593             :     case AArch64::TBNZX:
     594             :       CC = AArch64CC::NE;
     595           2 :       break;
     596             :     }
     597             :     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
     598           4 :     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
     599           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
     600           2 :           .addReg(Cond[2].getReg())
     601             :           .addImm(
     602           2 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
     603             :     else
     604           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
     605           2 :           .addReg(Cond[2].getReg())
     606             :           .addImm(
     607           2 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
     608             :     break;
     609             :   }
     610             :   }
     611             : 
     612             :   unsigned Opc = 0;
     613             :   const TargetRegisterClass *RC = nullptr;
     614             :   bool TryFold = false;
     615          24 :   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
     616             :     RC = &AArch64::GPR64RegClass;
     617             :     Opc = AArch64::CSELXr;
     618             :     TryFold = true;
     619          14 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
     620             :     RC = &AArch64::GPR32RegClass;
     621             :     Opc = AArch64::CSELWr;
     622             :     TryFold = true;
     623           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
     624             :     RC = &AArch64::FPR64RegClass;
     625             :     Opc = AArch64::FCSELDrrr;
     626           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
     627             :     RC = &AArch64::FPR32RegClass;
     628             :     Opc = AArch64::FCSELSrrr;
     629             :   }
     630             :   assert(RC && "Unsupported regclass");
     631             : 
     632             :   // Try folding simple instructions into the csel.
     633          24 :   if (TryFold) {
     634          24 :     unsigned NewVReg = 0;
     635          24 :     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
     636          24 :     if (FoldedOpc) {
      637             :       // The folded opcodes csinc, csinv and csneg apply the operation to
     638             :       // FalseReg, so we need to invert the condition.
     639             :       CC = AArch64CC::getInvertedCondCode(CC);
     640             :       TrueReg = FalseReg;
     641             :     } else
     642          18 :       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
     643             : 
     644             :     // Fold the operation. Leave any dead instructions for DCE to clean up.
     645          24 :     if (FoldedOpc) {
     646          13 :       FalseReg = NewVReg;
     647             :       Opc = FoldedOpc;
      648             :       // This extends the live range of NewVReg.
     649          13 :       MRI.clearKillFlags(NewVReg);
     650             :     }
     651             :   }
     652             : 
      653             :   // Pull all virtual registers into the appropriate class.
     654          24 :   MRI.constrainRegClass(TrueReg, RC);
     655          24 :   MRI.constrainRegClass(FalseReg, RC);
     656             : 
     657             :   // Insert the csel.
     658          48 :   BuildMI(MBB, I, DL, get(Opc), DstReg)
     659          24 :       .addReg(TrueReg)
     660          24 :       .addReg(FalseReg)
     661          24 :       .addImm(CC);
     662          24 : }
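
For example, a select whose condition came from cbz w1, i.e.
Cond = { Imm(-1), Imm(AArch64::CBZW), Reg(w1) }, is lowered by the code above
to a compare plus a csel (registers hypothetical):

    SUBSWri wzr, w1, 0, 0     ; cmp w1, #0  (sets NZCV)
    CSELWr  w0, wt, wf, eq    ; w0 = (w1 == 0) ? wt : wf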
     663             : 
      664             : /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
     665             : static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
     666           5 :   uint64_t Imm = MI.getOperand(1).getImm();
     667           5 :   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
     668             :   uint64_t Encoding;
     669           5 :   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
     670             : }
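
The shift pair above truncates Imm to its low BitSize bits before the encoding
check. Two worked cases:

    MOVi32imm 0x00ff00ff -> expandable: a repeating 16-bit pattern of eight
                            ones is a valid logical immediate, so this can
                            become ORRWri dst, wzr, #0x00ff00ff
    MOVi32imm 0x12345678 -> not encodable as a logical immediate; it stays a
                            MOVZ/MOVK sequence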
     671             : 
     672             : // FIXME: this implementation should be micro-architecture dependent, so a
      673             : // micro-architecture target hook should be introduced here in the future.
     674       13479 : bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
     675       13479 :   if (!Subtarget.hasCustomCheapAsMoveHandling())
     676       12265 :     return MI.isAsCheapAsAMove();
     677             : 
     678        1214 :   const unsigned Opcode = MI.getOpcode();
     679             : 
     680             :   // Firstly, check cases gated by features.
     681             : 
     682        1214 :   if (Subtarget.hasZeroCycleZeroingFP()) {
     683        2108 :     if (Opcode == AArch64::FMOVH0 ||
     684        1054 :         Opcode == AArch64::FMOVS0 ||
     685             :         Opcode == AArch64::FMOVD0)
     686             :       return true;
     687             :   }
     688             : 
     689        1190 :   if (Subtarget.hasZeroCycleZeroingGP()) {
     690         373 :     if (Opcode == TargetOpcode::COPY &&
     691           0 :         (MI.getOperand(1).getReg() == AArch64::WZR ||
     692             :          MI.getOperand(1).getReg() == AArch64::XZR))
     693             :       return true;
     694             :   }
     695             : 
     696             :   // Secondly, check cases specific to sub-targets.
     697             : 
     698        1190 :   if (Subtarget.hasExynosCheapAsMoveHandling()) {
     699         657 :     if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
     700          37 :       return true;
     701             :     else
     702         620 :       return MI.isAsCheapAsAMove();
     703             :   }
     704             : 
     705             :   // Finally, check generic cases.
     706             : 
     707         533 :   switch (Opcode) {
     708             :   default:
     709             :     return false;
     710             : 
     711             :   // add/sub on register without shift
     712          18 :   case AArch64::ADDWri:
     713             :   case AArch64::ADDXri:
     714             :   case AArch64::SUBWri:
     715             :   case AArch64::SUBXri:
     716          18 :     return (MI.getOperand(3).getImm() == 0);
     717             : 
     718             :   // logical ops on immediate
     719           0 :   case AArch64::ANDWri:
     720             :   case AArch64::ANDXri:
     721             :   case AArch64::EORWri:
     722             :   case AArch64::EORXri:
     723             :   case AArch64::ORRWri:
     724             :   case AArch64::ORRXri:
     725           0 :     return true;
     726             : 
     727             :   // logical ops on register without shift
     728           0 :   case AArch64::ANDWrr:
     729             :   case AArch64::ANDXrr:
     730             :   case AArch64::BICWrr:
     731             :   case AArch64::BICXrr:
     732             :   case AArch64::EONWrr:
     733             :   case AArch64::EONXrr:
     734             :   case AArch64::EORWrr:
     735             :   case AArch64::EORXrr:
     736             :   case AArch64::ORNWrr:
     737             :   case AArch64::ORNXrr:
     738             :   case AArch64::ORRWrr:
     739             :   case AArch64::ORRXrr:
     740           0 :     return true;
     741             : 
     742             :   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
     743             :   // ORRXri, it is as cheap as MOV
     744             :   case AArch64::MOVi32imm:
     745           5 :     return canBeExpandedToORR(MI, 32);
     746             :   case AArch64::MOVi64imm:
     747           0 :     return canBeExpandedToORR(MI, 64);
     748             :   }
     749             : 
     750             :   llvm_unreachable("Unknown opcode to check as cheap as a move!");
     751             : }
     752             : 
     753        1259 : bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
     754             :   unsigned Reg, Imm, Shift;
     755             : 
     756        2518 :   switch (MI.getOpcode()) {
     757             :   default:
     758             :     return false;
     759             : 
     760             :   // MOV Rd, SP
     761         322 :   case AArch64::ADDWri:
     762             :   case AArch64::ADDXri:
     763         644 :     if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
     764             :       return false;
     765             : 
     766         252 :     Reg = MI.getOperand(1).getReg();
     767         252 :     Imm = MI.getOperand(2).getImm();
     768         252 :     return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
     769             : 
     770             :   // Literal
     771           3 :   case AArch64::ADR:
     772             :   case AArch64::ADRP:
     773           3 :     return true;
     774             : 
     775             :   // MOVI Vd, #0
     776          47 :   case AArch64::MOVID:
     777             :   case AArch64::MOVIv8b_ns:
     778             :   case AArch64::MOVIv2d_ns:
     779             :   case AArch64::MOVIv16b_ns:
     780          47 :     Imm = MI.getOperand(1).getImm();
     781          47 :     return (Imm == 0);
     782             : 
     783             :   // MOVI Vd, #0
     784           0 :   case AArch64::MOVIv2i32:
     785             :   case AArch64::MOVIv4i16:
     786             :   case AArch64::MOVIv4i32:
     787             :   case AArch64::MOVIv8i16:
     788           0 :     Imm = MI.getOperand(1).getImm();
     789           0 :     Shift = MI.getOperand(2).getImm();
     790           0 :     return (Imm == 0 && Shift == 0);
     791             : 
     792             :   // MOV Rd, Imm
     793           0 :   case AArch64::MOVNWi:
     794             :   case AArch64::MOVNXi:
     795             : 
     796             :   // MOV Rd, Imm
     797             :   case AArch64::MOVZWi:
     798             :   case AArch64::MOVZXi:
     799           0 :     return true;
     800             : 
     801             :   // MOV Rd, Imm
     802           7 :   case AArch64::ORRWri:
     803             :   case AArch64::ORRXri:
     804          14 :     if (!MI.getOperand(1).isReg())
     805             :       return false;
     806             : 
     807           7 :     Reg = MI.getOperand(1).getReg();
     808           7 :     Imm = MI.getOperand(2).getImm();
     809           7 :     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
     810             : 
     811             :   // MOV Rd, Rm
     812          72 :   case AArch64::ORRWrs:
     813             :   case AArch64::ORRXrs:
     814         144 :     if (!MI.getOperand(1).isReg())
     815             :       return false;
     816             : 
     817          72 :     Reg = MI.getOperand(1).getReg();
     818          72 :     Imm = MI.getOperand(3).getImm();
     819             :     Shift = AArch64_AM::getShiftValue(Imm);
     820          72 :     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
     821             :   }
     822             : }
     823             : 
     824        1642 : bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
     825             :   unsigned Imm, Shift;
     826             :   AArch64_AM::ShiftExtendType Ext;
     827             : 
     828        3284 :   switch (MI.getOpcode()) {
     829             :   default:
     830             :     return false;
     831             : 
     832             :   // WriteI
     833         358 :   case AArch64::ADDSWri:
     834             :   case AArch64::ADDSXri:
     835             :   case AArch64::ADDWri:
     836             :   case AArch64::ADDXri:
     837             :   case AArch64::SUBSWri:
     838             :   case AArch64::SUBSXri:
     839             :   case AArch64::SUBWri:
     840             :   case AArch64::SUBXri:
     841         358 :     return true;
     842             : 
     843             :   // WriteISReg
     844         422 :   case AArch64::ADDSWrs:
     845             :   case AArch64::ADDSXrs:
     846             :   case AArch64::ADDWrs:
     847             :   case AArch64::ADDXrs:
     848             :   case AArch64::ANDSWrs:
     849             :   case AArch64::ANDSXrs:
     850             :   case AArch64::ANDWrs:
     851             :   case AArch64::ANDXrs:
     852             :   case AArch64::BICSWrs:
     853             :   case AArch64::BICSXrs:
     854             :   case AArch64::BICWrs:
     855             :   case AArch64::BICXrs:
     856             :   case AArch64::EONWrs:
     857             :   case AArch64::EONXrs:
     858             :   case AArch64::EORWrs:
     859             :   case AArch64::EORXrs:
     860             :   case AArch64::ORNWrs:
     861             :   case AArch64::ORNXrs:
     862             :   case AArch64::ORRWrs:
     863             :   case AArch64::ORRXrs:
     864             :   case AArch64::SUBSWrs:
     865             :   case AArch64::SUBSXrs:
     866             :   case AArch64::SUBWrs:
     867             :   case AArch64::SUBXrs:
     868         422 :     Imm = MI.getOperand(3).getImm();
     869             :     Shift = AArch64_AM::getShiftValue(Imm);
     870             :     Ext = AArch64_AM::getShiftType(Imm);
     871         422 :     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
     872             : 
     873             :   // WriteIEReg
     874          50 :   case AArch64::ADDSWrx:
     875             :   case AArch64::ADDSXrx:
     876             :   case AArch64::ADDSXrx64:
     877             :   case AArch64::ADDWrx:
     878             :   case AArch64::ADDXrx:
     879             :   case AArch64::ADDXrx64:
     880             :   case AArch64::SUBSWrx:
     881             :   case AArch64::SUBSXrx:
     882             :   case AArch64::SUBSXrx64:
     883             :   case AArch64::SUBWrx:
     884             :   case AArch64::SUBXrx:
     885             :   case AArch64::SUBXrx64:
     886          50 :     Imm = MI.getOperand(3).getImm();
     887             :     Shift = AArch64_AM::getArithShiftValue(Imm);
     888             :     Ext = AArch64_AM::getArithExtendType(Imm);
     889          50 :     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
     890             : 
     891         185 :   case AArch64::PRFMroW:
     892             :   case AArch64::PRFMroX:
     893             : 
     894             :   // WriteLDIdx
     895             :   case AArch64::LDRBBroW:
     896             :   case AArch64::LDRBBroX:
     897             :   case AArch64::LDRHHroW:
     898             :   case AArch64::LDRHHroX:
     899             :   case AArch64::LDRSBWroW:
     900             :   case AArch64::LDRSBWroX:
     901             :   case AArch64::LDRSBXroW:
     902             :   case AArch64::LDRSBXroX:
     903             :   case AArch64::LDRSHWroW:
     904             :   case AArch64::LDRSHWroX:
     905             :   case AArch64::LDRSHXroW:
     906             :   case AArch64::LDRSHXroX:
     907             :   case AArch64::LDRSWroW:
     908             :   case AArch64::LDRSWroX:
     909             :   case AArch64::LDRWroW:
     910             :   case AArch64::LDRWroX:
     911             :   case AArch64::LDRXroW:
     912             :   case AArch64::LDRXroX:
     913             : 
     914             :   case AArch64::LDRBroW:
     915             :   case AArch64::LDRBroX:
     916             :   case AArch64::LDRDroW:
     917             :   case AArch64::LDRDroX:
     918             :   case AArch64::LDRHroW:
     919             :   case AArch64::LDRHroX:
     920             :   case AArch64::LDRSroW:
     921             :   case AArch64::LDRSroX:
     922             : 
     923             :   // WriteSTIdx
     924             :   case AArch64::STRBBroW:
     925             :   case AArch64::STRBBroX:
     926             :   case AArch64::STRHHroW:
     927             :   case AArch64::STRHHroX:
     928             :   case AArch64::STRWroW:
     929             :   case AArch64::STRWroX:
     930             :   case AArch64::STRXroW:
     931             :   case AArch64::STRXroX:
     932             : 
     933             :   case AArch64::STRBroW:
     934             :   case AArch64::STRBroX:
     935             :   case AArch64::STRDroW:
     936             :   case AArch64::STRDroX:
     937             :   case AArch64::STRHroW:
     938             :   case AArch64::STRHroX:
     939             :   case AArch64::STRSroW:
     940             :   case AArch64::STRSroX:
     941         185 :     Imm = MI.getOperand(3).getImm();
     942             :     Ext = AArch64_AM::getMemExtendType(Imm);
     943         185 :     return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
     944             :   }
     945             : }
     946             : 
     947         112 : bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
     948         224 :   switch (MI.getOpcode()) {
     949             :   default:
     950             :     return false;
     951             : 
     952           6 :   case AArch64::ADDWrs:
     953             :   case AArch64::ADDXrs:
     954             :   case AArch64::ADDSWrs:
     955             :   case AArch64::ADDSXrs: {
     956           6 :     unsigned Imm = MI.getOperand(3).getImm();
     957             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     958           6 :     if (ShiftVal == 0)
     959             :       return true;
     960           0 :     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
     961             :   }
     962             : 
     963          27 :   case AArch64::ADDWrx:
     964             :   case AArch64::ADDXrx:
     965             :   case AArch64::ADDXrx64:
     966             :   case AArch64::ADDSWrx:
     967             :   case AArch64::ADDSXrx:
     968             :   case AArch64::ADDSXrx64: {
     969          27 :     unsigned Imm = MI.getOperand(3).getImm();
     970          27 :     switch (AArch64_AM::getArithExtendType(Imm)) {
     971             :     default:
     972             :       return false;
     973             :     case AArch64_AM::UXTB:
     974             :     case AArch64_AM::UXTH:
     975             :     case AArch64_AM::UXTW:
     976             :     case AArch64_AM::UXTX:
     977          27 :       return AArch64_AM::getArithShiftValue(Imm) <= 4;
     978             :     }
     979             :   }
     980             : 
     981          10 :   case AArch64::SUBWrs:
     982             :   case AArch64::SUBSWrs: {
     983          10 :     unsigned Imm = MI.getOperand(3).getImm();
     984             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     985          10 :     return ShiftVal == 0 ||
     986           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
     987             :   }
     988             : 
     989           0 :   case AArch64::SUBXrs:
     990             :   case AArch64::SUBSXrs: {
     991           0 :     unsigned Imm = MI.getOperand(3).getImm();
     992             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     993           0 :     return ShiftVal == 0 ||
     994           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
     995             :   }
     996             : 
     997          20 :   case AArch64::SUBWrx:
     998             :   case AArch64::SUBXrx:
     999             :   case AArch64::SUBXrx64:
    1000             :   case AArch64::SUBSWrx:
    1001             :   case AArch64::SUBSXrx:
    1002             :   case AArch64::SUBSXrx64: {
    1003          20 :     unsigned Imm = MI.getOperand(3).getImm();
    1004          20 :     switch (AArch64_AM::getArithExtendType(Imm)) {
    1005             :     default:
    1006             :       return false;
    1007             :     case AArch64_AM::UXTB:
    1008             :     case AArch64_AM::UXTH:
    1009             :     case AArch64_AM::UXTW:
    1010             :     case AArch64_AM::UXTX:
    1011          20 :       return AArch64_AM::getArithShiftValue(Imm) == 0;
    1012             :     }
    1013             :   }
    1014             : 
    1015          49 :   case AArch64::LDRBBroW:
    1016             :   case AArch64::LDRBBroX:
    1017             :   case AArch64::LDRBroW:
    1018             :   case AArch64::LDRBroX:
    1019             :   case AArch64::LDRDroW:
    1020             :   case AArch64::LDRDroX:
    1021             :   case AArch64::LDRHHroW:
    1022             :   case AArch64::LDRHHroX:
    1023             :   case AArch64::LDRHroW:
    1024             :   case AArch64::LDRHroX:
    1025             :   case AArch64::LDRQroW:
    1026             :   case AArch64::LDRQroX:
    1027             :   case AArch64::LDRSBWroW:
    1028             :   case AArch64::LDRSBWroX:
    1029             :   case AArch64::LDRSBXroW:
    1030             :   case AArch64::LDRSBXroX:
    1031             :   case AArch64::LDRSHWroW:
    1032             :   case AArch64::LDRSHWroX:
    1033             :   case AArch64::LDRSHXroW:
    1034             :   case AArch64::LDRSHXroX:
    1035             :   case AArch64::LDRSWroW:
    1036             :   case AArch64::LDRSWroX:
    1037             :   case AArch64::LDRSroW:
    1038             :   case AArch64::LDRSroX:
    1039             :   case AArch64::LDRWroW:
    1040             :   case AArch64::LDRWroX:
    1041             :   case AArch64::LDRXroW:
    1042             :   case AArch64::LDRXroX:
    1043             :   case AArch64::PRFMroW:
    1044             :   case AArch64::PRFMroX:
    1045             :   case AArch64::STRBBroW:
    1046             :   case AArch64::STRBBroX:
    1047             :   case AArch64::STRBroW:
    1048             :   case AArch64::STRBroX:
    1049             :   case AArch64::STRDroW:
    1050             :   case AArch64::STRDroX:
    1051             :   case AArch64::STRHHroW:
    1052             :   case AArch64::STRHHroX:
    1053             :   case AArch64::STRHroW:
    1054             :   case AArch64::STRHroX:
    1055             :   case AArch64::STRQroW:
    1056             :   case AArch64::STRQroX:
    1057             :   case AArch64::STRSroW:
    1058             :   case AArch64::STRSroX:
    1059             :   case AArch64::STRWroW:
    1060             :   case AArch64::STRWroX:
    1061             :   case AArch64::STRXroW:
    1062             :   case AArch64::STRXroX: {
    1063          49 :     unsigned IsSigned = MI.getOperand(3).getImm();
    1064          49 :     return !IsSigned;
    1065             :   }
    1066             :   }
    1067             : }
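
Concretely, a few worked instances of the arithmetic cases above:

    add x0, x1, x2, lsl #4   -> fast  (LSL with shift <= 5)
    add x0, x1, x2, asr #7   -> slow  (non-LSL shifted operand)
    sub w0, w1, w2, asr #31  -> fast  (the special ASR #31 case)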
    1068             : 
    1069      115585 : bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
    1070             :                                              unsigned &SrcReg, unsigned &DstReg,
    1071             :                                              unsigned &SubIdx) const {
    1072      231170 :   switch (MI.getOpcode()) {
    1073             :   default:
    1074             :     return false;
    1075         641 :   case AArch64::SBFMXri: // aka sxtw
    1076             :   case AArch64::UBFMXri: // aka uxtw
    1077             :     // Check for the 32 -> 64 bit extension case; these instructions can do
    1078             :     // much more.
    1079         641 :     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
    1080             :       return false;
    1081             :     // This is a signed or unsigned 32 -> 64 bit extension.
    1082          37 :     SrcReg = MI.getOperand(1).getReg();
    1083          37 :     DstReg = MI.getOperand(0).getReg();
    1084          37 :     SubIdx = AArch64::sub_32;
    1085          37 :     return true;
    1086             :   }
    1087             : }
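
A hedged usage sketch (hypothetical virtual registers): for the sxtw pattern
"%1:gpr64 = SBFMXri %0, 0, 31" the hook reports a coalescable 32-to-64-bit
extension:

    unsigned SrcReg = 0, DstReg = 0, SubIdx = 0;
    if (TII.isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) {
      // SrcReg is %0, DstReg is %1, and SubIdx is AArch64::sub_32: the source
      // value lives in the low 32 bits of the destination register.
    }
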
    1088             : 
    1089        9540 : bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    1090             :     MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
    1091             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1092        9540 :   unsigned BaseRegA = 0, BaseRegB = 0;
    1093        9540 :   int64_t OffsetA = 0, OffsetB = 0;
    1094        9540 :   unsigned WidthA = 0, WidthB = 0;
    1095             : 
    1096             :   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
    1097             :   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
    1098             : 
    1099       28614 :   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
    1100       28608 :       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    1101        3032 :     return false;
    1102             : 
    1103             :   // Retrieve the base register, the offset from the base register, and the
    1104             :   // width. Width is the size of memory that is being loaded/stored (e.g. 1,
    1105             :   // 2, 4, 8). If the base registers are identical, and the lower access's
    1106             :   // offset plus its width does not reach past the higher access's offset,
    1107             :   // then the memory accesses do not overlap.
    1108       11104 :   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
    1109        4596 :       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    1110        4175 :     if (BaseRegA == BaseRegB) {
    1111        2415 :       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
    1112        2415 :       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
    1113        2415 :       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
    1114        2415 :       if (LowOffset + LowWidth <= HighOffset)
    1115        2154 :         return true;
    1116             :     }
    1117             :   }
    1118             :   return false;
    1119             : }
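
A worked example of the disjointness check (hypothetical operands; STRWui and
LDRWui scale their immediate by the 4-byte access size):

    STRWui %w0, %x1, 1   ->  BaseReg = x1, Offset = 4, Width = 4
    LDRWui %w2, %x1, 2   ->  BaseReg = x1, Offset = 8, Width = 4

LowOffset (4) + LowWidth (4) <= HighOffset (8), so the two accesses are
trivially disjoint and the function returns true.
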
    1120             : 
    1121             : /// analyzeCompare - For a comparison instruction, return the source registers
    1122             : /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
    1123             : /// Return true if the comparison instruction can be analyzed.
    1124        1595 : bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
    1125             :                                       unsigned &SrcReg2, int &CmpMask,
    1126             :                                       int &CmpValue) const {
    1127             :   // The first operand can be a frame index where we'd normally expect a
    1128             :   // register.
    1129             :   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
    1130        3190 :   if (!MI.getOperand(1).isReg())
    1131             :     return false;
    1132             : 
    1133        3188 :   switch (MI.getOpcode()) {
    1134             :   default:
    1135             :     break;
    1136         904 :   case AArch64::SUBSWrr:
    1137             :   case AArch64::SUBSWrs:
    1138             :   case AArch64::SUBSWrx:
    1139             :   case AArch64::SUBSXrr:
    1140             :   case AArch64::SUBSXrs:
    1141             :   case AArch64::SUBSXrx:
    1142             :   case AArch64::ADDSWrr:
    1143             :   case AArch64::ADDSWrs:
    1144             :   case AArch64::ADDSWrx:
    1145             :   case AArch64::ADDSXrr:
    1146             :   case AArch64::ADDSXrs:
    1147             :   case AArch64::ADDSXrx:
    1148             :     // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1149         904 :     SrcReg = MI.getOperand(1).getReg();
    1150         904 :     SrcReg2 = MI.getOperand(2).getReg();
    1151         904 :     CmpMask = ~0;
    1152         904 :     CmpValue = 0;
    1153         904 :     return true;
    1154         650 :   case AArch64::SUBSWri:
    1155             :   case AArch64::ADDSWri:
    1156             :   case AArch64::SUBSXri:
    1157             :   case AArch64::ADDSXri:
    1158         650 :     SrcReg = MI.getOperand(1).getReg();
    1159         650 :     SrcReg2 = 0;
    1160         650 :     CmpMask = ~0;
    1161             :     // FIXME: CmpValue is reduced to 0 or 1; the actual immediate value is lost.
    1162         650 :     CmpValue = MI.getOperand(2).getImm() != 0;
    1163         650 :     return true;
    1164          40 :   case AArch64::ANDSWri:
    1165             :   case AArch64::ANDSXri:
    1166             :     // ANDS does not use the same encoding scheme as the other xxxS
    1167             :     // instructions.
    1168          40 :     SrcReg = MI.getOperand(1).getReg();
    1169          40 :     SrcReg2 = 0;
    1170          40 :     CmpMask = ~0;
    1171             :     // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    1172             :     // while the type of CmpValue is int. When converting uint64_t to int,
    1173             :     // the high 32 bits of uint64_t will be lost.
    1174             :     // In fact this causes a bug in spec2006-483.xalancbmk.
    1175             :     // CmpValue is only used to compare with zero in optimizeCompareInstr.
    1176          40 :     CmpValue = AArch64_AM::decodeLogicalImmediate(
    1177          40 :                    MI.getOperand(2).getImm(),
    1178          40 :                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    1179          40 :     return true;
    1180             :   }
    1181             : 
    1182             :   return false;
    1183             : }
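
A minimal caller-side sketch (MI and TII assumed to be in scope): for
"subs w8, w9, #0" (SUBSWri), analyzeCompare reports:

    unsigned SrcReg, SrcReg2;
    int CmpMask, CmpValue;
    if (TII.analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue)) {
      // SrcReg = w9, SrcReg2 = 0, CmpMask = ~0, CmpValue = 0. A non-zero
      // immediate would be collapsed to CmpValue = 1, per the FIXMEs above.
    }
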
    1184             : 
    1185         502 : static bool UpdateOperandRegClass(MachineInstr &Instr) {
    1186         502 :   MachineBasicBlock *MBB = Instr.getParent();
    1187             :   assert(MBB && "Can't get MachineBasicBlock here");
    1188         502 :   MachineFunction *MF = MBB->getParent();
    1189             :   assert(MF && "Can't get MachineFunction here");
    1190         502 :   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    1191         502 :   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
    1192         502 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    1193             : 
    1194        2224 :   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
    1195             :        ++OpIdx) {
    1196        1722 :     MachineOperand &MO = Instr.getOperand(OpIdx);
    1197             :     const TargetRegisterClass *OpRegCstraints =
    1198        1722 :         Instr.getRegClassConstraint(OpIdx, TII, TRI);
    1199             : 
    1200             :     // If there's no constraint, there's nothing to do.
    1201        1722 :     if (!OpRegCstraints)
    1202             :       continue;
    1203             :     // If the operand is a frame index, there's nothing to do here.
    1204             :     // A frame index operand will resolve correctly during PEI.
    1205        1345 :     if (MO.isFI())
    1206             :       continue;
    1207             : 
    1208             :     assert(MO.isReg() &&
    1209             :            "Operand has register constraints without being a register!");
    1210             : 
    1211        1343 :     unsigned Reg = MO.getReg();
    1212        1343 :     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    1213           0 :       if (!OpRegCstraints->contains(Reg))
    1214             :         return false;
    1215        2803 :     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
    1216         117 :                !MRI->constrainRegClass(Reg, OpRegCstraints))
    1217             :       return false;
    1218             :   }
    1219             : 
    1220             :   return true;
    1221             : }
    1222             : 
    1223             : /// Return the opcode that does not set flags when possible - otherwise
    1224             : /// return the original opcode. The caller is responsible for doing the
    1225             : /// actual substitution and legality checking.
    1226        1139 : static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
    1227             :   // Don't convert all compare instructions, because for some the zero register
    1228             :   // encoding becomes the sp register.
    1229             :   bool MIDefinesZeroReg = false;
    1230        2278 :   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    1231             :     MIDefinesZeroReg = true;
    1232             : 
    1233        2278 :   switch (MI.getOpcode()) {
    1234           0 :   default:
    1235           0 :     return MI.getOpcode();
    1236             :   case AArch64::ADDSWrr:
    1237             :     return AArch64::ADDWrr;
    1238           0 :   case AArch64::ADDSWri:
    1239           0 :     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
    1240           0 :   case AArch64::ADDSWrs:
    1241           0 :     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
    1242           0 :   case AArch64::ADDSWrx:
    1243           0 :     return AArch64::ADDWrx;
    1244           0 :   case AArch64::ADDSXrr:
    1245           0 :     return AArch64::ADDXrr;
    1246           0 :   case AArch64::ADDSXri:
    1247           0 :     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
    1248           0 :   case AArch64::ADDSXrs:
    1249           0 :     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
    1250           0 :   case AArch64::ADDSXrx:
    1251           0 :     return AArch64::ADDXrx;
    1252         464 :   case AArch64::SUBSWrr:
    1253         464 :     return AArch64::SUBWrr;
    1254         137 :   case AArch64::SUBSWri:
    1255         137 :     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
    1256          23 :   case AArch64::SUBSWrs:
    1257          23 :     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
    1258           8 :   case AArch64::SUBSWrx:
    1259           8 :     return AArch64::SUBWrx;
    1260         255 :   case AArch64::SUBSXrr:
    1261         255 :     return AArch64::SUBXrr;
    1262         228 :   case AArch64::SUBSXri:
    1263         228 :     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
    1264          12 :   case AArch64::SUBSXrs:
    1265          12 :     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
    1266          12 :   case AArch64::SUBSXrx:
    1267          12 :     return AArch64::SUBXrx;
    1268             :   }
    1269             : }
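
Illustration of the zero-register special case: "cmp w0, #5" is really
"SUBSWri $wzr, $w0, 5, 0", and register 31 in SUBWri's destination field would
encode WSP rather than WZR, so such compares keep their S form (hypothetical
MIR):

    unsigned NewOpc = convertToNonFlagSettingOpc(MI);
    // "%2 = SUBSWrr %0, %1"        -> NewOpc == AArch64::SUBWrr
    // "$wzr = SUBSWri %0, 5, 0"    -> NewOpc == AArch64::SUBSWri (unchanged)
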
    1270             : 
    1271             : enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
    1272             : 
    1273             : /// True when condition flags are accessed (either by writing or reading)
    1274             : /// on the instruction trace starting at From and ending at To.
    1275             : ///
    1276             : /// Note: If From and To are in different blocks, it is assumed that the
    1277             : ///       condition flags are accessed on the path.
    1278          35 : static bool areCFlagsAccessedBetweenInstrs(
    1279             :     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    1280             :     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
    1281             :   // Early exit if To is at the beginning of the BB.
    1282          70 :   if (To == To->getParent()->begin())
    1283             :     return true;
    1284             : 
    1285             :   // Check whether the instructions are in the same basic block.
    1286             :   // If not, assume the condition flags might get modified somewhere.
    1287          35 :   if (To->getParent() != From->getParent())
    1288             :     return true;
    1289             : 
    1290             :   // From must be above To.
    1291             :   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
    1292             :                       [From](MachineInstr &MI) {
    1293             :                         return MI.getIterator() == From;
    1294             :                       }) != To->getParent()->rend());
    1295             : 
    1296             :   // We iterate backwards, starting from \p To, until we hit \p From.
    1297         129 :   for (--To; To != From; --To) {
    1298             :     const MachineInstr &Instr = *To;
    1299             : 
    1300          94 :     if (((AccessToCheck & AK_Write) &&
    1301          94 :          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
    1302         188 :         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
    1303           0 :       return true;
    1304             :   }
    1305             :   return false;
    1306             : }
    1307             : 
    1308             : /// Try to optimize a compare instruction. A compare instruction is an
    1309             : /// instruction which produces AArch64::NZCV. It is truly a compare
    1310             : /// instruction only when there are no uses of its destination
    1311             : /// register.
    1312             : ///
    1313             : /// The following steps are tried in order:
    1314             : /// 1. Convert CmpInstr into a non-flag-setting version, if NZCV is not used.
    1315             : /// 2. Remove CmpInstr if there is an earlier instruction producing a needed
    1316             : ///    condition code, or an instruction which can be converted into such an
    1317             : ///    instruction.
    1318             : ///    Only comparison with zero is supported.
    1319        1588 : bool AArch64InstrInfo::optimizeCompareInstr(
    1320             :     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    1321             :     int CmpValue, const MachineRegisterInfo *MRI) const {
    1322             :   assert(CmpInstr.getParent());
    1323             :   assert(MRI);
    1324             : 
    1325             :   // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1326        1588 :   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
    1327        1588 :   if (DeadNZCVIdx != -1) {
    1328         988 :     if (CmpInstr.definesRegister(AArch64::WZR) ||
    1329             :         CmpInstr.definesRegister(AArch64::XZR)) {
    1330           0 :       CmpInstr.eraseFromParent();
    1331           0 :       return true;
    1332             :     }
    1333         494 :     unsigned Opc = CmpInstr.getOpcode();
    1334         494 :     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    1335         494 :     if (NewOpc == Opc)
    1336             :       return false;
    1337         494 :     const MCInstrDesc &MCID = get(NewOpc);
    1338             :     CmpInstr.setDesc(MCID);
    1339         494 :     CmpInstr.RemoveOperand(DeadNZCVIdx);
    1340         494 :     bool succeeded = UpdateOperandRegClass(CmpInstr);
    1341             :     (void)succeeded;
    1342             :     assert(succeeded && "Some operands reg class are incompatible!");
    1343         494 :     return true;
    1344             :   }
    1345             : 
    1346             :   // Continue only if we have an "ri" form whose immediate is zero.
    1347             :   // FIXME: CmpValue has already been converted to 0 or 1 in the
    1348             :   // analyzeCompare function.
    1349             :   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
    1350        1094 :   if (CmpValue != 0 || SrcReg2 != 0)
    1351             :     return false;
    1352             : 
    1353             :   // CmpInstr is a pure compare if its destination register is not used.
    1354         146 :   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    1355             :     return false;
    1356             : 
    1357         137 :   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
    1358             : }
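
A hedged before/after sketch of the dead-NZCV path (hypothetical MIR):

    before: %3:gpr32 = SUBSWrr %0, %1, implicit-def dead $nzcv
    after:  %3:gpr32 = SUBWrr %0, %1

The dead NZCV def is removed and UpdateOperandRegClass re-checks the remaining
operands against the register-class constraints of the new opcode.
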
    1359             : 
    1360             : /// Get the opcode of the S (flag-setting) version of Instr.
    1361             : /// If Instr is already the S version, its own opcode is returned.
    1362             : /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
    1363             : /// version or we are not interested in it.
    1364         171 : static unsigned sForm(MachineInstr &Instr) {
    1365         342 :   switch (Instr.getOpcode()) {
    1366             :   default:
    1367             :     return AArch64::INSTRUCTION_LIST_END;
    1368             : 
    1369           0 :   case AArch64::ADDSWrr:
    1370             :   case AArch64::ADDSWri:
    1371             :   case AArch64::ADDSXrr:
    1372             :   case AArch64::ADDSXri:
    1373             :   case AArch64::SUBSWrr:
    1374             :   case AArch64::SUBSWri:
    1375             :   case AArch64::SUBSXrr:
    1376             :   case AArch64::SUBSXri:
    1377           0 :     return Instr.getOpcode();
    1378             : 
    1379           0 :   case AArch64::ADDWrr:
    1380           0 :     return AArch64::ADDSWrr;
    1381           2 :   case AArch64::ADDWri:
    1382           2 :     return AArch64::ADDSWri;
    1383           0 :   case AArch64::ADDXrr:
    1384           0 :     return AArch64::ADDSXrr;
    1385           4 :   case AArch64::ADDXri:
    1386           4 :     return AArch64::ADDSXri;
    1387           0 :   case AArch64::ADCWr:
    1388           0 :     return AArch64::ADCSWr;
    1389           0 :   case AArch64::ADCXr:
    1390           0 :     return AArch64::ADCSXr;
    1391          12 :   case AArch64::SUBWrr:
    1392          12 :     return AArch64::SUBSWrr;
    1393           0 :   case AArch64::SUBWri:
    1394           0 :     return AArch64::SUBSWri;
    1395           4 :   case AArch64::SUBXrr:
    1396           4 :     return AArch64::SUBSXrr;
    1397          44 :   case AArch64::SUBXri:
    1398          44 :     return AArch64::SUBSXri;
    1399           0 :   case AArch64::SBCWr:
    1400           0 :     return AArch64::SBCSWr;
    1401           0 :   case AArch64::SBCXr:
    1402           0 :     return AArch64::SBCSXr;
    1403           2 :   case AArch64::ANDWri:
    1404           2 :     return AArch64::ANDSWri;
    1405           0 :   case AArch64::ANDXri:
    1406           0 :     return AArch64::ANDSXri;
    1407             :   }
    1408             : }
    1409             : 
    1410             : /// Check if AArch64::NZCV should be alive in successors of MBB.
    1411          34 : static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
    1412          76 :   for (auto *BB : MBB->successors())
    1413          42 :     if (BB->isLiveIn(AArch64::NZCV))
    1414             :       return true;
    1415             :   return false;
    1416             : }
    1417             : 
    1418             : namespace {
    1419             : 
    1420             : struct UsedNZCV {
    1421             :   bool N = false;
    1422             :   bool Z = false;
    1423             :   bool C = false;
    1424             :   bool V = false;
    1425             : 
    1426             :   UsedNZCV() = default;
    1427             : 
    1428             :   UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    1429             :     this->N |= UsedFlags.N;
    1430             :     this->Z |= UsedFlags.Z;
    1431          38 :     this->C |= UsedFlags.C;
    1432          38 :     this->V |= UsedFlags.V;
    1433             :     return *this;
    1434             :   }
    1435             : };
    1436             : 
    1437             : } // end anonymous namespace
    1438             : 
    1439             : /// Find a condition code used by the instruction.
    1440             : /// Returns AArch64CC::Invalid if either the instruction does not use condition
    1441             : /// codes or we don't optimize CmpInstr in the presence of such instructions.
    1442          38 : static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
    1443          76 :   switch (Instr.getOpcode()) {
    1444             :   default:
    1445             :     return AArch64CC::Invalid;
    1446             : 
    1447          20 :   case AArch64::Bcc: {
    1448          20 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1449             :     assert(Idx >= 2);
    1450          40 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
    1451             :   }
    1452             : 
    1453          18 :   case AArch64::CSINVWr:
    1454             :   case AArch64::CSINVXr:
    1455             :   case AArch64::CSINCWr:
    1456             :   case AArch64::CSINCXr:
    1457             :   case AArch64::CSELWr:
    1458             :   case AArch64::CSELXr:
    1459             :   case AArch64::CSNEGWr:
    1460             :   case AArch64::CSNEGXr:
    1461             :   case AArch64::FCSELSrrr:
    1462             :   case AArch64::FCSELDrrr: {
    1463          18 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1464             :     assert(Idx >= 1);
    1465          36 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
    1466             :   }
    1467             :   }
    1468             : }
    1469             : 
    1470             : static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
    1471             :   assert(CC != AArch64CC::Invalid);
    1472             :   UsedNZCV UsedFlags;
    1473             :   switch (CC) {
    1474             :   default:
    1475             :     break;
    1476             : 
    1477             :   case AArch64CC::EQ: // Z set
    1478             :   case AArch64CC::NE: // Z clear
    1479             :     UsedFlags.Z = true;
    1480             :     break;
    1481             : 
    1482             :   case AArch64CC::HI: // Z clear and C set
    1483             :   case AArch64CC::LS: // Z set   or  C clear
    1484             :     UsedFlags.Z = true;
    1485             :     LLVM_FALLTHROUGH;
    1486             :   case AArch64CC::HS: // C set
    1487             :   case AArch64CC::LO: // C clear
    1488             :     UsedFlags.C = true;
    1489             :     break;
    1490             : 
    1491             :   case AArch64CC::MI: // N set
    1492             :   case AArch64CC::PL: // N clear
    1493             :     UsedFlags.N = true;
    1494             :     break;
    1495             : 
    1496             :   case AArch64CC::VS: // V set
    1497             :   case AArch64CC::VC: // V clear
    1498             :     UsedFlags.V = true;
    1499             :     break;
    1500             : 
    1501             :   case AArch64CC::GT: // Z clear, N and V the same
    1502             :   case AArch64CC::LE: // Z set,   N and V differ
    1503             :     UsedFlags.Z = true;
    1504             :     LLVM_FALLTHROUGH;
    1505             :   case AArch64CC::GE: // N and V the same
    1506             :   case AArch64CC::LT: // N and V differ
    1507             :     UsedFlags.N = true;
    1508             :     UsedFlags.V = true;
    1509             :     break;
    1510             :   }
    1511             :   return UsedFlags;
    1512             : }
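
For example, a sketch using the helper above:

    UsedNZCV Used = getUsedNZCV(AArch64CC::HI); // "unsigned higher"
    // Used.Z and Used.C are true; because C is consumed after the compare,
    // canInstrSubstituteCmpInstr below will reject the substitution.
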
    1513             : 
    1514             : static bool isADDSRegImm(unsigned Opcode) {
    1515          34 :   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
    1516             : }
    1517             : 
    1518             : static bool isSUBSRegImm(unsigned Opcode) {
    1519          34 :   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
    1520             : }
    1521             : 
    1522             : /// Check if CmpInstr can be substituted by MI.
    1523             : ///
    1524             : /// CmpInstr can be substituted when:
    1525             : /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
    1526             : /// - and, MI and CmpInstr are in the same MachineBB
    1527             : /// - and, condition flags are not alive in successors of the CmpInstr parent
    1528             : /// - and, if MI's opcode is the S form, there are no defs of flags between
    1529             : ///        MI and CmpInstr,
    1530             : ///        or, if MI's opcode is not the S form, there are neither defs nor
    1531             : ///        uses of flags between MI and CmpInstr
    1532             : /// - and, the C and V flags are not used after CmpInstr
    1533          34 : static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
    1534             :                                        const TargetRegisterInfo *TRI) {
    1535             :   assert(MI);
    1536             :   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
    1537             :   assert(CmpInstr);
    1538             : 
    1539          34 :   const unsigned CmpOpcode = CmpInstr->getOpcode();
    1540          34 :   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    1541             :     return false;
    1542             : 
    1543          34 :   if (MI->getParent() != CmpInstr->getParent())
    1544             :     return false;
    1545             : 
    1546          34 :   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    1547             :     return false;
    1548             : 
    1549             :   AccessKind AccessToCheck = AK_Write;
    1550          68 :   if (sForm(*MI) != MI->getOpcode())
    1551             :     AccessToCheck = AK_All;
    1552          34 :   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    1553             :     return false;
    1554             : 
    1555             :   UsedNZCV NZCVUsedAfterCmp;
    1556          34 :   for (auto I = std::next(CmpInstr->getIterator()),
    1557          34 :             E = CmpInstr->getParent()->instr_end();
    1558         128 :        I != E; ++I) {
    1559             :     const MachineInstr &Instr = *I;
    1560          95 :     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
    1561          38 :       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
    1562          38 :       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
    1563             :         return false;
    1564             :       NZCVUsedAfterCmp |= getUsedNZCV(CC);
    1565             :     }
    1566             : 
    1567          95 :     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
    1568             :       break;
    1569             :   }
    1570             : 
    1571          34 :   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
    1572             : }
    1573             : 
    1574             : /// Substitute an instruction comparing to zero with another instruction
    1575             : /// which produces the needed condition flags.
    1576             : ///
    1577             : /// Return true on success.
    1578         137 : bool AArch64InstrInfo::substituteCmpToZero(
    1579             :     MachineInstr &CmpInstr, unsigned SrcReg,
    1580             :     const MachineRegisterInfo *MRI) const {
    1581             :   assert(MRI);
    1582             :   // Get the unique definition of SrcReg.
    1583         137 :   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    1584         137 :   if (!MI)
    1585             :     return false;
    1586             : 
    1587             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1588             : 
    1589         137 :   unsigned NewOpc = sForm(*MI);
    1590         137 :   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    1591             :     return false;
    1592             : 
    1593          34 :   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    1594             :     return false;
    1595             : 
    1596             :   // Update the instruction to set NZCV.
    1597           8 :   MI->setDesc(get(NewOpc));
    1598           8 :   CmpInstr.eraseFromParent();
    1599           8 :   bool succeeded = UpdateOperandRegClass(*MI);
    1600             :   (void)succeeded;
    1601             :   assert(succeeded && "Some operands reg class are incompatible!");
    1602           8 :   MI->addRegisterDefined(AArch64::NZCV, TRI);
    1603           8 :   return true;
    1604             : }
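
Putting it together, a hedged before/after sketch (hypothetical MIR; b.ne reads
only the Z flag, so dropping the compare is legal):

    before: %2:gpr32 = SUBWrr %0, %1
            $wzr = SUBSWri %2, 0, 0, implicit-def $nzcv   ; cmp w2, #0
            Bcc 1, %bb.2, implicit $nzcv                  ; b.ne
    after:  %2:gpr32 = SUBSWrr %0, %1, implicit-def $nzcv
            Bcc 1, %bb.2, implicit $nzcv
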
    1605             : 
    1606       11126 : bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    1607       22252 :   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    1608             :     return false;
    1609             : 
    1610          21 :   MachineBasicBlock &MBB = *MI.getParent();
    1611             :   DebugLoc DL = MI.getDebugLoc();
    1612          21 :   unsigned Reg = MI.getOperand(0).getReg();
    1613             :   const GlobalValue *GV =
    1614          21 :       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
    1615          21 :   const TargetMachine &TM = MBB.getParent()->getTarget();
    1616          21 :   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
    1617             :   const unsigned char MO_NC = AArch64II::MO_NC;
    1618             : 
    1619          21 :   if ((OpFlags & AArch64II::MO_GOT) != 0) {
    1620          30 :     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
    1621             :         .addGlobalAddress(GV, 0, OpFlags);
    1622          45 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1623          15 :         .addReg(Reg, RegState::Kill)
    1624             :         .addImm(0)
    1625          15 :         .addMemOperand(*MI.memoperands_begin());
    1626           6 :   } else if (TM.getCodeModel() == CodeModel::Large) {
    1627           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
    1628             :         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
    1629             :         .addImm(0);
    1630           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1631           2 :         .addReg(Reg, RegState::Kill)
    1632             :         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
    1633             :         .addImm(16);
    1634           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1635           2 :         .addReg(Reg, RegState::Kill)
    1636             :         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
    1637             :         .addImm(32);
    1638           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1639           2 :         .addReg(Reg, RegState::Kill)
    1640             :         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
    1641             :         .addImm(48);
    1642           6 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1643           2 :         .addReg(Reg, RegState::Kill)
    1644             :         .addImm(0)
    1645           2 :         .addMemOperand(*MI.memoperands_begin());
    1646           4 :   } else if (TM.getCodeModel() == CodeModel::Tiny) {
    1647           0 :     BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
    1648             :         .addGlobalAddress(GV, 0, OpFlags);
    1649             :   } else {
    1650           8 :     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
    1651           4 :         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    1652           4 :     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    1653          12 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1654           4 :         .addReg(Reg, RegState::Kill)
    1655             :         .addGlobalAddress(GV, 0, LoFlags)
    1656           4 :         .addMemOperand(*MI.memoperands_begin());
    1657             :   }
    1658             : 
    1659          21 :   MBB.erase(MI);
    1660             : 
    1661             :   return true;
    1662             : }
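
For the default small-code-model, non-GOT path, the expansion is the usual
page + page-offset load (a sketch, assuming an ELF target where the guard
symbol is __stack_chk_guard):

    adrp x0, __stack_chk_guard
    ldr  x0, [x0, :lo12:__stack_chk_guard]
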
    1663             : 
    1664             : /// Return true if this instruction has a shifted register with a non-zero shift amount.
    1665         401 : bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
    1666         802 :   switch (MI.getOpcode()) {
    1667             :   default:
    1668             :     break;
    1669         337 :   case AArch64::ADDSWrs:
    1670             :   case AArch64::ADDSXrs:
    1671             :   case AArch64::ADDWrs:
    1672             :   case AArch64::ADDXrs:
    1673             :   case AArch64::ANDSWrs:
    1674             :   case AArch64::ANDSXrs:
    1675             :   case AArch64::ANDWrs:
    1676             :   case AArch64::ANDXrs:
    1677             :   case AArch64::BICSWrs:
    1678             :   case AArch64::BICSXrs:
    1679             :   case AArch64::BICWrs:
    1680             :   case AArch64::BICXrs:
    1681             :   case AArch64::EONWrs:
    1682             :   case AArch64::EONXrs:
    1683             :   case AArch64::EORWrs:
    1684             :   case AArch64::EORXrs:
    1685             :   case AArch64::ORNWrs:
    1686             :   case AArch64::ORNXrs:
    1687             :   case AArch64::ORRWrs:
    1688             :   case AArch64::ORRXrs:
    1689             :   case AArch64::SUBSWrs:
    1690             :   case AArch64::SUBSXrs:
    1691             :   case AArch64::SUBWrs:
    1692             :   case AArch64::SUBXrs:
    1693         674 :     if (MI.getOperand(3).isImm()) {
    1694         337 :       unsigned val = MI.getOperand(3).getImm();
    1695         337 :       return (val != 0);
    1696           0 :     }
    1697             :     break;
    1698             :   }
    1699             :   return false;
    1700             : }
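
For example (illustrative; operand 3 holds the shift immediate):

    add w0, w1, w2, lsl #2    ; ADDWrs, shift != 0  -> returns true
    add w0, w1, w2, lsl #0    ; ADDWrs, shift == 0  -> returns false
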
    1701             : 
    1702             : /// Return true if this instruction has an extended register with a non-zero extend/shift immediate.
    1703          15 : bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
    1704          30 :   switch (MI.getOpcode()) {
    1705             :   default:
    1706             :     break;
    1707          15 :   case AArch64::ADDSWrx:
    1708             :   case AArch64::ADDSXrx:
    1709             :   case AArch64::ADDSXrx64:
    1710             :   case AArch64::ADDWrx:
    1711             :   case AArch64::ADDXrx:
    1712             :   case AArch64::ADDXrx64:
    1713             :   case AArch64::SUBSWrx:
    1714             :   case AArch64::SUBSXrx:
    1715             :   case AArch64::SUBSXrx64:
    1716             :   case AArch64::SUBWrx:
    1717             :   case AArch64::SUBXrx:
    1718             :   case AArch64::SUBXrx64:
    1719          30 :     if (MI.getOperand(3).isImm()) {
    1720          15 :       unsigned val = MI.getOperand(3).getImm();
    1721          15 :       return (val != 0);
    1722           0 :     }
    1723             :     break;
    1724             :   }
    1725             : 
    1726             :   return false;
    1727             : }
    1728             : 
    1729             : // Return true if this instruction simply sets its single destination register
    1730             : // to zero. This is equivalent to a register rename of the zero-register.
    1731          90 : bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
    1732         180 :   switch (MI.getOpcode()) {
    1733             :   default:
    1734             :     break;
    1735          46 :   case AArch64::MOVZWi:
    1736             :   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    1737          92 :     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
    1738             :       assert(MI.getDesc().getNumOperands() == 3 &&
    1739             :              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
    1740           0 :       return true;
    1741             :     }
    1742             :     break;
    1743          16 :   case AArch64::ANDWri: // and Rd, Rzr, #imm
    1744          16 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1745          28 :   case AArch64::ANDXri:
    1746          28 :     return MI.getOperand(1).getReg() == AArch64::XZR;
    1747           0 :   case TargetOpcode::COPY:
    1748           0 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1749             :   }
    1750             :   return false;
    1751             : }
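
Examples (illustrative):

    movz w0, #0               ; MOVZWi, imm 0       -> true
    and  w0, wzr, #0xff       ; ANDWri from WZR     -> true (result is 0)
    movz w0, #1               ; non-zero immediate  -> false
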
    1752             : 
    1753             : // Return true if this instruction simply renames a general register without
    1754             : // modifying bits.
    1755        9925 : bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
    1756       19850 :   switch (MI.getOpcode()) {
    1757             :   default:
    1758             :     break;
    1759             :     // GPR32 copies will be lowered to ORRXrs
    1760             :     // GPR32 copies will by lowered to ORRXrs
    1761        9719 :     unsigned DstReg = MI.getOperand(0).getReg();
    1762       11607 :     return (AArch64::GPR32RegClass.contains(DstReg) ||
    1763        9055 :             AArch64::GPR64RegClass.contains(DstReg));
    1764             :   }
    1765           0 :   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    1766           0 :     if (MI.getOperand(1).getReg() == AArch64::XZR) {
    1767             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1768             :              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
    1769           0 :       return true;
    1770             :     }
    1771             :     break;
    1772           0 :   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    1773           0 :     if (MI.getOperand(2).getImm() == 0) {
    1774             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1775             :              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
    1776           0 :       return true;
    1777             :     }
    1778             :     break;
    1779             :   }
    1780             :   return false;
    1781             : }
    1782             : 
    1783             : // Return true if this instruction simply renames a floating-point register
    1784             : // without modifying bits.
    1785        8500 : bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
    1786       17000 :   switch (MI.getOpcode()) {
    1787             :   default:
    1788             :     break;
    1789        8294 :   case TargetOpcode::COPY: {
    1790             :     // FPR64 copies will be lowered to ORR.16b
    1791        8294 :     unsigned DstReg = MI.getOperand(0).getReg();
    1792        9106 :     return (AArch64::FPR64RegClass.contains(DstReg) ||
    1793        7979 :             AArch64::FPR128RegClass.contains(DstReg));
    1794             :   }
    1795           0 :   case AArch64::ORRv16i8:
    1796           0 :     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
    1797             :       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
    1798             :              "invalid ORRv16i8 operands");
    1799           0 :       return true;
    1800             :     }
    1801             :     break;
    1802             :   }
    1803             :   return false;
    1804             : }
    1805             : 
    1806       10950 : unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
    1807             :                                                int &FrameIndex) const {
    1808       21900 :   switch (MI.getOpcode()) {
    1809             :   default:
    1810             :     break;
    1811         806 :   case AArch64::LDRWui:
    1812             :   case AArch64::LDRXui:
    1813             :   case AArch64::LDRBui:
    1814             :   case AArch64::LDRHui:
    1815             :   case AArch64::LDRSui:
    1816             :   case AArch64::LDRDui:
    1817             :   case AArch64::LDRQui:
    1818         806 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1819        1211 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1820         405 :       FrameIndex = MI.getOperand(1).getIndex();
    1821         405 :       return MI.getOperand(0).getReg();
    1822             :     }
    1823             :     break;
    1824             :   }
    1825             : 
    1826             :   return 0;
    1827             : }
    1828             : 
    1829        5253 : unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
    1830             :                                               int &FrameIndex) const {
    1831        5253 :   switch (MI.getOpcode()) {
    1832             :   default:
    1833             :     break;
    1834         383 :   case AArch64::STRWui:
    1835             :   case AArch64::STRXui:
    1836             :   case AArch64::STRBui:
    1837             :   case AArch64::STRHui:
    1838             :   case AArch64::STRSui:
    1839             :   case AArch64::STRDui:
    1840             :   case AArch64::STRQui:
    1841         383 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1842         463 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1843          71 :       FrameIndex = MI.getOperand(1).getIndex();
    1844          71 :       return MI.getOperand(0).getReg();
    1845             :     }
    1846             :     break;
    1847             :   }
    1848             :   return 0;
    1849             : }
    1850             : 
    1851             : /// Return true if this load/store scales or extends its register offset.
    1852             : /// This refers to scaling a dynamic index as opposed to scaled immediates.
    1853             : /// MI should be a memory op that allows scaled addressing.
    1854         685 : bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
    1855        1370 :   switch (MI.getOpcode()) {
    1856             :   default:
    1857             :     break;
    1858         685 :   case AArch64::LDRBBroW:
    1859             :   case AArch64::LDRBroW:
    1860             :   case AArch64::LDRDroW:
    1861             :   case AArch64::LDRHHroW:
    1862             :   case AArch64::LDRHroW:
    1863             :   case AArch64::LDRQroW:
    1864             :   case AArch64::LDRSBWroW:
    1865             :   case AArch64::LDRSBXroW:
    1866             :   case AArch64::LDRSHWroW:
    1867             :   case AArch64::LDRSHXroW:
    1868             :   case AArch64::LDRSWroW:
    1869             :   case AArch64::LDRSroW:
    1870             :   case AArch64::LDRWroW:
    1871             :   case AArch64::LDRXroW:
    1872             :   case AArch64::STRBBroW:
    1873             :   case AArch64::STRBroW:
    1874             :   case AArch64::STRDroW:
    1875             :   case AArch64::STRHHroW:
    1876             :   case AArch64::STRHroW:
    1877             :   case AArch64::STRQroW:
    1878             :   case AArch64::STRSroW:
    1879             :   case AArch64::STRWroW:
    1880             :   case AArch64::STRXroW:
    1881             :   case AArch64::LDRBBroX:
    1882             :   case AArch64::LDRBroX:
    1883             :   case AArch64::LDRDroX:
    1884             :   case AArch64::LDRHHroX:
    1885             :   case AArch64::LDRHroX:
    1886             :   case AArch64::LDRQroX:
    1887             :   case AArch64::LDRSBWroX:
    1888             :   case AArch64::LDRSBXroX:
    1889             :   case AArch64::LDRSHWroX:
    1890             :   case AArch64::LDRSHXroX:
    1891             :   case AArch64::LDRSWroX:
    1892             :   case AArch64::LDRSroX:
    1893             :   case AArch64::LDRWroX:
    1894             :   case AArch64::LDRXroX:
    1895             :   case AArch64::STRBBroX:
    1896             :   case AArch64::STRBroX:
    1897             :   case AArch64::STRDroX:
    1898             :   case AArch64::STRHHroX:
    1899             :   case AArch64::STRHroX:
    1900             :   case AArch64::STRQroX:
    1901             :   case AArch64::STRSroX:
    1902             :   case AArch64::STRWroX:
    1903             :   case AArch64::STRXroX:
    1904             : 
    1905         685 :     unsigned Val = MI.getOperand(3).getImm();
    1906             :     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    1907         685 :     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
    1908             :   }
    1909             :   return false;
    1910             : }
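
Examples (illustrative; operand 3 packs the extend type and the do-shift bit):

    ldr x0, [x1, x2]            ; UXTX, no shift -> false
    ldr x0, [x1, x2, lsl #3]    ; UXTX, shifted  -> true (scaled)
    ldr x0, [x1, w2, sxtw]      ; SXTW extend    -> true (extended)
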
    1911             : 
    1912             : /// Check all MachineMemOperands for a hint to suppress pairing.
    1913       25617 : bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
    1914       25617 :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1915           0 :     return MMO->getFlags() & MOSuppressPair;
    1916       25617 :   });
    1917             : }
    1918             : 
    1919             : /// Set a flag on the first MachineMemOperand to suppress pairing.
    1920           9 : void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
    1921           9 :   if (MI.memoperands_empty())
    1922             :     return;
    1923           9 :   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
    1924             : }
    1925             : 
    1926             : /// Check all MachineMemOperands for a hint that the load/store is strided.
    1927         133 : bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
    1928         133 :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1929           0 :     return MMO->getFlags() & MOStridedAccess;
    1930         133 :   });
    1931             : }
    1932             : 
    1933       40183 : bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
    1934       40183 :   switch (Opc) {
    1935             :   default:
    1936             :     return false;
    1937        1159 :   case AArch64::STURSi:
    1938             :   case AArch64::STURDi:
    1939             :   case AArch64::STURQi:
    1940             :   case AArch64::STURBBi:
    1941             :   case AArch64::STURHHi:
    1942             :   case AArch64::STURWi:
    1943             :   case AArch64::STURXi:
    1944             :   case AArch64::LDURSi:
    1945             :   case AArch64::LDURDi:
    1946             :   case AArch64::LDURQi:
    1947             :   case AArch64::LDURWi:
    1948             :   case AArch64::LDURXi:
    1949             :   case AArch64::LDURSWi:
    1950             :   case AArch64::LDURHHi:
    1951             :   case AArch64::LDURBBi:
    1952             :   case AArch64::LDURSBWi:
    1953             :   case AArch64::LDURSHWi:
    1954        1159 :     return true;
    1955             :   }
    1956             : }
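
The LDUR/STUR forms take a signed 9-bit byte offset, unlike their scaled
counterparts whose unsigned 12-bit immediate is scaled by the access size:

    ldur x0, [x1, #-8]    ; LDURXi  -> isUnscaledLdSt == true
    ldr  x0, [x1, #8]     ; LDRXui  -> isUnscaledLdSt == false
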
    1957             : 
    1958       76204 : bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
    1959      152408 :   switch (MI.getOpcode()) {
    1960             :   default:
    1961             :     return false;
    1962             :   // Scaled instructions.
    1963       12457 :   case AArch64::STRSui:
    1964             :   case AArch64::STRDui:
    1965             :   case AArch64::STRQui:
    1966             :   case AArch64::STRXui:
    1967             :   case AArch64::STRWui:
    1968             :   case AArch64::LDRSui:
    1969             :   case AArch64::LDRDui:
    1970             :   case AArch64::LDRQui:
    1971             :   case AArch64::LDRXui:
    1972             :   case AArch64::LDRWui:
    1973             :   case AArch64::LDRSWui:
    1974             :   // Unscaled instructions.
    1975             :   case AArch64::STURSi:
    1976             :   case AArch64::STURDi:
    1977             :   case AArch64::STURQi:
    1978             :   case AArch64::STURWi:
    1979             :   case AArch64::STURXi:
    1980             :   case AArch64::LDURSi:
    1981             :   case AArch64::LDURDi:
    1982             :   case AArch64::LDURQi:
    1983             :   case AArch64::LDURWi:
    1984             :   case AArch64::LDURXi:
    1985             :   case AArch64::LDURSWi:
    1986       12457 :     return true;
    1987             :   }
    1988             : }
    1989             : 
    1990          23 : unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
    1991             :                                                    bool &Is64Bit) {
    1992          23 :   switch (Opc) {
    1993           0 :   default:
    1994           0 :     llvm_unreachable("Opcode has no flag setting equivalent!");
    1995             :   // 32-bit cases:
    1996           3 :   case AArch64::ADDWri:
    1997           3 :     Is64Bit = false;
    1998           3 :     return AArch64::ADDSWri;
    1999           4 :   case AArch64::ADDWrr:
    2000           4 :     Is64Bit = false;
    2001           4 :     return AArch64::ADDSWrr;
    2002           0 :   case AArch64::ADDWrs:
    2003           0 :     Is64Bit = false;
    2004           0 :     return AArch64::ADDSWrs;
    2005           0 :   case AArch64::ADDWrx:
    2006           0 :     Is64Bit = false;
    2007           0 :     return AArch64::ADDSWrx;
    2008           3 :   case AArch64::ANDWri:
    2009           3 :     Is64Bit = false;
    2010           3 :     return AArch64::ANDSWri;
    2011           0 :   case AArch64::ANDWrr:
    2012           0 :     Is64Bit = false;
    2013           0 :     return AArch64::ANDSWrr;
    2014           0 :   case AArch64::ANDWrs:
    2015           0 :     Is64Bit = false;
    2016           0 :     return AArch64::ANDSWrs;
    2017           1 :   case AArch64::BICWrr:
    2018           1 :     Is64Bit = false;
    2019           1 :     return AArch64::BICSWrr;
    2020           0 :   case AArch64::BICWrs:
    2021           0 :     Is64Bit = false;
    2022           0 :     return AArch64::BICSWrs;
    2023           0 :   case AArch64::SUBWri:
    2024           0 :     Is64Bit = false;
    2025           0 :     return AArch64::SUBSWri;
    2026           0 :   case AArch64::SUBWrr:
    2027           0 :     Is64Bit = false;
    2028           0 :     return AArch64::SUBSWrr;
    2029           0 :   case AArch64::SUBWrs:
    2030           0 :     Is64Bit = false;
    2031           0 :     return AArch64::SUBSWrs;
    2032           0 :   case AArch64::SUBWrx:
    2033           0 :     Is64Bit = false;
    2034           0 :     return AArch64::SUBSWrx;
    2035             :   // 64-bit cases:
    2036          10 :   case AArch64::ADDXri:
    2037          10 :     Is64Bit = true;
    2038          10 :     return AArch64::ADDSXri;
    2039           1 :   case AArch64::ADDXrr:
    2040           1 :     Is64Bit = true;
    2041           1 :     return AArch64::ADDSXrr;
    2042           0 :   case AArch64::ADDXrs:
    2043           0 :     Is64Bit = true;
    2044           0 :     return AArch64::ADDSXrs;
    2045           0 :   case AArch64::ADDXrx:
    2046           0 :     Is64Bit = true;
    2047           0 :     return AArch64::ADDSXrx;
    2048           1 :   case AArch64::ANDXri:
    2049           1 :     Is64Bit = true;
    2050           1 :     return AArch64::ANDSXri;
    2051           0 :   case AArch64::ANDXrr:
    2052           0 :     Is64Bit = true;
    2053           0 :     return AArch64::ANDSXrr;
    2054           0 :   case AArch64::ANDXrs:
    2055           0 :     Is64Bit = true;
    2056           0 :     return AArch64::ANDSXrs;
    2057           0 :   case AArch64::BICXrr:
    2058           0 :     Is64Bit = true;
    2059           0 :     return AArch64::BICSXrr;
    2060           0 :   case AArch64::BICXrs:
    2061           0 :     Is64Bit = true;
    2062           0 :     return AArch64::BICSXrs;
    2063           0 :   case AArch64::SUBXri:
    2064           0 :     Is64Bit = true;
    2065           0 :     return AArch64::SUBSXri;
    2066           0 :   case AArch64::SUBXrr:
    2067           0 :     Is64Bit = true;
    2068           0 :     return AArch64::SUBSXrr;
    2069           0 :   case AArch64::SUBXrs:
    2070           0 :     Is64Bit = true;
    2071           0 :     return AArch64::SUBSXrs;
    2072           0 :   case AArch64::SUBXrx:
    2073           0 :     Is64Bit = true;
    2074           0 :     return AArch64::SUBSXrx;
    2075             :   }
    2076             : }
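
A minimal usage sketch (hypothetical opcode):

    bool Is64Bit;
    unsigned NewOpc =
        AArch64InstrInfo::convertToFlagSettingOpc(AArch64::ADDWri, Is64Bit);
    // NewOpc == AArch64::ADDSWri and Is64Bit == false.
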
    2077             : 
    2078             : // Is this a candidate for ld/st merging or pairing?  For example, we don't
    2079             : // touch volatiles or load/stores that have a hint to avoid pair formation.
    2080       12380 : bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
    2081             :   // If this is a volatile load/store, don't mess with it.
    2082       12380 :   if (MI.hasOrderedMemoryRef())
    2083             :     return false;
    2084             : 
    2085             :   // Make sure this is a reg+imm (as opposed to an address reloc).
    2086             :   assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
    2087       17158 :   if (!MI.getOperand(2).isImm())
    2088             :     return false;
    2089             : 
    2090             :   // Can't merge/pair if the instruction modifies the base register.
    2091             :   // e.g., ldr x0, [x0]
    2092        7863 :   unsigned BaseReg = MI.getOperand(1).getReg();
    2093             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2094        7863 :   if (MI.modifiesRegister(BaseReg, TRI))
    2095             :     return false;
    2096             : 
    2097             :   // Check if this load/store has a hint to avoid pair formation.
    2098             :   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
    2099        7744 :   if (isLdStPairSuppressed(MI))
    2100             :     return false;
    2101             : 
    2102             :   // On some CPUs quad load/store pairs are slower than two single load/stores.
    2103        7733 :   if (Subtarget.isPaired128Slow()) {
    2104         244 :     switch (MI.getOpcode()) {
    2105             :     default:
    2106             :       break;
    2107          98 :     case AArch64::LDURQi:
    2108             :     case AArch64::STURQi:
    2109             :     case AArch64::LDRQui:
    2110             :     case AArch64::STRQui:
    2111          98 :       return false;
    2112             :     }
    2113             :   }
    2114             : 
    2115             :   return true;
    2116             : }
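
Examples of the rejection cases above (illustrative; "sym" is a hypothetical
symbol):

    ldr x0, [x0]              ; modifies its own base register  -> rejected
    ldr x0, [x1, :lo12:sym]   ; offset is a reloc, not an imm   -> rejected
    ldr x0, [x1, #8]          ; candidate, unless a memoperand hint suppresses it
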
    2117             : 
    2118       15842 : bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    2119             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    2120             :     const TargetRegisterInfo *TRI) const {
    2121             :   unsigned Width;
    2122       15842 :   return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
    2123             : }
    2124             : 
    2125       27043 : bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    2126             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    2127             :     const TargetRegisterInfo *TRI) const {
    2128             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    2129             :   // Handle only loads/stores with base register followed by immediate offset.
    2130       27043 :   if (LdSt.getNumExplicitOperands() == 3) {
    2131             :     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    2132       44686 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
    2133             :       return false;
    2134        4700 :   } else if (LdSt.getNumExplicitOperands() == 4) {
    2135             :     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    2136        5276 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
    2137             :         !LdSt.getOperand(3).isImm())
    2138             :       return false;
    2139             :   } else
    2140             :     return false;
    2141             : 
    2142             :   // Get the scaling factor for the instruction and set the width for the
    2143             :   // instruction.
    2144       16800 :   unsigned Scale = 0;
    2145             :   int64_t Dummy1, Dummy2;
    2146             : 
    2147             :   // If this returns false, then it's an instruction we don't want to handle.
    2148       33600 :   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    2149             :     return false;
    2150             : 
    2151             :   // Compute the offset. Offset is calculated as the immediate operand
    2152             :   // multiplied by the scaling factor. Unscaled instructions have scaling factor
    2153             :   // set to 1.
    2154       16479 :   if (LdSt.getNumExplicitOperands() == 3) {
    2155       14819 :     BaseReg = LdSt.getOperand(1).getReg();
    2156       14819 :     Offset = LdSt.getOperand(2).getImm() * Scale;
    2157             :   } else {
    2158             :     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    2159        1660 :     BaseReg = LdSt.getOperand(2).getReg();
    2160        1660 :     Offset = LdSt.getOperand(3).getImm() * Scale;
    2161             :   }
    2162             :   return true;
    2163             : }
    2164             : 
    2165             : MachineOperand &
    2166           0 : AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
    2167             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    2168           0 :   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
    2169             :   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
    2170           0 :   return OfsOp;
    2171             : }
    2172             : 
    2173       16897 : bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
    2174             :                                     unsigned &Width, int64_t &MinOffset,
    2175             :                                     int64_t &MaxOffset) const {
    2176       16897 :   switch (Opcode) {
    2177             :   // Not a memory operation or something we want to handle.
    2178         321 :   default:
    2179         321 :     Scale = Width = 0;
    2180         321 :     MinOffset = MaxOffset = 0;
    2181         321 :     return false;
    2182          19 :   case AArch64::STRWpost:
    2183             :   case AArch64::LDRWpost:
    2184          19 :     Width = 32;
    2185          19 :     Scale = 4;
    2186          19 :     MinOffset = -256;
    2187          19 :     MaxOffset = 255;
    2188          19 :     break;
    2189         208 :   case AArch64::LDURQi:
    2190             :   case AArch64::STURQi:
    2191         208 :     Width = 16;
    2192         208 :     Scale = 1;
    2193         208 :     MinOffset = -256;
    2194         208 :     MaxOffset = 255;
    2195         208 :     break;
    2196         194 :   case AArch64::LDURXi:
    2197             :   case AArch64::LDURDi:
    2198             :   case AArch64::STURXi:
    2199             :   case AArch64::STURDi:
    2200         194 :     Width = 8;
    2201         194 :     Scale = 1;
    2202         194 :     MinOffset = -256;
    2203         194 :     MaxOffset = 255;
    2204         194 :     break;
    2205         220 :   case AArch64::LDURWi:
    2206             :   case AArch64::LDURSi:
    2207             :   case AArch64::LDURSWi:
    2208             :   case AArch64::STURWi:
    2209             :   case AArch64::STURSi:
    2210         220 :     Width = 4;
    2211         220 :     Scale = 1;
    2212         220 :     MinOffset = -256;
    2213         220 :     MaxOffset = 255;
    2214         220 :     break;
    2215         106 :   case AArch64::LDURHi:
    2216             :   case AArch64::LDURHHi:
    2217             :   case AArch64::LDURSHXi:
    2218             :   case AArch64::LDURSHWi:
    2219             :   case AArch64::STURHi:
    2220             :   case AArch64::STURHHi:
    2221         106 :     Width = 2;
    2222         106 :     Scale = 1;
    2223         106 :     MinOffset = -256;
    2224         106 :     MaxOffset = 255;
    2225         106 :     break;
    2226          84 :   case AArch64::LDURBi:
    2227             :   case AArch64::LDURBBi:
    2228             :   case AArch64::LDURSBXi:
    2229             :   case AArch64::LDURSBWi:
    2230             :   case AArch64::STURBi:
    2231             :   case AArch64::STURBBi:
    2232          84 :     Width = 1;
    2233          84 :     Scale = 1;
    2234          84 :     MinOffset = -256;
    2235          84 :     MaxOffset = 255;
    2236          84 :     break;
    2237         917 :   case AArch64::LDPQi:
    2238             :   case AArch64::LDNPQi:
    2239             :   case AArch64::STPQi:
    2240             :   case AArch64::STNPQi:
    2241         917 :     Scale = 16;
    2242         917 :     Width = 32;
    2243         917 :     MinOffset = -64;
    2244         917 :     MaxOffset = 63;
    2245         917 :     break;
    2246        3874 :   case AArch64::LDRQui:
    2247             :   case AArch64::STRQui:
    2248        3874 :     Scale = Width = 16;
    2249        3874 :     MinOffset = 0;
    2250        3874 :     MaxOffset = 4095;
    2251        3874 :     break;
    2252         561 :   case AArch64::LDPXi:
    2253             :   case AArch64::LDPDi:
    2254             :   case AArch64::LDNPXi:
    2255             :   case AArch64::LDNPDi:
    2256             :   case AArch64::STPXi:
    2257             :   case AArch64::STPDi:
    2258             :   case AArch64::STNPXi:
    2259             :   case AArch64::STNPDi:
    2260         561 :     Scale = 8;
    2261         561 :     Width = 16;
    2262         561 :     MinOffset = -64;
    2263         561 :     MaxOffset = 63;
    2264         561 :     break;
    2265        6931 :   case AArch64::LDRXui:
    2266             :   case AArch64::LDRDui:
    2267             :   case AArch64::STRXui:
    2268             :   case AArch64::STRDui:
    2269        6931 :     Scale = Width = 8;
    2270        6931 :     MinOffset = 0;
    2271        6931 :     MaxOffset = 4095;
    2272        6931 :     break;
    2273         170 :   case AArch64::LDPWi:
    2274             :   case AArch64::LDPSi:
    2275             :   case AArch64::LDNPWi:
    2276             :   case AArch64::LDNPSi:
    2277             :   case AArch64::STPWi:
    2278             :   case AArch64::STPSi:
    2279             :   case AArch64::STNPWi:
    2280             :   case AArch64::STNPSi:
    2281         170 :     Scale = 4;
    2282         170 :     Width = 8;
    2283         170 :     MinOffset = -64;
    2284         170 :     MaxOffset = 63;
    2285         170 :     break;
    2286        2170 :   case AArch64::LDRWui:
    2287             :   case AArch64::LDRSui:
    2288             :   case AArch64::LDRSWui:
    2289             :   case AArch64::STRWui:
    2290             :   case AArch64::STRSui:
    2291        2170 :     Scale = Width = 4;
    2292        2170 :     MinOffset = 0;
    2293        2170 :     MaxOffset = 4095;
    2294        2170 :     break;
    2295         239 :   case AArch64::LDRHui:
    2296             :   case AArch64::LDRHHui:
    2297             :   case AArch64::STRHui:
    2298             :   case AArch64::STRHHui:
    2299         239 :     Scale = Width = 2;
    2300         239 :     MinOffset = 0;
    2301         239 :     MaxOffset = 4095;
    2302         239 :     break;
    2303         883 :   case AArch64::LDRBui:
    2304             :   case AArch64::LDRBBui:
    2305             :   case AArch64::STRBui:
    2306             :   case AArch64::STRBBui:
    2307         883 :     Scale = Width = 1;
    2308         883 :     MinOffset = 0;
    2309         883 :     MaxOffset = 4095;
    2310         883 :     break;
    2311             :   }
    2312             : 
    2313             :   return true;
    2314             : }
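
The Scale/Width/MinOffset/MaxOffset contract above lets callers test whether a byte offset is encodable: the immediate field stores the byte offset divided by Scale, and must land in [MinOffset, MaxOffset]. A minimal sketch of that check (isEncodableOffset is a hypothetical helper, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    static bool isEncodableOffset(int64_t ByteOffset, unsigned Scale,
                                  int64_t MinOffset, int64_t MaxOffset) {
      if (Scale == 0 || ByteOffset % Scale != 0)
        return false; // not a multiple of the access size
      int64_t Imm = ByteOffset / Scale;
      return Imm >= MinOffset && Imm <= MaxOffset;
    }

    int main() {
      // LDRXui: Scale = 8, immediate range [0, 4095] => bytes [0, 32760].
      assert(isEncodableOffset(32760, 8, 0, 4095));
      assert(!isEncodableOffset(32761, 8, 0, 4095)); // misaligned
      // LDPXi: Scale = 8, immediate range [-64, 63].
      assert(isEncodableOffset(-512, 8, -64, 63));
      assert(!isEncodableOffset(512, 8, -64, 63)); // 64 > MaxOffset
    }
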
    2315             : 
    2316             : // Scale the unscaled offset.  Returns false if the unscaled offset can't be
    2317             : // scaled.
    2318          55 : static bool scaleOffset(unsigned Opc, int64_t &Offset) {
    2319             :   unsigned OffsetStride = 1;
    2320          55 :   switch (Opc) {
    2321             :   default:
    2322             :     return false;
    2323             :   case AArch64::LDURQi:
    2324             :   case AArch64::STURQi:
    2325             :     OffsetStride = 16;
    2326             :     break;
    2327          12 :   case AArch64::LDURXi:
    2328             :   case AArch64::LDURDi:
    2329             :   case AArch64::STURXi:
    2330             :   case AArch64::STURDi:
    2331             :     OffsetStride = 8;
    2332          12 :     break;
    2333          35 :   case AArch64::LDURWi:
    2334             :   case AArch64::LDURSi:
    2335             :   case AArch64::LDURSWi:
    2336             :   case AArch64::STURWi:
    2337             :   case AArch64::STURSi:
    2338             :     OffsetStride = 4;
    2339          35 :     break;
    2340             :   }
    2341             :   // If the byte-offset isn't a multiple of the stride, we can't scale this
    2342             :   // offset.
    2343          55 :   if (Offset % OffsetStride != 0)
    2344             :     return false;
    2345             : 
    2346             :   // Convert the byte-offset used by unscaled instructions into an "element"
    2347             :   // offset used by the scaled pair load/store instructions.
    2348          48 :   Offset /= OffsetStride;
    2349          48 :   return true;
    2350             : }
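
Concretely, an unscaled 4-byte access (the LDURWi class, stride 4) with byte offset 12 scales to element offset 3, while byte offset 10 is rejected as unaligned. A standalone sketch of the same arithmetic (scaleOffsetSketch is an illustrative name):

    #include <cassert>
    #include <cstdint>

    static bool scaleOffsetSketch(int64_t &Offset, unsigned OffsetStride) {
      if (Offset % OffsetStride != 0)
        return false;          // can't express as a whole element count
      Offset /= OffsetStride;  // byte offset -> element offset
      return true;
    }

    int main() {
      int64_t A = 12, B = 10;
      assert(scaleOffsetSketch(A, 4) && A == 3);
      assert(!scaleOffsetSketch(B, 4));
    }
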
    2351             : 
    2352             : static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
    2353         556 :   if (FirstOpc == SecondOpc)
    2354             :     return true;
    2355             :   // We can also pair sign-ext and zero-ext instructions.
    2356          74 :   switch (FirstOpc) {
    2357             :   default:
    2358             :     return false;
    2359           3 :   case AArch64::LDRWui:
    2360             :   case AArch64::LDURWi:
    2361           3 :     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
    2362           4 :   case AArch64::LDRSWui:
    2363             :   case AArch64::LDURSWi:
    2364           4 :     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
    2365             :   }
    2366             :   // These instructions can't be paired based on their opcodes.
    2367             :   return false;
    2368             : }
    2369             : 
    2370             : /// Detect opportunities for ldp/stp formation.
    2371             : ///
    2372             : /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
    2373        1539 : bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
    2374             :                                            unsigned BaseReg1,
    2375             :                                            MachineInstr &SecondLdSt,
    2376             :                                            unsigned BaseReg2,
    2377             :                                            unsigned NumLoads) const {
    2378        1539 :   if (BaseReg1 != BaseReg2)
    2379             :     return false;
    2380             : 
    2381             :   // Only cluster up to a single pair.
    2382         783 :   if (NumLoads > 1)
    2383             :     return false;
    2384             : 
    2385         627 :   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    2386          71 :     return false;
    2387             : 
    2388             :   // Can we pair these instructions based on their opcodes?
    2389         556 :   unsigned FirstOpc = FirstLdSt.getOpcode();
    2390         556 :   unsigned SecondOpc = SecondLdSt.getOpcode();
    2391           7 :   if (!canPairLdStOpc(FirstOpc, SecondOpc))
    2392             :     return false;
    2393             : 
    2394             :   // For example, we can't merge volatiles or load/stores that have a hint
    2395             :   // to avoid pair formation.
    2396         946 :   if (!isCandidateToMergeOrPair(FirstLdSt) ||
    2397         458 :       !isCandidateToMergeOrPair(SecondLdSt))
    2398          32 :     return false;
    2399             : 
    2400             :   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
    2401         456 :   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
    2402         456 :   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    2403             :     return false;
    2404             : 
    2405         449 :   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
    2406         449 :   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    2407             :     return false;
    2408             : 
    2409             :   // Pairwise instructions have a 7-bit signed offset field.
    2410         449 :   if (Offset1 > 63 || Offset1 < -64)
    2411             :     return false;
    2412             : 
    2413             :   // The caller should already have ordered First/SecondLdSt by offset.
    2414             :   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
    2415         435 :   return Offset1 + 1 == Offset2;
    2416             : }
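
After both offsets are in element units, the decision reduces to two tests: Offset1 must fit the 7-bit signed ldp/stp immediate, and the accesses must be consecutive. A minimal sketch (offsetsCluster is an illustrative name):

    #include <cassert>
    #include <cstdint>

    static bool offsetsCluster(int64_t Offset1, int64_t Offset2) {
      if (Offset1 > 63 || Offset1 < -64)
        return false;                // outside the 7-bit signed field
      return Offset1 + 1 == Offset2; // consecutive elements only
    }

    int main() {
      // ldr x1, [x0, #8] ; ldr x2, [x0, #16] -> element offsets 1 and 2.
      assert(offsetsCluster(1, 2));
      assert(!offsetsCluster(1, 3)); // a one-element gap: no ldp
    }
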
    2417             : 
    2418          87 : static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
    2419             :                                             unsigned Reg, unsigned SubIdx,
    2420             :                                             unsigned State,
    2421             :                                             const TargetRegisterInfo *TRI) {
    2422          87 :   if (!SubIdx)
    2423           0 :     return MIB.addReg(Reg, State);
    2424             : 
    2425          87 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
    2426          87 :     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
    2427           0 :   return MIB.addReg(Reg, State, SubIdx);
    2428             : }
    2429             : 
    2430             : static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
    2431             :                                         unsigned NumRegs) {
    2432             : // We really want the positive remainder mod 32 here, which happens to be
    2433             : // easily obtainable with a mask.
    2434          11 :   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
    2435             : }
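
The mask works because AArch64 register encodings wrap mod 32, so the masked difference is the forward distance from the source tuple to the destination tuple; a distance smaller than the tuple length means a forward copy would overwrite a source register before reading it. A small numeric sketch:

    #include <cassert>

    static bool clobbers(unsigned DestEnc, unsigned SrcEnc, unsigned NumRegs) {
      return ((DestEnc - SrcEnc) & 0x1f) < NumRegs;
    }

    int main() {
      assert(clobbers(1, 0, 2));  // D1_D2 <- D0_D1: writing D1 first clobbers it
      assert(!clobbers(0, 1, 2)); // D0_D1 <- D1_D2: forward order is safe
      assert(clobbers(0, 31, 2)); // wrap-around: (0 - 31) & 0x1f == 1 < 2
    }
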
    2436             : 
    2437          11 : void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
    2438             :                                         MachineBasicBlock::iterator I,
    2439             :                                         const DebugLoc &DL, unsigned DestReg,
    2440             :                                         unsigned SrcReg, bool KillSrc,
    2441             :                                         unsigned Opcode,
    2442             :                                         ArrayRef<unsigned> Indices) const {
    2443             :   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
    2444             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2445          11 :   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
    2446             :   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
    2447          11 :   unsigned NumRegs = Indices.size();
    2448             : 
    2449          11 :   int SubReg = 0, End = NumRegs, Incr = 1;
    2450          11 :   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    2451           4 :     SubReg = NumRegs - 1;
    2452             :     End = -1;
    2453             :     Incr = -1;
    2454             :   }
    2455             : 
    2456          40 :   for (; SubReg != End; SubReg += Incr) {
    2457          58 :     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    2458          58 :     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    2459          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    2460          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
    2461             :   }
    2462          11 : }
    2463             : 
    2464        3599 : void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    2465             :                                    MachineBasicBlock::iterator I,
    2466             :                                    const DebugLoc &DL, unsigned DestReg,
    2467             :                                    unsigned SrcReg, bool KillSrc) const {
    2468        4808 :   if (AArch64::GPR32spRegClass.contains(DestReg) &&
    2469         700 :       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    2470             :     const TargetRegisterInfo *TRI = &getRegisterInfo();
    2471             : 
    2472         943 :     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    2473             :       // If either operand is WSP, expand to ADD #0.
    2474           0 :       if (Subtarget.hasZeroCycleRegMove()) {
    2475             :         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
    2476           0 :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2477             :                                                      &AArch64::GPR64spRegClass);
    2478           0 :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2479             :                                                     &AArch64::GPR64spRegClass);
    2480             :         // This instruction is reading and writing X registers.  This may upset
    2481             :         // the register scavenger and machine verifier, so we need to indicate
    2482             :         // that we are reading an undefined value from SrcRegX, but a proper
    2483             :         // value from SrcReg.
    2484           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
    2485           0 :             .addReg(SrcRegX, RegState::Undef)
    2486             :             .addImm(0)
    2487             :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    2488           0 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2489             :       } else {
    2490           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
    2491           0 :             .addReg(SrcReg, getKillRegState(KillSrc))
    2492             :             .addImm(0)
    2493             :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2494             :       }
    2495         943 :     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
    2496         144 :       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
    2497             :           .addImm(0)
    2498             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2499             :     } else {
    2500         871 :       if (Subtarget.hasZeroCycleRegMove()) {
    2501             :         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
    2502          82 :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2503             :                                                      &AArch64::GPR64spRegClass);
    2504          82 :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2505             :                                                     &AArch64::GPR64spRegClass);
    2506             :         // This instruction is reading and writing X registers.  This may upset
    2507             :         // the register scavenger and machine verifier, so we need to indicate
    2508             :         // that we are reading an undefined value from SrcRegX, but a proper
    2509             :         // value from SrcReg.
    2510         246 :         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
    2511          82 :             .addReg(AArch64::XZR)
    2512          82 :             .addReg(SrcRegX, RegState::Undef)
    2513          82 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2514             :       } else {
    2515             :         // Otherwise, expand to ORR WZR.
    2516        2367 :         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
    2517         789 :             .addReg(AArch64::WZR)
    2518         789 :             .addReg(SrcReg, getKillRegState(KillSrc));
    2519             :       }
    2520             :     }
    2521         943 :     return;
    2522             :   }
    2523             : 
    2524        3689 :   if (AArch64::GPR64spRegClass.contains(DestReg) &&
    2525         260 :       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    2526         908 :     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
    2527             :       // If either operand is SP, expand to ADD #0.
    2528         345 :       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
    2529         115 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2530             :           .addImm(0)
    2531             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2532         793 :     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
    2533          60 :       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
    2534             :           .addImm(0)
    2535             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2536             :     } else {
    2537             :       // Otherwise, expand to ORR XZR.
    2538        2289 :       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
    2539         763 :           .addReg(AArch64::XZR)
    2540         763 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2541             :     }
    2542         908 :     return;
    2543             :   }
    2544             : 
    2545             :   // Copy a DDDD register quad by copying the individual sub-registers.
    2546        1748 :   if (AArch64::DDDDRegClass.contains(DestReg) &&
    2547             :       AArch64::DDDDRegClass.contains(SrcReg)) {
    2548             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2549             :                                        AArch64::dsub2, AArch64::dsub3};
    2550           0 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2551             :                      Indices);
    2552           0 :     return;
    2553             :   }
    2554             : 
    2555             :   // Copy a DDD register triple by copying the individual sub-registers.
    2556        1749 :   if (AArch64::DDDRegClass.contains(DestReg) &&
    2557             :       AArch64::DDDRegClass.contains(SrcReg)) {
    2558             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2559             :                                        AArch64::dsub2};
    2560           1 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2561             :                      Indices);
    2562           1 :     return;
    2563             :   }
    2564             : 
    2565             :   // Copy a DD register pair by copying the individual sub-registers.
    2566        1751 :   if (AArch64::DDRegClass.contains(DestReg) &&
    2567             :       AArch64::DDRegClass.contains(SrcReg)) {
    2568             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    2569           4 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2570             :                      Indices);
    2571           4 :     return;
    2572             :   }
    2573             : 
    2574             :   // Copy a QQQQ register quad by copying the individual sub-registers.
    2575        1745 :   if (AArch64::QQQQRegClass.contains(DestReg) &&
    2576             :       AArch64::QQQQRegClass.contains(SrcReg)) {
    2577             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2578             :                                        AArch64::qsub2, AArch64::qsub3};
    2579           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2580             :                      Indices);
    2581           2 :     return;
    2582             :   }
    2583             : 
    2584             :   // Copy a QQQ register triple by copying the individual sub-registers.
    2585        1743 :   if (AArch64::QQQRegClass.contains(DestReg) &&
    2586             :       AArch64::QQQRegClass.contains(SrcReg)) {
    2587             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2588             :                                        AArch64::qsub2};
    2589           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2590             :                      Indices);
    2591           2 :     return;
    2592             :   }
    2593             : 
    2594             :   // Copy a QQ register pair by copying the individual sub-registers.
    2595        1741 :   if (AArch64::QQRegClass.contains(DestReg) &&
    2596             :       AArch64::QQRegClass.contains(SrcReg)) {
    2597             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    2598           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2599             :                      Indices);
    2600           2 :     return;
    2601             :   }
    2602             : 
    2603        2066 :   if (AArch64::FPR128RegClass.contains(DestReg) &&
    2604             :       AArch64::FPR128RegClass.contains(SrcReg)) {
    2605         329 :     if (Subtarget.hasNEON()) {
    2606         984 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2607         328 :           .addReg(SrcReg)
    2608         328 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2609             :     } else {
    2610           2 :       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
    2611           1 :           .addReg(AArch64::SP, RegState::Define)
    2612           1 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2613           1 :           .addReg(AArch64::SP)
    2614             :           .addImm(-16);
    2615           3 :       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
    2616           1 :           .addReg(AArch64::SP, RegState::Define)
    2617           1 :           .addReg(DestReg, RegState::Define)
    2618           1 :           .addReg(AArch64::SP)
    2619             :           .addImm(16);
    2620             :     }
    2621         329 :     return;
    2622             :   }
    2623             : 
    2624        1764 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2625             :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2626         352 :     if (Subtarget.hasNEON()) {
    2627             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
    2628             :                                        &AArch64::FPR128RegClass);
    2629         349 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
    2630             :                                       &AArch64::FPR128RegClass);
    2631        1047 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2632         349 :           .addReg(SrcReg)
    2633         349 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2634             :     } else {
    2635           9 :       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
    2636           3 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2637             :     }
    2638         352 :     return;
    2639             :   }
    2640             : 
    2641        1433 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2642             :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2643          79 :     if (Subtarget.hasNEON()) {
    2644             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
    2645             :                                        &AArch64::FPR128RegClass);
    2646          78 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
    2647             :                                       &AArch64::FPR128RegClass);
    2648         234 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2649          78 :           .addReg(SrcReg)
    2650          78 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2651             :     } else {
    2652           3 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2653           1 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2654             :     }
    2655          79 :     return;
    2656             :   }
    2657             : 
    2658        1017 :   if (AArch64::FPR16RegClass.contains(DestReg) &&
    2659             :       AArch64::FPR16RegClass.contains(SrcReg)) {
    2660          40 :     if (Subtarget.hasNEON()) {
    2661             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2662             :                                        &AArch64::FPR128RegClass);
    2663          40 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2664             :                                       &AArch64::FPR128RegClass);
    2665         120 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2666          40 :           .addReg(SrcReg)
    2667          40 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2668             :     } else {
    2669             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2670             :                                        &AArch64::FPR32RegClass);
    2671           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2672             :                                       &AArch64::FPR32RegClass);
    2673           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2674           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2675             :     }
    2676          40 :     return;
    2677             :   }
    2678             : 
    2679         937 :   if (AArch64::FPR8RegClass.contains(DestReg) &&
    2680             :       AArch64::FPR8RegClass.contains(SrcReg)) {
    2681           0 :     if (Subtarget.hasNEON()) {
    2682             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2683             :                                        &AArch64::FPR128RegClass);
    2684           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2685             :                                       &AArch64::FPR128RegClass);
    2686           0 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2687           0 :           .addReg(SrcReg)
    2688           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2689             :     } else {
    2690             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2691             :                                        &AArch64::FPR32RegClass);
    2692           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2693             :                                       &AArch64::FPR32RegClass);
    2694           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2695           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2696             :     }
    2697           0 :     return;
    2698             :   }
    2699             : 
    2700             :   // Copies between GPR64 and FPR64.
    2701         265 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2702         132 :       AArch64::GPR64RegClass.contains(SrcReg)) {
    2703         264 :     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
    2704         132 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2705         132 :     return;
    2706             :   }
    2707         930 :   if (AArch64::GPR64RegClass.contains(DestReg) &&
    2708             :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2709         248 :     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
    2710         124 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2711         124 :     return;
    2712             :   }
    2713             :   // Copies between GPR32 and FPR32.
    2714        1041 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2715         413 :       AArch64::GPR32RegClass.contains(SrcReg)) {
    2716         826 :     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
    2717         413 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2718         413 :     return;
    2719             :   }
    2720         534 :   if (AArch64::GPR32RegClass.contains(DestReg) &&
    2721             :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2722         532 :     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
    2723         266 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2724         266 :     return;
    2725             :   }
    2726             : 
    2727           2 :   if (DestReg == AArch64::NZCV) {
    2728             :     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    2729           2 :     BuildMI(MBB, I, DL, get(AArch64::MSR))
    2730             :         .addImm(AArch64SysReg::NZCV)
    2731           1 :         .addReg(SrcReg, getKillRegState(KillSrc))
    2732           1 :         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    2733           1 :     return;
    2734             :   }
    2735             : 
    2736           1 :   if (SrcReg == AArch64::NZCV) {
    2737             :     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    2738           2 :     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
    2739             :         .addImm(AArch64SysReg::NZCV)
    2740           1 :         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    2741           1 :     return;
    2742             :   }
    2743             : 
    2744           0 :   llvm_unreachable("unimplemented reg-to-reg copy");
    2745             : }
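
For the common GPR32-to-GPR32 case above, the opcode choice boils down to a small decision function; a sketch under the same conditions, collapsing the zero-cycle ADDXri sub-case of the WSP branch (pickGpr32CopyOpc and the enum are illustrative, not LLVM API):

    enum class CopyOpc { ADDWri, MOVZWi, ORRXrr, ORRWrr };

    static CopyOpc pickGpr32CopyOpc(bool InvolvesWSP, bool SrcIsWZR,
                                    bool ZeroCycleRegMove, bool ZeroCycleZeroing) {
      if (InvolvesWSP)
        return CopyOpc::ADDWri;  // SP-relative copies expand to ADD #0
      if (SrcIsWZR && ZeroCycleZeroing)
        return CopyOpc::MOVZWi;  // materialize zero directly
      if (ZeroCycleRegMove)
        return CopyOpc::ORRXrr;  // Cyclone: X-form ORR is a zero-cycle move
      return CopyOpc::ORRWrr;    // default: ORR Wd, WZR, Wm
    }

    int main() {
      // A plain w1 <- w0 copy on a generic core selects ORRWrr.
      return pickGpr32CopyOpc(false, false, false, false) == CopyOpc::ORRWrr ? 0 : 1;
    }
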
    2746             : 
    2747           2 : static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
    2748             :                                     MachineBasicBlock &MBB,
    2749             :                                     MachineBasicBlock::iterator InsertBefore,
    2750             :                                     const MCInstrDesc &MCID,
    2751             :                                     unsigned SrcReg, bool IsKill,
    2752             :                                     unsigned SubIdx0, unsigned SubIdx1, int FI,
    2753             :                                     MachineMemOperand *MMO) {
    2754             :   unsigned SrcReg0 = SrcReg;
    2755             :   unsigned SrcReg1 = SrcReg;
    2756           2 :   if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    2757           0 :     SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
    2758             :     SubIdx0 = 0;
    2759           0 :     SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
    2760             :     SubIdx1 = 0;
    2761             :   }
    2762           4 :   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
    2763           2 :       .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
    2764           2 :       .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
    2765             :       .addFrameIndex(FI)
    2766             :       .addImm(0)
    2767             :       .addMemOperand(MMO);
    2768           2 : }
    2769             : 
    2770        1234 : void AArch64InstrInfo::storeRegToStackSlot(
    2771             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    2772             :     bool isKill, int FI, const TargetRegisterClass *RC,
    2773             :     const TargetRegisterInfo *TRI) const {
    2774        1234 :   MachineFunction &MF = *MBB.getParent();
    2775        1234 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2776             :   unsigned Align = MFI.getObjectAlignment(FI);
    2777             : 
    2778        1234 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2779        1234 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2780             :       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
    2781             :   unsigned Opc = 0;
    2782             :   bool Offset = true;
    2783        1234 :   switch (TRI->getSpillSize(*RC)) {
    2784           0 :   case 1:
    2785           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2786             :       Opc = AArch64::STRBui;
    2787             :     break;
    2788           0 :   case 2:
    2789           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2790             :       Opc = AArch64::STRHui;
    2791             :     break;
    2792         141 :   case 4:
    2793         282 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2794             :       Opc = AArch64::STRWui;
    2795         128 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2796          33 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    2797             :       else
    2798             :         assert(SrcReg != AArch64::WSP);
    2799          26 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2800             :       Opc = AArch64::STRSui;
    2801             :     break;
    2802         799 :   case 8:
    2803        1598 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2804             :       Opc = AArch64::STRXui;
    2805         644 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2806         309 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    2807             :       else
    2808             :         assert(SrcReg != AArch64::SP);
    2809         310 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
    2810             :       Opc = AArch64::STRDui;
    2811           2 :     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2812           2 :       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
    2813             :                               get(AArch64::STPWi), SrcReg, isKill,
    2814             :                               AArch64::sube32, AArch64::subo32, FI, MMO);
    2815           2 :       return;
    2816             :     }
    2817             :     break;
    2818         287 :   case 16:
    2819         574 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2820             :       Opc = AArch64::STRQui;
    2821           2 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2822             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2823             :       Opc = AArch64::ST1Twov1d;
    2824             :       Offset = false;
    2825           2 :     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2826           2 :       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
    2827             :                               get(AArch64::STPXi), SrcReg, isKill,
    2828             :                               AArch64::sube64, AArch64::subo64, FI, MMO);
    2829           1 :       return;
    2830             :     }
    2831             :     break;
    2832           0 :   case 24:
    2833           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2834             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2835             :       Opc = AArch64::ST1Threev1d;
    2836             :       Offset = false;
    2837             :     }
    2838             :     break;
    2839           3 :   case 32:
    2840           6 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2841             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2842             :       Opc = AArch64::ST1Fourv1d;
    2843             :       Offset = false;
    2844           6 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2845             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2846             :       Opc = AArch64::ST1Twov2d;
    2847             :       Offset = false;
    2848             :     }
    2849             :     break;
    2850           2 :   case 48:
    2851           4 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2852             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2853             :       Opc = AArch64::ST1Threev2d;
    2854             :       Offset = false;
    2855             :     }
    2856             :     break;
    2857           2 :   case 64:
    2858           4 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2859             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2860             :       Opc = AArch64::ST1Fourv2d;
    2861             :       Offset = false;
    2862             :     }
    2863             :     break;
    2864             :   }
    2865             :   assert(Opc && "Unknown register class");
    2866             : 
    2867        2464 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
    2868        1232 :                                      .addReg(SrcReg, getKillRegState(isKill))
    2869        1232 :                                      .addFrameIndex(FI);
    2870             : 
    2871        1232 :   if (Offset)
    2872             :     MI.addImm(0);
    2873             :   MI.addMemOperand(MMO);
    2874             : }
    2875             : 
    2876           2 : static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
    2877             :                                      MachineBasicBlock &MBB,
    2878             :                                      MachineBasicBlock::iterator InsertBefore,
    2879             :                                      const MCInstrDesc &MCID,
    2880             :                                      unsigned DestReg, unsigned SubIdx0,
    2881             :                                      unsigned SubIdx1, int FI,
    2882             :                                      MachineMemOperand *MMO) {
    2883             :   unsigned DestReg0 = DestReg;
    2884             :   unsigned DestReg1 = DestReg;
    2885             :   bool IsUndef = true;
    2886           2 :   if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
    2887           0 :     DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
    2888             :     SubIdx0 = 0;
    2889           0 :     DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
    2890             :     SubIdx1 = 0;
    2891             :     IsUndef = false;
    2892             :   }
    2893           4 :   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
    2894           2 :       .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
    2895           2 :       .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
    2896             :       .addFrameIndex(FI)
    2897             :       .addImm(0)
    2898             :       .addMemOperand(MMO);
    2899           2 : }
    2900             : 
    2901        1177 : void AArch64InstrInfo::loadRegFromStackSlot(
    2902             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    2903             :     int FI, const TargetRegisterClass *RC,
    2904             :     const TargetRegisterInfo *TRI) const {
    2905        1177 :   MachineFunction &MF = *MBB.getParent();
    2906        1177 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2907             :   unsigned Align = MFI.getObjectAlignment(FI);
    2908        1177 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2909        1177 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2910             :       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
    2911             : 
    2912             :   unsigned Opc = 0;
    2913             :   bool Offset = true;
    2914        1177 :   switch (TRI->getSpillSize(*RC)) {
    2915           0 :   case 1:
    2916           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2917             :       Opc = AArch64::LDRBui;
    2918             :     break;
    2919           0 :   case 2:
    2920           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2921             :       Opc = AArch64::LDRHui;
    2922             :     break;
    2923          78 :   case 4:
    2924         156 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2925             :       Opc = AArch64::LDRWui;
    2926          69 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2927          34 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
    2928             :       else
    2929             :         assert(DestReg != AArch64::WSP);
    2930          18 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2931             :       Opc = AArch64::LDRSui;
    2932             :     break;
    2933         682 :   case 8:
    2934        1364 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2935             :       Opc = AArch64::LDRXui;
    2936         578 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2937         299 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
    2938             :       else
    2939             :         assert(DestReg != AArch64::SP);
    2940         208 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
    2941             :       Opc = AArch64::LDRDui;
    2942           2 :     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2943           2 :       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
    2944             :                                get(AArch64::LDPWi), DestReg, AArch64::sube32,
    2945             :                                AArch64::subo32, FI, MMO);
    2946           2 :       return;
    2947             :     }
    2948             :     break;
    2949         410 :   case 16:
    2950         820 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2951             :       Opc = AArch64::LDRQui;
    2952           2 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2953             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2954             :       Opc = AArch64::LD1Twov1d;
    2955             :       Offset = false;
    2956           2 :     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2957           2 :       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
    2958             :                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
    2959             :                                AArch64::subo64, FI, MMO);
    2960           1 :       return;
    2961             :     }
    2962             :     break;
    2963           0 :   case 24:
    2964           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2965             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2966             :       Opc = AArch64::LD1Threev1d;
    2967             :       Offset = false;
    2968             :     }
    2969             :     break;
    2970           3 :   case 32:
    2971           6 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2972             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2973             :       Opc = AArch64::LD1Fourv1d;
    2974             :       Offset = false;
    2975           6 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2976             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2977             :       Opc = AArch64::LD1Twov2d;
    2978             :       Offset = false;
    2979             :     }
    2980             :     break;
    2981           2 :   case 48:
    2982           4 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2983             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2984             :       Opc = AArch64::LD1Threev2d;
    2985             :       Offset = false;
    2986             :     }
    2987             :     break;
    2988           2 :   case 64:
    2989           4 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2990             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2991             :       Opc = AArch64::LD1Fourv2d;
    2992             :       Offset = false;
    2993             :     }
    2994             :     break;
    2995             :   }
    2996             :   assert(Opc && "Unknown register class");
    2997             : 
    2998        2350 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
    2999        1175 :                                      .addReg(DestReg, getDefRegState(true))
    3000        1175 :                                      .addFrameIndex(FI);
    3001        1175 :   if (Offset)
    3002             :     MI.addImm(0);
    3003             :   MI.addMemOperand(MMO);
    3004             : }
    3005             : 
    3006       16311 : void llvm::emitFrameOffset(MachineBasicBlock &MBB,
    3007             :                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    3008             :                            unsigned DestReg, unsigned SrcReg, int Offset,
    3009             :                            const TargetInstrInfo *TII,
    3010             :                            MachineInstr::MIFlag Flag, bool SetNZCV) {
    3011       16311 :   if (DestReg == SrcReg && Offset == 0)
    3012             :     return;
    3013             : 
    3014             :   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
    3015             :          "SP increment/decrement not 16-byte aligned");
    3016             : 
    3017             :   bool isSub = Offset < 0;
    3018        2156 :   if (isSub)
    3019         853 :     Offset = -Offset;
    3020             : 
    3021             :   // FIXME: If the offset won't fit in 24 bits, compute the offset into a
    3022             :   // scratch register.  If DestReg is a virtual register, use it as the
    3023             :   // scratch register; otherwise, create a new virtual register (to be
    3024             :   // replaced by the scavenger at the end of PEI).  That case can be optimized
    3025             :   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
    3026             :   // register can be loaded with offset/8 and the add/sub can use an extending
    3027             :   // instruction with LSL#3.
    3028             :   // Currently the function handles any offsets but generates a poor sequence
    3029             :   // of code.
    3030             :   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
    3031             : 
    3032             :   unsigned Opc;
    3033        2156 :   if (SetNZCV)
    3034           3 :     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
    3035             :   else
    3036        2153 :     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
    3037             :   const unsigned MaxEncoding = 0xfff;
    3038             :   const unsigned ShiftSize = 12;
    3039             :   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
    3040        2189 :   while (((unsigned)Offset) >= (1 << ShiftSize)) {
    3041             :     unsigned ThisVal;
    3042          41 :     if (((unsigned)Offset) > MaxEncodableValue) {
    3043             :       ThisVal = MaxEncodableValue;
    3044             :     } else {
    3045          31 :       ThisVal = Offset & MaxEncodableValue;
    3046             :     }
    3047             :     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
    3048             :            "Encoding cannot handle value that big");
    3049          82 :     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    3050          41 :         .addReg(SrcReg)
    3051          41 :         .addImm(ThisVal >> ShiftSize)
    3052             :         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
    3053             :         .setMIFlag(Flag);
    3054             : 
    3055             :     SrcReg = DestReg;
    3056          41 :     Offset -= ThisVal;
    3057          41 :     if (Offset == 0)
    3058             :       return;
    3059             :   }
    3060        6444 :   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    3061        2148 :       .addReg(SrcReg)
    3062        2148 :       .addImm(Offset)
    3063             :       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    3064             :       .setMIFlag(Flag);
    3065             : }
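
As a worked example of the chunking loop above: a frame offset of 0x5010 (20496) does not fit in 12 bits, so it is emitted as "ADD Xd, Xn, #5, LSL #12" followed by "ADD Xd, Xd, #16". A standalone sketch of the same decomposition (simplified: it skips the final ADD #0 the real code may emit purely to move between registers):

    #include <cstdio>

    int main() {
      unsigned Offset = 0x5010;
      const unsigned ShiftSize = 12;
      const unsigned MaxEncodableValue = 0xfffu << ShiftSize;
      const char *Src = "xn";
      while (Offset >= (1u << ShiftSize)) {
        // Peel off the largest LSL-#12 chunk that still fits.
        unsigned ThisVal =
            Offset > MaxEncodableValue ? MaxEncodableValue
                                       : (Offset & MaxEncodableValue);
        std::printf("ADD xd, %s, #%u, LSL #12\n", Src, ThisVal >> ShiftSize);
        Src = "xd"; // further chunks accumulate into the destination
        Offset -= ThisVal;
      }
      if (Offset)
        std::printf("ADD xd, %s, #%u\n", Src, Offset); // low 12 bits
      return 0;
    }
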
    3066             : 
    3067        1265 : MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    3068             :     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    3069             :     MachineBasicBlock::iterator InsertPt, int FrameIndex,
    3070             :     LiveIntervals *LIS) const {
    3071             :   // This is a bit of a hack. Consider this instruction:
    3072             :   //
    3073             :   //   %0 = COPY %sp; GPR64all:%0
    3074             :   //
    3075             :   // We explicitly chose GPR64all for the virtual register so such a copy might
    3076             :   // be eliminated by RegisterCoalescer. However, that may not be possible, and
    3077             :   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
    3078             :   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
    3079             :   //
    3080             :   // To prevent that, we are going to constrain the %0 register class here.
    3081             :   //
    3082             :   // <rdar://problem/11522048>
    3083             :   //
    3084             :   if (MI.isFullCopy()) {
    3085         420 :     unsigned DstReg = MI.getOperand(0).getReg();
    3086         420 :     unsigned SrcReg = MI.getOperand(1).getReg();
    3087         420 :     if (SrcReg == AArch64::SP &&
    3088             :         TargetRegisterInfo::isVirtualRegister(DstReg)) {
    3089           1 :       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
    3090           1 :       return nullptr;
    3091             :     }
    3092         419 :     if (DstReg == AArch64::SP &&
    3093             :         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
    3094           1 :       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    3095           1 :       return nullptr;
    3096             :     }
    3097             :   }
    3098             : 
    3099             :   // Handle the case where a copy is being spilled or filled but the source
     3100             :   // and destination register classes don't match.  For example:
    3101             :   //
    3102             :   //   %0 = COPY %xzr; GPR64common:%0
    3103             :   //
    3104             :   // In this case we can still safely fold away the COPY and generate the
    3105             :   // following spill code:
    3106             :   //
    3107             :   //   STRXui %xzr, %stack.0
    3108             :   //
    3109             :   // This also eliminates spilled cross register class COPYs (e.g. between x and
    3110             :   // d regs) of the same size.  For example:
    3111             :   //
    3112             :   //   %0 = COPY %1; GPR64:%0, FPR64:%1
    3113             :   //
    3114             :   // will be filled as
    3115             :   //
    3116             :   //   LDRDui %0, fi<#0>
    3117             :   //
    3118             :   // instead of
    3119             :   //
    3120             :   //   LDRXui %Temp, fi<#0>
    3121             :   //   %0 = FMOV %Temp
    3122             :   //
    3123        1263 :   if (MI.isCopy() && Ops.size() == 1 &&
    3124             :       // Make sure we're only folding the explicit COPY defs/uses.
    3125         590 :       (Ops[0] == 0 || Ops[0] == 1)) {
    3126             :     bool IsSpill = Ops[0] == 0;
    3127             :     bool IsFill = !IsSpill;
    3128         590 :     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    3129         590 :     const MachineRegisterInfo &MRI = MF.getRegInfo();
    3130         590 :     MachineBasicBlock &MBB = *MI.getParent();
    3131         590 :     const MachineOperand &DstMO = MI.getOperand(0);
    3132             :     const MachineOperand &SrcMO = MI.getOperand(1);
    3133         590 :     unsigned DstReg = DstMO.getReg();
    3134         590 :     unsigned SrcReg = SrcMO.getReg();
    3135             :     // This is slightly expensive to compute for physical regs since
    3136             :     // getMinimalPhysRegClass is slow.
    3137             :     auto getRegClass = [&](unsigned Reg) {
    3138             :       return TargetRegisterInfo::isVirtualRegister(Reg)
    3139             :                  ? MRI.getRegClass(Reg)
    3140             :                  : TRI.getMinimalPhysRegClass(Reg);
    3141         590 :     };
    3142             : 
    3143         590 :     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
    3144             :       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
    3145             :                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
    3146             :              "Mismatched register size in non subreg COPY");
    3147         418 :       if (IsSpill)
    3148         183 :         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
    3149             :                             getRegClass(SrcReg), &TRI);
    3150             :       else
    3151         235 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
    3152             :                              getRegClass(DstReg), &TRI);
    3153         501 :       return &*--InsertPt;
    3154             :     }
    3155             : 
    3156             :     // Handle cases like spilling def of:
    3157             :     //
    3158             :     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    3159             :     //
    3160             :     // where the physical register source can be widened and stored to the full
    3161             :     // virtual reg destination stack slot, in this case producing:
    3162             :     //
    3163             :     //   STRXui %xzr, %stack.0
    3164             :     //
    3165         172 :     if (IsSpill && DstMO.isUndef() &&
    3166             :         TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    3167             :       assert(SrcMO.getSubReg() == 0 &&
    3168             :              "Unexpected subreg on physical register");
    3169             :       const TargetRegisterClass *SpillRC;
    3170             :       unsigned SpillSubreg;
    3171          81 :       switch (DstMO.getSubReg()) {
    3172             :       default:
    3173             :         SpillRC = nullptr;
    3174             :         break;
    3175          57 :       case AArch64::sub_32:
    3176             :       case AArch64::ssub:
    3177          57 :         if (AArch64::GPR32RegClass.contains(SrcReg)) {
    3178             :           SpillRC = &AArch64::GPR64RegClass;
    3179             :           SpillSubreg = AArch64::sub_32;
    3180          54 :         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
    3181             :           SpillRC = &AArch64::FPR64RegClass;
    3182             :           SpillSubreg = AArch64::ssub;
    3183             :         } else
    3184             :           SpillRC = nullptr;
    3185             :         break;
    3186          24 :       case AArch64::dsub:
    3187          24 :         if (AArch64::FPR64RegClass.contains(SrcReg)) {
    3188             :           SpillRC = &AArch64::FPR128RegClass;
    3189             :           SpillSubreg = AArch64::dsub;
    3190             :         } else
    3191             :           SpillRC = nullptr;
    3192             :         break;
    3193             :       }
    3194             : 
    3195             :       if (SpillRC)
    3196          81 :         if (unsigned WidenedSrcReg =
    3197          81 :                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
    3198          81 :           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
    3199             :                               FrameIndex, SpillRC, &TRI);
    3200          81 :           return &*--InsertPt;
    3201             :         }
    3202             :     }
    3203             : 
    3204             :     // Handle cases like filling use of:
    3205             :     //
    3206             :     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    3207             :     //
     3208             :     // where we can load the full virtual reg source stack slot into the subreg
    3209             :     // destination, in this case producing:
    3210             :     //
    3211             :     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    3212             :     //
    3213          91 :     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
    3214             :       const TargetRegisterClass *FillRC;
    3215           2 :       switch (DstMO.getSubReg()) {
    3216             :       default:
    3217             :         FillRC = nullptr;
    3218             :         break;
    3219             :       case AArch64::sub_32:
    3220             :         FillRC = &AArch64::GPR32RegClass;
    3221             :         break;
    3222           1 :       case AArch64::ssub:
    3223             :         FillRC = &AArch64::FPR32RegClass;
    3224           1 :         break;
    3225           0 :       case AArch64::dsub:
    3226             :         FillRC = &AArch64::FPR64RegClass;
    3227           0 :         break;
    3228             :       }
    3229             : 
    3230             :       if (FillRC) {
    3231             :         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
    3232             :                    TRI.getRegSizeInBits(*FillRC) &&
    3233             :                "Mismatched regclass size on folded subreg COPY");
    3234           2 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
    3235             :         MachineInstr &LoadMI = *--InsertPt;
    3236           2 :         MachineOperand &LoadDst = LoadMI.getOperand(0);
    3237             :         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
    3238             :         LoadDst.setSubReg(DstMO.getSubReg());
    3239             :         LoadDst.setIsUndef();
    3240           2 :         return &LoadMI;
    3241             :       }
    3242             :     }
    3243             :   }
    3244             : 
    3245             :   // Cannot fold.
    3246             :   return nullptr;
    3247             : }
    3248             : 
    3249        8598 : int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
    3250             :                                     bool *OutUseUnscaledOp,
    3251             :                                     unsigned *OutUnscaledOp,
    3252             :                                     int *EmittableOffset) {
    3253             :   int Scale = 1;
    3254             :   bool IsSigned = false;
     3255             :   // Cases below override ImmIdx when the immediate operand is not at index 2.
    3256             :   unsigned ImmIdx = 2;
    3257             :   unsigned UnscaledOp = 0;
    3258             :   // Set output values in case of early exit.
    3259        8598 :   if (EmittableOffset)
    3260        3691 :     *EmittableOffset = 0;
    3261        8598 :   if (OutUseUnscaledOp)
    3262        3691 :     *OutUseUnscaledOp = false;
    3263        8598 :   if (OutUnscaledOp)
    3264        3691 :     *OutUnscaledOp = 0;
    3265       17196 :   switch (MI.getOpcode()) {
    3266           0 :   default:
     3267           0 :     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
    3268             :   // Vector spills/fills can't take an immediate offset.
    3269             :   case AArch64::LD1Twov2d:
    3270             :   case AArch64::LD1Threev2d:
    3271             :   case AArch64::LD1Fourv2d:
    3272             :   case AArch64::LD1Twov1d:
    3273             :   case AArch64::LD1Threev1d:
    3274             :   case AArch64::LD1Fourv1d:
    3275             :   case AArch64::ST1Twov2d:
    3276             :   case AArch64::ST1Threev2d:
    3277             :   case AArch64::ST1Fourv2d:
    3278             :   case AArch64::ST1Twov1d:
    3279             :   case AArch64::ST1Threev1d:
    3280             :   case AArch64::ST1Fourv1d:
    3281             :     return AArch64FrameOffsetCannotUpdate;
    3282             :   case AArch64::PRFMui:
    3283             :     Scale = 8;
    3284             :     UnscaledOp = AArch64::PRFUMi;
    3285             :     break;
    3286        1634 :   case AArch64::LDRXui:
    3287             :     Scale = 8;
    3288             :     UnscaledOp = AArch64::LDURXi;
    3289        1634 :     break;
    3290         436 :   case AArch64::LDRWui:
    3291             :     Scale = 4;
    3292             :     UnscaledOp = AArch64::LDURWi;
    3293         436 :     break;
    3294           0 :   case AArch64::LDRBui:
    3295             :     Scale = 1;
    3296             :     UnscaledOp = AArch64::LDURBi;
    3297           0 :     break;
    3298           4 :   case AArch64::LDRHui:
    3299             :     Scale = 2;
    3300             :     UnscaledOp = AArch64::LDURHi;
    3301           4 :     break;
    3302          87 :   case AArch64::LDRSui:
    3303             :     Scale = 4;
    3304             :     UnscaledOp = AArch64::LDURSi;
    3305          87 :     break;
    3306         366 :   case AArch64::LDRDui:
    3307             :     Scale = 8;
    3308             :     UnscaledOp = AArch64::LDURDi;
    3309         366 :     break;
    3310        1052 :   case AArch64::LDRQui:
    3311             :     Scale = 16;
    3312             :     UnscaledOp = AArch64::LDURQi;
    3313        1052 :     break;
    3314          47 :   case AArch64::LDRBBui:
    3315             :     Scale = 1;
    3316             :     UnscaledOp = AArch64::LDURBBi;
    3317          47 :     break;
    3318          33 :   case AArch64::LDRHHui:
    3319             :     Scale = 2;
    3320             :     UnscaledOp = AArch64::LDURHHi;
    3321          33 :     break;
    3322           0 :   case AArch64::LDRSBXui:
    3323             :     Scale = 1;
    3324             :     UnscaledOp = AArch64::LDURSBXi;
    3325           0 :     break;
    3326          15 :   case AArch64::LDRSBWui:
    3327             :     Scale = 1;
    3328             :     UnscaledOp = AArch64::LDURSBWi;
    3329          15 :     break;
    3330           0 :   case AArch64::LDRSHXui:
    3331             :     Scale = 2;
    3332             :     UnscaledOp = AArch64::LDURSHXi;
    3333           0 :     break;
    3334          12 :   case AArch64::LDRSHWui:
    3335             :     Scale = 2;
    3336             :     UnscaledOp = AArch64::LDURSHWi;
    3337          12 :     break;
    3338           6 :   case AArch64::LDRSWui:
    3339             :     Scale = 4;
    3340             :     UnscaledOp = AArch64::LDURSWi;
    3341           6 :     break;
    3342             : 
    3343        1995 :   case AArch64::STRXui:
    3344             :     Scale = 8;
    3345             :     UnscaledOp = AArch64::STURXi;
    3346        1995 :     break;
    3347        1006 :   case AArch64::STRWui:
    3348             :     Scale = 4;
    3349             :     UnscaledOp = AArch64::STURWi;
    3350        1006 :     break;
    3351           0 :   case AArch64::STRBui:
    3352             :     Scale = 1;
    3353             :     UnscaledOp = AArch64::STURBi;
    3354           0 :     break;
    3355           0 :   case AArch64::STRHui:
    3356             :     Scale = 2;
    3357             :     UnscaledOp = AArch64::STURHi;
    3358           0 :     break;
    3359          48 :   case AArch64::STRSui:
    3360             :     Scale = 4;
    3361             :     UnscaledOp = AArch64::STURSi;
    3362          48 :     break;
    3363         446 :   case AArch64::STRDui:
    3364             :     Scale = 8;
    3365             :     UnscaledOp = AArch64::STURDi;
    3366         446 :     break;
    3367        1271 :   case AArch64::STRQui:
    3368             :     Scale = 16;
    3369             :     UnscaledOp = AArch64::STURQi;
    3370        1271 :     break;
    3371          47 :   case AArch64::STRBBui:
    3372             :     Scale = 1;
    3373             :     UnscaledOp = AArch64::STURBBi;
    3374          47 :     break;
    3375          30 :   case AArch64::STRHHui:
    3376             :     Scale = 2;
    3377             :     UnscaledOp = AArch64::STURHHi;
    3378          30 :     break;
    3379             : 
    3380           6 :   case AArch64::LDPXi:
    3381             :   case AArch64::LDPDi:
    3382             :   case AArch64::STPXi:
    3383             :   case AArch64::STPDi:
    3384             :   case AArch64::LDNPXi:
    3385             :   case AArch64::LDNPDi:
    3386             :   case AArch64::STNPXi:
    3387             :   case AArch64::STNPDi:
    3388             :     ImmIdx = 3;
    3389             :     IsSigned = true;
    3390             :     Scale = 8;
    3391           6 :     break;
    3392           0 :   case AArch64::LDPQi:
    3393             :   case AArch64::STPQi:
    3394             :   case AArch64::LDNPQi:
    3395             :   case AArch64::STNPQi:
    3396             :     ImmIdx = 3;
    3397             :     IsSigned = true;
    3398             :     Scale = 16;
    3399           0 :     break;
    3400           0 :   case AArch64::LDPWi:
    3401             :   case AArch64::LDPSi:
    3402             :   case AArch64::STPWi:
    3403             :   case AArch64::STPSi:
    3404             :   case AArch64::LDNPWi:
    3405             :   case AArch64::LDNPSi:
    3406             :   case AArch64::STNPWi:
    3407             :   case AArch64::STNPSi:
    3408             :     ImmIdx = 3;
    3409             :     IsSigned = true;
    3410             :     Scale = 4;
    3411           0 :     break;
    3412             : 
    3413          10 :   case AArch64::LDURXi:
    3414             :   case AArch64::LDURWi:
    3415             :   case AArch64::LDURBi:
    3416             :   case AArch64::LDURHi:
    3417             :   case AArch64::LDURSi:
    3418             :   case AArch64::LDURDi:
    3419             :   case AArch64::LDURQi:
    3420             :   case AArch64::LDURHHi:
    3421             :   case AArch64::LDURBBi:
    3422             :   case AArch64::LDURSBXi:
    3423             :   case AArch64::LDURSBWi:
    3424             :   case AArch64::LDURSHXi:
    3425             :   case AArch64::LDURSHWi:
    3426             :   case AArch64::LDURSWi:
    3427             :   case AArch64::STURXi:
    3428             :   case AArch64::STURWi:
    3429             :   case AArch64::STURBi:
    3430             :   case AArch64::STURHi:
    3431             :   case AArch64::STURSi:
    3432             :   case AArch64::STURDi:
    3433             :   case AArch64::STURQi:
    3434             :   case AArch64::STURBBi:
    3435             :   case AArch64::STURHHi:
    3436             :     Scale = 1;
    3437          10 :     break;
    3438             :   }
    3439             : 
    3440        8575 :   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
    3441             : 
    3442             :   bool useUnscaledOp = false;
    3443             :   // If the offset doesn't match the scale, we rewrite the instruction to
    3444             :   // use the unscaled instruction instead. Likewise, if we have a negative
    3445             :   // offset (and have an unscaled op to use).
    3446        8575 :   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    3447             :     useUnscaledOp = true;
    3448             : 
    3449             :   // Use an unscaled addressing mode if the instruction has a negative offset
    3450             :   // (or if the instruction is already using an unscaled addressing mode).
    3451             :   unsigned MaskBits;
    3452        8575 :   if (IsSigned) {
    3453             :     // ldp/stp instructions.
    3454             :     MaskBits = 7;
    3455           6 :     Offset /= Scale;
    3456        8569 :   } else if (UnscaledOp == 0 || useUnscaledOp) {
    3457             :     MaskBits = 9;
    3458             :     IsSigned = true;
    3459             :     Scale = 1;
    3460             :   } else {
    3461             :     MaskBits = 12;
    3462             :     IsSigned = false;
    3463        8196 :     Offset /= Scale;
    3464             :   }
    3465             : 
    3466             :   // Attempt to fold address computation.
    3467        8575 :   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
    3468        8575 :   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
    3469        8575 :   if (Offset >= MinOff && Offset <= MaxOff) {
    3470        8398 :     if (EmittableOffset)
    3471        3675 :       *EmittableOffset = Offset;
    3472        8398 :     Offset = 0;
    3473             :   } else {
    3474         177 :     int NewOff = Offset < 0 ? MinOff : MaxOff;
    3475         177 :     if (EmittableOffset)
    3476           1 :       *EmittableOffset = NewOff;
    3477         177 :     Offset = (Offset - NewOff) * Scale;
    3478             :   }
    3479        8575 :   if (OutUseUnscaledOp)
    3480        3676 :     *OutUseUnscaledOp = useUnscaledOp;
    3481        8575 :   if (OutUnscaledOp)
    3482        3676 :     *OutUnscaledOp = UnscaledOp;
    3483        8575 :   return AArch64FrameOffsetCanUpdate |
    3484        8575 :          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
    3485             : }
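                      : // Illustrative note (editor's sketch): resolving "LDRXui %x0, <fi>, #2"
                      : // with an incoming Offset of 12 gives Offset = 12 + 2 * 8 = 28. Since 28
                      : // is not a multiple of the scale (8), useUnscaledOp is set and the
                      : // instruction is steered to LDURXi, whose signed 9-bit range [-256, 255]
                      : // covers 28; the function reports *EmittableOffset = 28, zeroes the
                      : // residual Offset, and returns
                      : // AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal.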
    3486             : 
    3487        3905 : bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
    3488             :                                     unsigned FrameReg, int &Offset,
    3489             :                                     const AArch64InstrInfo *TII) {
    3490        3905 :   unsigned Opcode = MI.getOpcode();
    3491        3905 :   unsigned ImmIdx = FrameRegIdx + 1;
    3492             : 
    3493        3905 :   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    3494         214 :     Offset += MI.getOperand(ImmIdx).getImm();
    3495         428 :     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
    3496             :                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
    3497             :                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    3498         214 :     MI.eraseFromParent();
    3499         214 :     Offset = 0;
    3500         214 :     return true;
    3501             :   }
    3502             : 
    3503             :   int NewOffset;
    3504             :   unsigned UnscaledOp;
    3505             :   bool UseUnscaledOp;
    3506        3691 :   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
    3507             :                                          &UnscaledOp, &NewOffset);
    3508        3691 :   if (Status & AArch64FrameOffsetCanUpdate) {
    3509        3676 :     if (Status & AArch64FrameOffsetIsLegal)
    3510             :       // Replace the FrameIndex with FrameReg.
    3511        7350 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    3512        3676 :     if (UseUnscaledOp)
    3513         173 :       MI.setDesc(TII->get(UnscaledOp));
    3514             : 
    3515        7352 :     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    3516        3676 :     return Offset == 0;
    3517             :   }
    3518             : 
    3519             :   return false;
    3520             : }
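                      : // Illustrative note (editor's sketch): rewriting "%x0 = ADDXri <fi>, 0, 0"
                      : // against FrameReg = SP with Offset = 16 takes the early branch above:
                      : // emitFrameOffset() materializes "ADD x0, sp, #16", the pseudo is erased,
                      : // and the offset is reported as fully folded.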
    3521             : 
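                      : // The architectural AArch64 NOP is an alias of HINT #0, which is what
                      : // getNoop() emits below.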
    3522           0 : void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
    3523             :   NopInst.setOpcode(AArch64::HINT);
    3524           0 :   NopInst.addOperand(MCOperand::createImm(0));
    3525           0 : }
    3526             : 
    3527             : // AArch64 supports MachineCombiner.
    3528       14114 : bool AArch64InstrInfo::useMachineCombiner() const { return true; }
    3529             : 
     3530             : // True when Opc sets flags (i.e. defines NZCV)
    3531             : static bool isCombineInstrSettingFlag(unsigned Opc) {
    3532        2862 :   switch (Opc) {
    3533             :   case AArch64::ADDSWrr:
    3534             :   case AArch64::ADDSWri:
    3535             :   case AArch64::ADDSXrr:
    3536             :   case AArch64::ADDSXri:
    3537             :   case AArch64::SUBSWrr:
    3538             :   case AArch64::SUBSXrr:
    3539             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3540             :   case AArch64::SUBSWri:
    3541             :   case AArch64::SUBSXri:
    3542             :     return true;
    3543             :   default:
    3544             :     break;
    3545             :   }
    3546             :   return false;
    3547             : }
    3548             : 
    3549             : // 32b Opcodes that can be combined with a MUL
    3550             : static bool isCombineInstrCandidate32(unsigned Opc) {
    3551      119915 :   switch (Opc) {
    3552             :   case AArch64::ADDWrr:
    3553             :   case AArch64::ADDWri:
    3554             :   case AArch64::SUBWrr:
    3555             :   case AArch64::ADDSWrr:
    3556             :   case AArch64::ADDSWri:
    3557             :   case AArch64::SUBSWrr:
    3558             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3559             :   case AArch64::SUBWri:
    3560             :   case AArch64::SUBSWri:
    3561             :     return true;
    3562             :   default:
    3563             :     break;
    3564             :   }
    3565             :   return false;
    3566             : }
    3567             : 
    3568             : // 64b Opcodes that can be combined with a MUL
    3569             : static bool isCombineInstrCandidate64(unsigned Opc) {
    3570      118450 :   switch (Opc) {
    3571             :   case AArch64::ADDXrr:
    3572             :   case AArch64::ADDXri:
    3573             :   case AArch64::SUBXrr:
    3574             :   case AArch64::ADDSXrr:
    3575             :   case AArch64::ADDSXri:
    3576             :   case AArch64::SUBSXrr:
    3577             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3578             :   case AArch64::SUBXri:
    3579             :   case AArch64::SUBSXri:
    3580             :     return true;
    3581             :   default:
    3582             :     break;
    3583             :   }
    3584             :   return false;
    3585             : }
    3586             : 
    3587             : // FP Opcodes that can be combined with a FMUL
    3588      119714 : static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
    3589      239428 :   switch (Inst.getOpcode()) {
    3590             :   default:
    3591      119111 :     break;
    3592         603 :   case AArch64::FADDSrr:
    3593             :   case AArch64::FADDDrr:
    3594             :   case AArch64::FADDv2f32:
    3595             :   case AArch64::FADDv2f64:
    3596             :   case AArch64::FADDv4f32:
    3597             :   case AArch64::FSUBSrr:
    3598             :   case AArch64::FSUBDrr:
    3599             :   case AArch64::FSUBv2f32:
    3600             :   case AArch64::FSUBv2f64:
    3601             :   case AArch64::FSUBv4f32:
    3602         603 :     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    3603        1055 :     return (Options.UnsafeFPMath ||
    3604         603 :             Options.AllowFPOpFusion == FPOpFusion::Fast);
    3605             :   }
    3606      119111 :   return false;
    3607             : }
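                      : // Editor's note: these TargetOptions roughly correspond to compiling with
                      : // -ffast-math (UnsafeFPMath) or -ffp-contract=fast (FPOpFusion::Fast);
                      : // one of them must be in effect before separate FMUL/FADD pairs may be
                      : // contracted into fused forms.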
    3608             : 
    3609             : // Opcodes that can be combined with a MUL
    3610      119915 : static bool isCombineInstrCandidate(unsigned Opc) {
    3611      119915 :   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
    3612             : }
    3613             : 
    3614             : //
     3615             : // Utility routine that checks if \p MO is defined by a \p CombineOpc
     3616             : // instruction in the basic block \p MBB.
    3617        3801 : static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
    3618             :                        unsigned CombineOpc, unsigned ZeroReg = 0,
    3619             :                        bool CheckZeroReg = false) {
    3620        3801 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    3621             :   MachineInstr *MI = nullptr;
    3622             : 
    3623        3801 :   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3624        3641 :     MI = MRI.getUniqueVRegDef(MO.getReg());
    3625             :   // And it needs to be in the trace (otherwise, it won't have a depth).
    3626        3641 :   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    3627        3508 :     return false;
     3628             :   // It must only be used by the instruction we combine with.
    3629         293 :   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    3630             :     return false;
    3631             : 
    3632         278 :   if (CheckZeroReg) {
    3633             :     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
    3634             :            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
     3635             :            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    3636             :     // The third input reg must be zero.
    3637         210 :     if (MI->getOperand(3).getReg() != ZeroReg)
    3638           8 :       return false;
    3639             :   }
    3640             : 
    3641             :   return true;
    3642             : }
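                      : // Editor's note: on AArch64 an integer MUL is selected as MADD with a
                      : // zero addend (MUL Wd, Wn, Wm == MADD Wd, Wn, Wm, WZR), which is why the
                      : // CheckZeroReg path above verifies that operand 3 is WZR/XZR before
                      : // treating the definition as a plain multiply.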
    3643             : 
    3644             : //
     3645             : // Is \p MO defined by an integer multiply, and can it be combined?
    3646             : static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3647             :                               unsigned MulOpc, unsigned ZeroReg) {
    3648        3082 :   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
    3649             : }
    3650             : 
    3651             : //
     3652             : // Is \p MO defined by a floating-point multiply, and can it be combined?
    3653             : static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3654             :                                unsigned MulOpc) {
    3655         719 :   return canCombine(MBB, MO, MulOpc);
    3656             : }
    3657             : 
    3658             : // TODO: There are many more machine instruction opcodes to match:
    3659             : //       1. Other data types (integer, vectors)
    3660             : //       2. Other math / logic operations (xor, or)
    3661             : //       3. Other forms of the same operation (intrinsics and other variants)
    3662      119659 : bool AArch64InstrInfo::isAssociativeAndCommutative(
    3663             :     const MachineInstr &Inst) const {
    3664      239318 :   switch (Inst.getOpcode()) {
    3665         908 :   case AArch64::FADDDrr:
    3666             :   case AArch64::FADDSrr:
    3667             :   case AArch64::FADDv2f32:
    3668             :   case AArch64::FADDv2f64:
    3669             :   case AArch64::FADDv4f32:
    3670             :   case AArch64::FMULDrr:
    3671             :   case AArch64::FMULSrr:
    3672             :   case AArch64::FMULX32:
    3673             :   case AArch64::FMULX64:
    3674             :   case AArch64::FMULXv2f32:
    3675             :   case AArch64::FMULXv2f64:
    3676             :   case AArch64::FMULXv4f32:
    3677             :   case AArch64::FMULv2f32:
    3678             :   case AArch64::FMULv2f64:
    3679             :   case AArch64::FMULv4f32:
    3680         908 :     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
    3681             :   default:
    3682             :     return false;
    3683             :   }
    3684             : }
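                      : // Editor's note: FP addition and multiplication are commutative but not
                      : // associative under rounding ((a + b) + c may differ from a + (b + c)),
                      : // which is why reassociation of these opcodes is gated on UnsafeFPMath.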
    3685             : 
    3686             : /// Find instructions that can be turned into madd.
    3687      119915 : static bool getMaddPatterns(MachineInstr &Root,
    3688             :                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3689      119915 :   unsigned Opc = Root.getOpcode();
    3690      119915 :   MachineBasicBlock &MBB = *Root.getParent();
    3691             :   bool Found = false;
    3692             : 
    3693      119915 :   if (!isCombineInstrCandidate(Opc))
    3694             :     return false;
    3695             :   if (isCombineInstrSettingFlag(Opc)) {
    3696        1444 :     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
     3697             :     // When NZCV is live, bail out.
    3698        1444 :     if (Cmp_NZCV == -1)
    3699             :       return false;
    3700         645 :     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
     3701             :     // When the opcode can't change, bail out.
    3702             :     // CHECKME: do we miss any cases for opcode conversion?
    3703         645 :     if (NewOpc == Opc)
    3704             :       return false;
    3705             :     Opc = NewOpc;
    3706             :   }
    3707             : 
    3708        2063 :   switch (Opc) {
    3709             :   default:
    3710             :     break;
    3711         298 :   case AArch64::ADDWrr:
    3712             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3713             :            "ADDWrr does not have register operands");
    3714         298 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3715             :                           AArch64::WZR)) {
    3716           1 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
    3717             :       Found = true;
    3718             :     }
    3719         298 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3720             :                           AArch64::WZR)) {
    3721           2 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
    3722             :       Found = true;
    3723             :     }
    3724             :     break;
    3725         255 :   case AArch64::ADDXrr:
    3726         255 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3727             :                           AArch64::XZR)) {
    3728           8 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
    3729             :       Found = true;
    3730             :     }
    3731         255 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3732             :                           AArch64::XZR)) {
    3733          40 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
    3734             :       Found = true;
    3735             :     }
    3736             :     break;
    3737         312 :   case AArch64::SUBWrr:
    3738         312 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3739             :                           AArch64::WZR)) {
    3740           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
    3741             :       Found = true;
    3742             :     }
    3743         312 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3744             :                           AArch64::WZR)) {
    3745         130 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
    3746             :       Found = true;
    3747             :     }
    3748             :     break;
    3749         154 :   case AArch64::SUBXrr:
    3750         154 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3751             :                           AArch64::XZR)) {
    3752           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
    3753             :       Found = true;
    3754             :     }
    3755         154 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3756             :                           AArch64::XZR)) {
    3757          17 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
    3758             :       Found = true;
    3759             :     }
    3760             :     break;
    3761         231 :   case AArch64::ADDWri:
    3762         231 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3763             :                           AArch64::WZR)) {
    3764           2 :       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
    3765             :       Found = true;
    3766             :     }
    3767             :     break;
    3768         600 :   case AArch64::ADDXri:
    3769         600 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3770             :                           AArch64::XZR)) {
    3771           1 :       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
    3772             :       Found = true;
    3773             :     }
    3774             :     break;
    3775          79 :   case AArch64::SUBWri:
    3776          79 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3777             :                           AArch64::WZR)) {
    3778           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
    3779             :       Found = true;
    3780             :     }
    3781             :     break;
    3782         134 :   case AArch64::SUBXri:
    3783         134 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3784             :                           AArch64::XZR)) {
    3785           1 :       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
    3786             :       Found = true;
    3787             :     }
    3788             :     break;
    3789             :   }
    3790             :   return Found;
    3791             : }
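                      : // Illustrative note (editor's sketch): the MULADDW_OP2 pattern above
                      : // matches MIR such as
                      : //
                      : //   %2:gpr32 = MADDWrrr %0, %1, $wzr    ; a plain MUL
                      : //   %4:gpr32 = ADDWrr %3, %2
                      : //
                      : // which the MachineCombiner can then rewrite into a single
                      : //
                      : //   %4:gpr32 = MADDWrrr %0, %1, %3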
    3792             : /// Floating-Point Support
    3793             : 
     3794             : /// Find instructions that can be turned into a fused multiply-add (fmadd).
    3795      119714 : static bool getFMAPatterns(MachineInstr &Root,
    3796             :                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3797             : 
    3798      119714 :   if (!isCombineInstrCandidateFP(Root))
    3799             :     return false;
    3800             : 
    3801         183 :   MachineBasicBlock &MBB = *Root.getParent();
    3802             :   bool Found = false;
    3803             : 
    3804         366 :   switch (Root.getOpcode()) {
    3805             :   default:
    3806             :     assert(false && "Unsupported FP instruction in combiner\n");
    3807             :     break;
    3808          55 :   case AArch64::FADDSrr:
    3809             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3810             :            "FADDWrr does not have register operands");
    3811          55 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3812           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
    3813             :       Found = true;
    3814          54 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3815             :                                   AArch64::FMULv1i32_indexed)) {
    3816           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
    3817             :       Found = true;
    3818             :     }
    3819          55 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3820           0 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
    3821             :       Found = true;
    3822          55 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3823             :                                   AArch64::FMULv1i32_indexed)) {
    3824           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
    3825             :       Found = true;
    3826             :     }
    3827             :     break;
    3828          30 :   case AArch64::FADDDrr:
    3829          30 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3830           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
    3831             :       Found = true;
    3832          29 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3833             :                                   AArch64::FMULv1i64_indexed)) {
    3834           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
    3835             :       Found = true;
    3836             :     }
    3837          30 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3838           2 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
    3839             :       Found = true;
    3840          28 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3841             :                                   AArch64::FMULv1i64_indexed)) {
    3842           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
    3843             :       Found = true;
    3844             :     }
    3845             :     break;
    3846           5 :   case AArch64::FADDv2f32:
    3847           5 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3848             :                            AArch64::FMULv2i32_indexed)) {
    3849           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
    3850             :       Found = true;
    3851           4 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3852             :                                   AArch64::FMULv2f32)) {
    3853           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
    3854             :       Found = true;
    3855             :     }
    3856           5 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3857             :                            AArch64::FMULv2i32_indexed)) {
    3858           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
    3859             :       Found = true;
    3860           5 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3861             :                                   AArch64::FMULv2f32)) {
    3862           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
    3863             :       Found = true;
    3864             :     }
    3865             :     break;
    3866          10 :   case AArch64::FADDv2f64:
    3867          10 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3868             :                            AArch64::FMULv2i64_indexed)) {
    3869           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
    3870             :       Found = true;
    3871           9 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3872             :                                   AArch64::FMULv2f64)) {
    3873           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
    3874             :       Found = true;
    3875             :     }
    3876          10 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3877             :                            AArch64::FMULv2i64_indexed)) {
    3878           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
    3879             :       Found = true;
    3880          10 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3881             :                                   AArch64::FMULv2f64)) {
    3882           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
    3883             :       Found = true;
    3884             :     }
    3885             :     break;
    3886          31 :   case AArch64::FADDv4f32:
    3887          31 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3888             :                            AArch64::FMULv4i32_indexed)) {
    3889           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
    3890             :       Found = true;
    3891          30 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3892             :                                   AArch64::FMULv4f32)) {
    3893           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
    3894             :       Found = true;
    3895             :     }
    3896          31 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3897             :                            AArch64::FMULv4i32_indexed)) {
    3898           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
    3899             :       Found = true;
    3900          31 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3901             :                                   AArch64::FMULv4f32)) {
    3902           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
    3903             :       Found = true;
    3904             :     }
    3905             :     break;
    3906             : 
    3907           6 :   case AArch64::FSUBSrr:
    3908           6 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3909           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
    3910             :       Found = true;
    3911             :     }
    3912           6 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3913           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
    3914             :       Found = true;
    3915           6 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3916             :                                   AArch64::FMULv1i32_indexed)) {
    3917           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
    3918             :       Found = true;
    3919             :     }
    3920           6 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
    3921           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
    3922             :       Found = true;
    3923             :     }
    3924             :     break;
    3925           4 :   case AArch64::FSUBDrr:
    3926           4 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3927           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
    3928             :       Found = true;
    3929             :     }
    3930           4 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3931           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
    3932             :       Found = true;
    3933           4 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3934             :                                   AArch64::FMULv1i64_indexed)) {
    3935           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
    3936             :       Found = true;
    3937             :     }
    3938           4 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
    3939           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
    3940             :       Found = true;
    3941             :     }
    3942             :     break;
    3943          14 :   case AArch64::FSUBv2f32:
    3944          14 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3945             :                            AArch64::FMULv2i32_indexed)) {
    3946           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
    3947             :       Found = true;
    3948          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3949             :                                   AArch64::FMULv2f32)) {
    3950           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
    3951             :       Found = true;
    3952             :     }
    3953          14 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3954             :                            AArch64::FMULv2i32_indexed)) {
    3955           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
    3956             :       Found = true;
    3957          14 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3958             :                                   AArch64::FMULv2f32)) {
    3959           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
    3960             :       Found = true;
    3961             :     }
    3962             :     break;
    3963          16 :   case AArch64::FSUBv2f64:
    3964          16 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3965             :                            AArch64::FMULv2i64_indexed)) {
    3966           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
    3967             :       Found = true;
    3968          14 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3969             :                                   AArch64::FMULv2f64)) {
    3970           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
    3971             :       Found = true;
    3972             :     }
    3973          16 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3974             :                            AArch64::FMULv2i64_indexed)) {
    3975           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
    3976             :       Found = true;
    3977          16 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3978             :                                   AArch64::FMULv2f64)) {
    3979           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
    3980             :       Found = true;
    3981             :     }
    3982             :     break;
    3983          12 :   case AArch64::FSUBv4f32:
    3984          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3985             :                            AArch64::FMULv4i32_indexed)) {
    3986           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
    3987             :       Found = true;
    3988          10 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3989             :                                   AArch64::FMULv4f32)) {
    3990           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
    3991             :       Found = true;
    3992             :     }
    3993          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3994             :                            AArch64::FMULv4i32_indexed)) {
    3995           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
    3996             :       Found = true;
    3997          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3998             :                                   AArch64::FMULv4f32)) {
    3999           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
    4000             :       Found = true;
    4001             :     }
    4002             :     break;
    4003             :   }
    4004             :   return Found;
    4005             : }
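                      : // Illustrative note (editor's sketch): with one of the FP-fusion options
                      : // enabled, FMULADDS_OP1 matches
                      : //
                      : //   %2:fpr32 = FMULSrr %0, %1
                      : //   %4:fpr32 = FADDSrr %2, %3
                      : //
                      : // and licenses rewriting the pair into the fused form
                      : //
                      : //   %4:fpr32 = FMADDSrrr %0, %1, %3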
    4006             : 
    4007             : /// Return true when a code sequence can improve throughput. It
    4008             : /// should be called only for instructions in loops.
    4009             : /// \param Pattern - combiner pattern
    4010          39 : bool AArch64InstrInfo::isThroughputPattern(
    4011             :     MachineCombinerPattern Pattern) const {
    4012             :   switch (Pattern) {
    4013             :   default:
    4014             :     break;
    4015             :   case MachineCombinerPattern::FMULADDS_OP1:
    4016             :   case MachineCombinerPattern::FMULADDS_OP2:
    4017             :   case MachineCombinerPattern::FMULSUBS_OP1:
    4018             :   case MachineCombinerPattern::FMULSUBS_OP2:
    4019             :   case MachineCombinerPattern::FMULADDD_OP1:
    4020             :   case MachineCombinerPattern::FMULADDD_OP2:
    4021             :   case MachineCombinerPattern::FMULSUBD_OP1:
    4022             :   case MachineCombinerPattern::FMULSUBD_OP2:
    4023             :   case MachineCombinerPattern::FNMULSUBS_OP1:
    4024             :   case MachineCombinerPattern::FNMULSUBD_OP1:
    4025             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    4026             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    4027             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    4028             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    4029             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    4030             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    4031             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    4032             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    4033             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    4034             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    4035             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    4036             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    4037             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    4038             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    4039             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    4040             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    4041             :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    4042             :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    4043             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    4044             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    4045             :   case MachineCombinerPattern::FMLSv2f32_OP2:
    4046             :   case MachineCombinerPattern::FMLSv2f64_OP2:
    4047             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    4048             :   case MachineCombinerPattern::FMLSv4f32_OP2:
    4049             :     return true;
    4050             :   } // end switch (Pattern)
    4051             :   return false;
    4052             : }
    4053             : /// Return true when there is potentially a faster code sequence for an
    4054             : /// instruction chain ending in \p Root. All potential patterns are listed in
     4055             : /// the \p Patterns vector. Patterns should be sorted in priority order since the
    4056             : /// pattern evaluator stops checking as soon as it finds a faster sequence.
    4057             : 
    4058      119915 : bool AArch64InstrInfo::getMachineCombinerPatterns(
    4059             :     MachineInstr &Root,
    4060             :     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
    4061             :   // Integer patterns
    4062      119915 :   if (getMaddPatterns(Root, Patterns))
    4063             :     return true;
    4064             :   // Floating point patterns
    4065      119714 :   if (getFMAPatterns(Root, Patterns))
    4066             :     return true;
    4067             : 
    4068      119659 :   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
    4069             : }
    4070             : 
    4071             : enum class FMAInstKind { Default, Indexed, Accumulator };
    4072             : /// genFusedMultiply - Generate fused multiply instructions.
    4073             : /// This function supports both integer and floating point instructions.
    4074             : /// A typical example:
    4075             : ///  F|MUL I=A,B,0
    4076             : ///  F|ADD R,I,C
    4077             : ///  ==> F|MADD R,A,B,C
    4078             : /// \param MF Containing MachineFunction
    4079             : /// \param MRI Register information
    4080             : /// \param TII Target information
    4081             : /// \param Root is the F|ADD instruction
    4082             : /// \param [out] InsInstrs is a vector of machine instructions and will
    4083             : /// contain the generated madd instruction
    4084             : /// \param IdxMulOpd is index of operand in Root that is the result of
    4085             : /// the F|MUL. In the example above IdxMulOpd is 1.
     4086             : /// \param MaddOpc the opcode of the f|madd instruction
    4087             : /// \param RC Register class of operands
     4088             : /// \param kind The kind of FMA instruction (addressing mode) to be generated
    4089             : /// \param ReplacedAddend is the result register from the instruction
    4090             : /// replacing the non-combined operand, if any.
    4091             : static MachineInstr *
    4092         290 : genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
    4093             :                  const TargetInstrInfo *TII, MachineInstr &Root,
    4094             :                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
    4095             :                  unsigned MaddOpc, const TargetRegisterClass *RC,
    4096             :                  FMAInstKind kind = FMAInstKind::Default,
    4097             :                  const unsigned *ReplacedAddend = nullptr) {
    4098             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    4099             : 
    4100         290 :   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
    4101         580 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    4102         290 :   unsigned ResultReg = Root.getOperand(0).getReg();
    4103         290 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    4104             :   bool Src0IsKill = MUL->getOperand(1).isKill();
    4105         290 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    4106             :   bool Src1IsKill = MUL->getOperand(2).isKill();
    4107             : 
    4108             :   unsigned SrcReg2;
    4109             :   bool Src2IsKill;
    4110         290 :   if (ReplacedAddend) {
    4111             :     // If we just generated a new addend, we must be its only use.
    4112          36 :     SrcReg2 = *ReplacedAddend;
    4113             :     Src2IsKill = true;
    4114             :   } else {
    4115         254 :     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    4116             :     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
    4117             :   }
    4118             : 
    4119         290 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    4120         290 :     MRI.constrainRegClass(ResultReg, RC);
    4121         290 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    4122         290 :     MRI.constrainRegClass(SrcReg0, RC);
    4123         290 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    4124         290 :     MRI.constrainRegClass(SrcReg1, RC);
    4125         290 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    4126         290 :     MRI.constrainRegClass(SrcReg2, RC);
    4127             : 
    4128             :   MachineInstrBuilder MIB;
    4129         290 :   if (kind == FMAInstKind::Default)
    4130         618 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4131         206 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4132         206 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4133         206 :               .addReg(SrcReg2, getKillRegState(Src2IsKill));
    4134          84 :   else if (kind == FMAInstKind::Indexed)
    4135          45 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4136          15 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    4137          15 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4138          15 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4139          15 :               .addImm(MUL->getOperand(3).getImm());
    4140          69 :   else if (kind == FMAInstKind::Accumulator)
    4141         207 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4142          69 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    4143          69 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4144          69 :               .addReg(SrcReg1, getKillRegState(Src1IsKill));
    4145             :   else
    4146             :     assert(false && "Invalid FMA instruction kind");
    4147             :   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
    4148         290 :   InsInstrs.push_back(MIB);
    4149         290 :   return MUL;
    4150             : }
    4151             : 
    4152             : /// genMaddR - Generate madd instruction and combine mul and add using
    4153             : /// an extra virtual register
    4154             : /// Example - the ADD's immediate operand must first be materialized in a register:
    4155             : ///   MUL I=A,B,0
    4156             : ///   ADD R,I,Imm
    4157             : ///   ==> ORR  V, ZR, Imm
    4158             : ///   ==> MADD R,A,B,V
    4159             : /// \param MF Containing MachineFunction
    4160             : /// \param MRI Register information
    4161             : /// \param TII Target information
    4162             : /// \param Root is the ADD instruction
    4163             : /// \param [out] InsInstrs is a vector of machine instructions and will
    4164             : /// contain the generated madd instruction
    4165             : /// \param IdxMulOpd is index of operand in Root that is the result of
    4166             : /// the MUL. In the example above IdxMulOpd is 1.
    4167             : /// \param MaddOpc the opcode of the madd instruction
    4168             : /// \param VR is a virtual register that holds the value of an ADD operand
    4169             : /// (V in the example above).
    4170             : /// \param RC Register class of operands
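                     : /// Callers materialize VR first (e.g. with an ORR or SUB from the zero
                     : /// register), push that instruction into InsInstrs, and record VR in
                     : /// InstrIdxForVirtReg so the combiner can track the new register.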
    4171           3 : static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
    4172             :                               const TargetInstrInfo *TII, MachineInstr &Root,
    4173             :                               SmallVectorImpl<MachineInstr *> &InsInstrs,
    4174             :                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
    4175             :                               const TargetRegisterClass *RC) {
    4176             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    4177             : 
    4178           6 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    4179           3 :   unsigned ResultReg = Root.getOperand(0).getReg();
    4180           3 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    4181             :   bool Src0IsKill = MUL->getOperand(1).isKill();
    4182           3 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    4183             :   bool Src1IsKill = MUL->getOperand(2).isKill();
    4184             : 
    4185           3 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    4186           3 :     MRI.constrainRegClass(ResultReg, RC);
    4187           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    4188           3 :     MRI.constrainRegClass(SrcReg0, RC);
    4189           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    4190           3 :     MRI.constrainRegClass(SrcReg1, RC);
    4191           3 :   if (TargetRegisterInfo::isVirtualRegister(VR))
    4192           3 :     MRI.constrainRegClass(VR, RC);
    4193             : 
    4194             :   MachineInstrBuilder MIB =
    4195           6 :       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4196           3 :           .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4197           3 :           .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4198           3 :           .addReg(VR);
    4199             :   // Insert the MADD
    4200           3 :   InsInstrs.push_back(MIB);
    4201           3 :   return MUL;
    4202             : }
    4203             : 
    4204             : /// When getMachineCombinerPatterns() finds potential patterns,
    4205             : /// this function generates the instructions that could replace the
    4206             : /// original code sequence
    4207         476 : void AArch64InstrInfo::genAlternativeCodeSequence(
    4208             :     MachineInstr &Root, MachineCombinerPattern Pattern,
    4209             :     SmallVectorImpl<MachineInstr *> &InsInstrs,
    4210             :     SmallVectorImpl<MachineInstr *> &DelInstrs,
    4211             :     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
    4212         476 :   MachineBasicBlock &MBB = *Root.getParent();
    4213         476 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    4214             :   MachineFunction &MF = *MBB.getParent();
    4215         476 :   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    4216             : 
    4217             :   MachineInstr *MUL;
    4218             :   const TargetRegisterClass *RC;
    4219             :   unsigned Opc;
    4220         476 :   switch (Pattern) {
    4221         182 :   default:
    4222             :     // Reassociate instructions.
    4223         182 :     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
    4224             :                                                 DelInstrs, InstrIdxForVirtReg);
    4225         182 :     return;
    4226           9 :   case MachineCombinerPattern::MULADDW_OP1:
    4227             :   case MachineCombinerPattern::MULADDX_OP1:
    4228             :     // MUL I=A,B,0
    4229             :     // ADD R,I,C
    4230             :     // ==> MADD R,A,B,C
    4231             :     // --- Create(MADD);
    4232           9 :     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
    4233             :       Opc = AArch64::MADDWrrr;
    4234             :       RC = &AArch64::GPR32RegClass;
    4235             :     } else {
    4236             :       Opc = AArch64::MADDXrrr;
    4237             :       RC = &AArch64::GPR64RegClass;
    4238             :     }
    4239           9 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4240           9 :     break;
    4241          42 :   case MachineCombinerPattern::MULADDW_OP2:
    4242             :   case MachineCombinerPattern::MULADDX_OP2:
    4243             :     // MUL I=A,B,0
    4244             :     // ADD R,C,I
    4245             :     // ==> MADD R,A,B,C
    4246             :     // --- Create(MADD);
    4247          42 :     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
    4248             :       Opc = AArch64::MADDWrrr;
    4249             :       RC = &AArch64::GPR32RegClass;
    4250             :     } else {
    4251             :       Opc = AArch64::MADDXrrr;
    4252             :       RC = &AArch64::GPR64RegClass;
    4253             :     }
    4254          42 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4255          42 :     break;
    4256           3 :   case MachineCombinerPattern::MULADDWI_OP1:
    4257             :   case MachineCombinerPattern::MULADDXI_OP1: {
    4258             :     // MUL I=A,B,0
    4259             :     // ADD R,I,Imm
    4260             :     // ==> ORR  V, ZR, Imm
    4261             :     // ==> MADD R,A,B,V
    4262             :     // --- Create(MADD);
    4263             :     const TargetRegisterClass *OrrRC;
    4264             :     unsigned BitSize, OrrOpc, ZeroReg;
    4265           3 :     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
    4266             :       OrrOpc = AArch64::ORRWri;
    4267             :       OrrRC = &AArch64::GPR32spRegClass;
    4268             :       BitSize = 32;
    4269             :       ZeroReg = AArch64::WZR;
    4270             :       Opc = AArch64::MADDWrrr;
    4271             :       RC = &AArch64::GPR32RegClass;
    4272             :     } else {
    4273             :       OrrOpc = AArch64::ORRXri;
    4274             :       OrrRC = &AArch64::GPR64spRegClass;
    4275             :       BitSize = 64;
    4276             :       ZeroReg = AArch64::XZR;
    4277             :       Opc = AArch64::MADDXrrr;
    4278             :       RC = &AArch64::GPR64RegClass;
    4279             :     }
    4280           3 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    4281           3 :     uint64_t Imm = Root.getOperand(2).getImm();
    4282             : 
    4283           3 :     if (Root.getOperand(3).isImm()) {
    4284           3 :       unsigned Val = Root.getOperand(3).getImm();
    4285           3 :       Imm = Imm << Val;
    4286             :     }
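                     :     // For example, add w0, w8, #4, lsl #12 reaches here with
                     :     // Imm == 0x4000; a single set bit is always a valid logical
                     :     // immediate, so the ORR below materializes it into NewVR.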
    4287           3 :     uint64_t UImm = SignExtend64(Imm, BitSize);
    4288             :     uint64_t Encoding;
    4289           3 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    4290             :       MachineInstrBuilder MIB1 =
    4291           4 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    4292           2 :               .addReg(ZeroReg)
    4293           2 :               .addImm(Encoding);
    4294           2 :       InsInstrs.push_back(MIB1);
    4295           2 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4296           2 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4297             :     }
    4298             :     break;
    4299             :   }
    4300           0 :   case MachineCombinerPattern::MULSUBW_OP1:
    4301             :   case MachineCombinerPattern::MULSUBX_OP1: {
    4302             :     // MUL I=A,B,0
    4303             :     // SUB R,I,C
    4304             :     // ==> SUB  V, 0, C
    4305             :     // ==> MADD R,A,B,V // = -C + A*B
    4306             :     // --- Create(MADD);
    4307             :     const TargetRegisterClass *SubRC;
    4308             :     unsigned SubOpc, ZeroReg;
    4309           0 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
    4310             :       SubOpc = AArch64::SUBWrr;
    4311             :       SubRC = &AArch64::GPR32spRegClass;
    4312             :       ZeroReg = AArch64::WZR;
    4313             :       Opc = AArch64::MADDWrrr;
    4314             :       RC = &AArch64::GPR32RegClass;
    4315             :     } else {
    4316             :       SubOpc = AArch64::SUBXrr;
    4317             :       SubRC = &AArch64::GPR64spRegClass;
    4318             :       ZeroReg = AArch64::XZR;
    4319             :       Opc = AArch64::MADDXrrr;
    4320             :       RC = &AArch64::GPR64RegClass;
    4321             :     }
    4322           0 :     unsigned NewVR = MRI.createVirtualRegister(SubRC);
    4323             :     // SUB NewVR, 0, C
    4324             :     MachineInstrBuilder MIB1 =
    4325           0 :         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
    4326           0 :             .addReg(ZeroReg)
    4327           0 :             .add(Root.getOperand(2));
    4328           0 :     InsInstrs.push_back(MIB1);
    4329           0 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4330           0 :     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4331             :     break;
    4332             :   }
    4333         147 :   case MachineCombinerPattern::MULSUBW_OP2:
    4334             :   case MachineCombinerPattern::MULSUBX_OP2:
    4335             :     // MUL I=A,B,0
    4336             :     // SUB R,C,I
    4337             :     // ==> MSUB R,A,B,C (computes C - A*B)
    4338             :     // --- Create(MSUB);
    4339         147 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
    4340             :       Opc = AArch64::MSUBWrrr;
    4341             :       RC = &AArch64::GPR32RegClass;
    4342             :     } else {
    4343             :       Opc = AArch64::MSUBXrrr;
    4344             :       RC = &AArch64::GPR64RegClass;
    4345             :     }
    4346         147 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4347         147 :     break;
    4348           1 :   case MachineCombinerPattern::MULSUBWI_OP1:
    4349             :   case MachineCombinerPattern::MULSUBXI_OP1: {
    4350             :     // MUL I=A,B,0
    4351             :     // SUB R,I,Imm
    4352             :     // ==> ORR  V, ZR, -Imm
    4353             :     // ==> MADD R,A,B,V // = -Imm + A*B
    4354             :     // --- Create(MADD);
    4355             :     const TargetRegisterClass *OrrRC;
    4356             :     unsigned BitSize, OrrOpc, ZeroReg;
    4357           1 :     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
    4358             :       OrrOpc = AArch64::ORRWri;
    4359             :       OrrRC = &AArch64::GPR32spRegClass;
    4360             :       BitSize = 32;
    4361             :       ZeroReg = AArch64::WZR;
    4362             :       Opc = AArch64::MADDWrrr;
    4363             :       RC = &AArch64::GPR32RegClass;
    4364             :     } else {
    4365             :       OrrOpc = AArch64::ORRXri;
    4366             :       OrrRC = &AArch64::GPR64spRegClass;
    4367             :       BitSize = 64;
    4368             :       ZeroReg = AArch64::XZR;
    4369             :       Opc = AArch64::MADDXrrr;
    4370             :       RC = &AArch64::GPR64RegClass;
    4371             :     }
    4372           1 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    4373           1 :     uint64_t Imm = Root.getOperand(2).getImm();
    4374           1 :     if (Root.getOperand(3).isImm()) {
    4375           1 :       unsigned Val = Root.getOperand(3).getImm();
    4376           1 :       Imm = Imm << Val;
    4377             :     }
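                     :     // For example, sub w0, w8, #4 reaches here with Imm == 4;
                     :     // -Imm sign-extended is 0xfffffffc, a rotated run of ones and
                     :     // hence a valid logical immediate.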
    4378           1 :     uint64_t UImm = SignExtend64(-Imm, BitSize);
    4379             :     uint64_t Encoding;
    4380           1 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    4381             :       MachineInstrBuilder MIB1 =
    4382           2 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    4383           1 :               .addReg(ZeroReg)
    4384           1 :               .addImm(Encoding);
    4385           1 :       InsInstrs.push_back(MIB1);
    4386           1 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4387           1 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4388             :     }
    4389             :     break;
    4390             :   }
    4391             :   // Floating Point Support
    4392           2 :   case MachineCombinerPattern::FMULADDS_OP1:
    4393             :   case MachineCombinerPattern::FMULADDD_OP1:
    4394             :     // FMUL I=A,B,0
    4395             :     // FADD R,I,C
    4396             :     // ==> FMADD R,A,B,C
    4397             :     // --- Create(FMADD);
    4398           2 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
    4399             :       Opc = AArch64::FMADDSrrr;
    4400             :       RC = &AArch64::FPR32RegClass;
    4401             :     } else {
    4402             :       Opc = AArch64::FMADDDrrr;
    4403             :       RC = &AArch64::FPR64RegClass;
    4404             :     }
    4405           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4406           2 :     break;
    4407           2 :   case MachineCombinerPattern::FMULADDS_OP2:
    4408             :   case MachineCombinerPattern::FMULADDD_OP2:
    4409             :     // FMUL I=A,B,0
    4410             :     // FADD R,C,I
    4411             :     // ==> FMADD R,A,B,C
    4412             :     // --- Create(FMADD);
    4413           2 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
    4414             :       Opc = AArch64::FMADDSrrr;
    4415             :       RC = &AArch64::FPR32RegClass;
    4416             :     } else {
    4417             :       Opc = AArch64::FMADDDrrr;
    4418             :       RC = &AArch64::FPR64RegClass;
    4419             :     }
    4420           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4421           2 :     break;
    4422             : 
    4423           1 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    4424             :     Opc = AArch64::FMLAv1i32_indexed;
    4425             :     RC = &AArch64::FPR32RegClass;
    4426           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4427             :                            FMAInstKind::Indexed);
    4428           1 :     break;
    4429           0 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    4430             :     Opc = AArch64::FMLAv1i32_indexed;
    4431             :     RC = &AArch64::FPR32RegClass;
    4432           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4433             :                            FMAInstKind::Indexed);
    4434           0 :     break;
    4435             : 
    4436           1 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    4437             :     Opc = AArch64::FMLAv1i64_indexed;
    4438             :     RC = &AArch64::FPR64RegClass;
    4439           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4440             :                            FMAInstKind::Indexed);
    4441           1 :     break;
    4442           0 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    4443             :     Opc = AArch64::FMLAv1i64_indexed;
    4444             :     RC = &AArch64::FPR64RegClass;
    4445           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4446             :                            FMAInstKind::Indexed);
    4447           0 :     break;
    4448             : 
    4449           2 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    4450             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    4451             :     RC = &AArch64::FPR64RegClass;
    4452           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
    4453             :       Opc = AArch64::FMLAv2i32_indexed;
    4454           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4455             :                              FMAInstKind::Indexed);
    4456             :     } else {
    4457             :       Opc = AArch64::FMLAv2f32;
    4458           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4459             :                              FMAInstKind::Accumulator);
    4460             :     }
    4461             :     break;
    4462           0 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    4463             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    4464             :     RC = &AArch64::FPR64RegClass;
    4465           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
    4466             :       Opc = AArch64::FMLAv2i32_indexed;
    4467           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4468             :                              FMAInstKind::Indexed);
    4469             :     } else {
    4470             :       Opc = AArch64::FMLAv2f32;
    4471           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4472             :                              FMAInstKind::Accumulator);
    4473             :     }
    4474             :     break;
    4475             : 
    4476           2 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    4477             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    4478             :     RC = &AArch64::FPR128RegClass;
    4479           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
    4480             :       Opc = AArch64::FMLAv2i64_indexed;
    4481           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4482             :                              FMAInstKind::Indexed);
    4483             :     } else {
    4484             :       Opc = AArch64::FMLAv2f64;
    4485           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4486             :                              FMAInstKind::Accumulator);
    4487             :     }
    4488             :     break;
    4489           0 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    4490             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    4491             :     RC = &AArch64::FPR128RegClass;
    4492           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
    4493             :       Opc = AArch64::FMLAv2i64_indexed;
    4494           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4495             :                              FMAInstKind::Indexed);
    4496             :     } else {
    4497             :       Opc = AArch64::FMLAv2f64;
    4498           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4499             :                              FMAInstKind::Accumulator);
    4500             :     }
    4501             :     break;
    4502             : 
    4503           2 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    4504             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    4505             :     RC = &AArch64::FPR128RegClass;
    4506           2 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
    4507             :       Opc = AArch64::FMLAv4i32_indexed;
    4508           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4509             :                              FMAInstKind::Indexed);
    4510             :     } else {
    4511             :       Opc = AArch64::FMLAv4f32;
    4512           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4513             :                              FMAInstKind::Accumulator);
    4514             :     }
    4515             :     break;
    4516             : 
    4517           0 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    4518             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    4519             :     RC = &AArch64::FPR128RegClass;
    4520           0 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
    4521             :       Opc = AArch64::FMLAv4i32_indexed;
    4522           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4523             :                              FMAInstKind::Indexed);
    4524             :     } else {
    4525             :       Opc = AArch64::FMLAv4f32;
    4526           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4527             :                              FMAInstKind::Accumulator);
    4528             :     }
    4529             :     break;
    4530             : 
    4531           0 :   case MachineCombinerPattern::FMULSUBS_OP1:
    4532             :   case MachineCombinerPattern::FMULSUBD_OP1: {
    4533             :     // FMUL I=A,B,0
    4534             :     // FSUB R,I,C
    4535             :     // ==> FNMSUB R,A,B,C // = -C + A*B
    4536             :     // --- Create(FNMSUB);
    4537           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
    4538             :       Opc = AArch64::FNMSUBSrrr;
    4539             :       RC = &AArch64::FPR32RegClass;
    4540             :     } else {
    4541             :       Opc = AArch64::FNMSUBDrrr;
    4542             :       RC = &AArch64::FPR64RegClass;
    4543             :     }
    4544           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4545           0 :     break;
    4546             :   }
    4547             : 
    4548           4 :   case MachineCombinerPattern::FNMULSUBS_OP1:
    4549             :   case MachineCombinerPattern::FNMULSUBD_OP1: {
    4550             :     // FNMUL I=A,B,0
    4551             :     // FSUB R,I,C
    4552             :     // ==> FNMADD R,A,B,C // = -A*B - C
    4553             :     // --- Create(FNMADD);
    4554           4 :     if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
    4555             :       Opc = AArch64::FNMADDSrrr;
    4556             :       RC = &AArch64::FPR32RegClass;
    4557             :     } else {
    4558             :       Opc = AArch64::FNMADDDrrr;
    4559             :       RC = &AArch64::FPR64RegClass;
    4560             :     }
    4561           4 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4562           4 :     break;
    4563             :   }
    4564             : 
    4565           0 :   case MachineCombinerPattern::FMULSUBS_OP2:
    4566             :   case MachineCombinerPattern::FMULSUBD_OP2: {
    4567             :     // FMUL I=A,B,0
    4568             :     // FSUB R,C,I
    4569             :     // ==> FMSUB R,A,B,C (computes C - A*B)
    4570             :     // --- Create(FMSUB);
    4571           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
    4572             :       Opc = AArch64::FMSUBSrrr;
    4573             :       RC = &AArch64::FPR32RegClass;
    4574             :     } else {
    4575             :       Opc = AArch64::FMSUBDrrr;
    4576             :       RC = &AArch64::FPR64RegClass;
    4577             :     }
    4578           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4579           0 :     break;
    4580             :   }
    4581             : 
    4582           2 :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    4583             :     Opc = AArch64::FMLSv1i32_indexed;
    4584             :     RC = &AArch64::FPR32RegClass;
    4585           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4586             :                            FMAInstKind::Indexed);
    4587           2 :     break;
    4588             : 
    4589           2 :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    4590             :     Opc = AArch64::FMLSv1i64_indexed;
    4591             :     RC = &AArch64::FPR64RegClass;
    4592           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4593             :                            FMAInstKind::Indexed);
    4594           2 :     break;
    4595             : 
    4596          12 :   case MachineCombinerPattern::FMLSv2f32_OP2:
    4597             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    4598             :     RC = &AArch64::FPR64RegClass;
    4599          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
    4600             :       Opc = AArch64::FMLSv2i32_indexed;
    4601           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4602             :                              FMAInstKind::Indexed);
    4603             :     } else {
    4604             :       Opc = AArch64::FMLSv2f32;
    4605          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4606             :                              FMAInstKind::Accumulator);
    4607             :     }
    4608             :     break;
    4609             : 
    4610          12 :   case MachineCombinerPattern::FMLSv2f64_OP2:
    4611             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    4612             :     RC = &AArch64::FPR128RegClass;
    4613          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
    4614             :       Opc = AArch64::FMLSv2i64_indexed;
    4615           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4616             :                              FMAInstKind::Indexed);
    4617             :     } else {
    4618             :       Opc = AArch64::FMLSv2f64;
    4619          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4620             :                              FMAInstKind::Accumulator);
    4621             :     }
    4622             :     break;
    4623             : 
    4624          12 :   case MachineCombinerPattern::FMLSv4f32_OP2:
    4625             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    4626             :     RC = &AArch64::FPR128RegClass;
    4627          12 :     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
    4628             :       Opc = AArch64::FMLSv4i32_indexed;
    4629           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4630             :                              FMAInstKind::Indexed);
    4631             :     } else {
    4632             :       Opc = AArch64::FMLSv4f32;
    4633          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4634             :                              FMAInstKind::Accumulator);
    4635             :     }
    4636             :     break;
    4637          12 :   case MachineCombinerPattern::FMLSv2f32_OP1:
    4638             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
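                     :     // FMUL I=A,B
                     :     // FSUB R,I,C
                     :     // ==> FNEG V,C
                     :     // ==> FMLA R,A,B,V (accumulator form) // = -C + A*B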
    4639             :     RC = &AArch64::FPR64RegClass;
    4640          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4641             :     MachineInstrBuilder MIB1 =
    4642          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
    4643          12 :             .add(Root.getOperand(2));
    4644          12 :     InsInstrs.push_back(MIB1);
    4645          12 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4646          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
    4647             :       Opc = AArch64::FMLAv2i32_indexed;
    4648           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4649             :                              FMAInstKind::Indexed, &NewVR);
    4650             :     } else {
    4651             :       Opc = AArch64::FMLAv2f32;
    4652          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4653             :                              FMAInstKind::Accumulator, &NewVR);
    4654             :     }
    4655             :     break;
    4656             :   }
    4657          12 :   case MachineCombinerPattern::FMLSv4f32_OP1:
    4658             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    4659             :     RC = &AArch64::FPR128RegClass;
    4660          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4661             :     MachineInstrBuilder MIB1 =
    4662          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
    4663          12 :             .add(Root.getOperand(2));
    4664          12 :     InsInstrs.push_back(MIB1);
    4665          12 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4666          12 :     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
    4667             :       Opc = AArch64::FMLAv4i32_indexed;
    4668           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4669             :                              FMAInstKind::Indexed, &NewVR);
    4670             :     } else {
    4671             :       Opc = AArch64::FMLAv4f32;
    4672          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4673             :                              FMAInstKind::Accumulator, &NewVR);
    4674             :     }
    4675             :     break;
    4676             :   }
    4677          12 :   case MachineCombinerPattern::FMLSv2f64_OP1:
    4678             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    4679             :     RC = &AArch64::FPR128RegClass;
    4680          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4681             :     MachineInstrBuilder MIB1 =
    4682          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
    4683          12 :             .add(Root.getOperand(2));
    4684          12 :     InsInstrs.push_back(MIB1);
    4685          12 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4686          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
    4687             :       Opc = AArch64::FMLAv2i64_indexed;
    4688           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4689             :                              FMAInstKind::Indexed, &NewVR);
    4690             :     } else {
    4691             :       Opc = AArch64::FMLAv2f64;
    4692          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4693             :                              FMAInstKind::Accumulator, &NewVR);
    4694             :     }
    4695             :     break;
    4696             :   }
    4697             :   } // end switch (Pattern)
    4698             :   // Record MUL and ADD/SUB for deletion
    4699         294 :   DelInstrs.push_back(MUL);
    4700         294 :   DelInstrs.push_back(&Root);
    4701             : }
    4702             : 
    4703             : /// Replace a csinc-branch sequence with a simple conditional branch
    4704             : ///
    4705             : /// Examples:
    4706             : /// 1. \code
    4707             : ///   csinc  w9, wzr, wzr, <condition code>
    4708             : ///   tbnz   w9, #0, 0x44
    4709             : ///    \endcode
    4710             : /// to
    4711             : ///    \code
    4712             : ///   b.<inverted condition code>
    4713             : ///    \endcode
    4714             : ///
    4715             : /// 2. \code
    4716             : ///   csinc w9, wzr, wzr, <condition code>
    4717             : ///   tbz   w9, #0, 0x44
    4718             : ///    \endcode
    4719             : /// to
    4720             : ///    \code
    4721             : ///   b.<condition code>
    4722             : ///    \endcode
    4723             : ///
    4724             : /// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when the
    4725             : /// compare's constant operand is a power of 2.
    4726             : ///
    4727             : /// Examples:
    4728             : ///    \code
    4729             : ///   and  w8, w8, #0x400
    4730             : ///   cbnz w8, L1
    4731             : ///    \endcode
    4732             : /// to
    4733             : ///    \code
    4734             : ///   tbnz w8, #10, L1
    4735             : ///    \endcode
    4736             : ///
    4737             : /// \param  MI Conditional Branch
    4738             : /// \return True when the simple conditional branch is generated
    4739             : ///
    4740         973 : bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
    4741             :   bool IsNegativeBranch = false;
    4742             :   bool IsTestAndBranch = false;
    4743             :   unsigned TargetBBInMI = 0;
    4744        1946 :   switch (MI.getOpcode()) {
    4745           0 :   default:
    4746           0 :     llvm_unreachable("Unknown branch instruction?");
    4747             :   case AArch64::Bcc:
    4748             :     return false;
    4749             :   case AArch64::CBZW:
    4750             :   case AArch64::CBZX:
    4751             :     TargetBBInMI = 1;
    4752             :     break;
    4753         183 :   case AArch64::CBNZW:
    4754             :   case AArch64::CBNZX:
    4755             :     TargetBBInMI = 1;
    4756             :     IsNegativeBranch = true;
    4757         183 :     break;
    4758          80 :   case AArch64::TBZW:
    4759             :   case AArch64::TBZX:
    4760             :     TargetBBInMI = 2;
    4761             :     IsTestAndBranch = true;
    4762          80 :     break;
    4763          31 :   case AArch64::TBNZW:
    4764             :   case AArch64::TBNZX:
    4765             :     TargetBBInMI = 2;
    4766             :     IsNegativeBranch = true;
    4767             :     IsTestAndBranch = true;
    4768          31 :     break;
    4769             :   }
    4770             :   // So we increment a zero register and test for bits other
    4771             :   // than bit 0? Conservatively bail out in case the verifier
    4772             :   // missed this case.
    4773         389 :   if (IsTestAndBranch && MI.getOperand(1).getImm())
    4774             :     return false;
    4775             : 
    4776             :   // Find Definition.
    4777             :   assert(MI.getParent() && "Incomplete machine instruction\n");
    4778         350 :   MachineBasicBlock *MBB = MI.getParent();
    4779         350 :   MachineFunction *MF = MBB->getParent();
    4780         350 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    4781         350 :   unsigned VReg = MI.getOperand(0).getReg();
    4782         350 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
    4783             :     return false;
    4784             : 
    4785         350 :   MachineInstr *DefMI = MRI->getVRegDef(VReg);
    4786             : 
    4787             :   // Look through COPY instructions to find definition.
    4788         396 :   while (DefMI->isCopy()) {
    4789         201 :     unsigned CopyVReg = DefMI->getOperand(1).getReg();
    4790         201 :     if (!MRI->hasOneNonDBGUse(CopyVReg))
    4791             :       return false;
    4792         145 :     if (!MRI->hasOneDef(CopyVReg))
    4793             :       return false;
    4794          46 :     DefMI = MRI->getVRegDef(CopyVReg);
    4795             :   }
    4796             : 
    4797         195 :   switch (DefMI->getOpcode()) {
    4798             :   default:
    4799             :     return false;
    4800             :   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
    4801           7 :   case AArch64::ANDWri:
    4802             :   case AArch64::ANDXri: {
    4803           7 :     if (IsTestAndBranch)
    4804             :       return false;
    4805           7 :     if (DefMI->getParent() != MBB)
    4806             :       return false;
    4807           6 :     if (!MRI->hasOneNonDBGUse(VReg))
    4808             :       return false;
    4809             : 
    4810           5 :     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    4811           5 :     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
    4812           5 :         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    4813             :     if (!isPowerOf2_64(Mask))
    4814             :       return false;
    4815             : 
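                     :     // For example, and w8, w9, #0x400 feeding cbnz w8, L1 gives
                     :     // Mask == 0x400 and Log2_64(Mask) == 10, so we emit
                     :     // tbnz w9, #10, L1 (TBNZ because IsNegativeBranch is set for CBNZ).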
    4816             :     MachineOperand &MO = DefMI->getOperand(1);
    4817           5 :     unsigned NewReg = MO.getReg();
    4818           5 :     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
    4819             :       return false;
    4820             : 
    4821             :     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
    4822             : 
    4823             :     MachineBasicBlock &RefToMBB = *MBB;
    4824           5 :     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    4825             :     DebugLoc DL = MI.getDebugLoc();
    4826             :     unsigned Imm = Log2_64(Mask);
    4827             :     unsigned Opc = (Imm < 32)
    4828           5 :                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
    4829             :                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    4830          10 :     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
    4831           5 :                               .addReg(NewReg)
    4832           5 :                               .addImm(Imm)
    4833           5 :                               .addMBB(TBB);
    4834             :     // Register lives on to the TBZ/TBNZ now.
    4835             :     MO.setIsKill(false);
    4836             : 
    4837             :     // Immediates smaller than 32 must use the 32-bit (W) variant in
    4838             :     // all cases, since the 64-bit variant cannot encode them.
    4839             :     // Therefore, if the input register is 64-bit, we need to take its
    4840             :     // 32-bit sub-register.
    4842           5 :     if (!Is32Bit && Imm < 32)
    4843           3 :       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    4844           5 :     MI.eraseFromParent();
    4845             :     return true;
    4846             :   }
    4847             :   // Look for CSINC
    4848           1 :   case AArch64::CSINCWr:
    4849             :   case AArch64::CSINCXr: {
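                     :     // csinc w9, wzr, wzr, cc materializes (cc ? 0 : 1), so only bit 0
                     :     // is meaningful and the branch can test the condition directly.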
    4850           2 :     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
    4851           1 :           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
    4852           0 :         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
    4853           0 :           DefMI->getOperand(2).getReg() == AArch64::XZR))
    4854             :       return false;
    4855             : 
    4856           1 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    4857             :       return false;
    4858             : 
    4859           1 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    4860             :     // Convert only when the condition code is not modified between
    4861             :     // the CSINC and the branch. The CC may be used by other
    4862             :     // instructions in between.
    4863           1 :     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
    4864             :       return false;
    4865             :     MachineBasicBlock &RefToMBB = *MBB;
    4866           2 :     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    4867             :     DebugLoc DL = MI.getDebugLoc();
    4868           1 :     if (IsNegativeBranch)
    4869             :       CC = AArch64CC::getInvertedCondCode(CC);
    4870           2 :     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    4871           1 :     MI.eraseFromParent();
    4872             :     return true;
    4873             :   }
    4874             :   }
    4875             : }
    4876             : 
    4877             : std::pair<unsigned, unsigned>
    4878        2514 : AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
    4879             :   const unsigned Mask = AArch64II::MO_FRAGMENT;
    4880        2514 :   return std::make_pair(TF & Mask, TF & ~Mask);
    4881             : }
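                     : // For example, TF == (MO_PAGEOFF | MO_NC) splits into the direct flag
                     : // MO_PAGEOFF (the MO_FRAGMENT part) and the bitmask flag MO_NC; the
                     : // tables below give their serialized names.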
    4882             : 
    4883             : ArrayRef<std::pair<unsigned, const char *>>
    4884        2525 : AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
    4885             :   using namespace AArch64II;
    4886             : 
    4887             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4888             :       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
    4889             :       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
    4890             :       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
    4891             :       {MO_HI12, "aarch64-hi12"}};
    4892        2525 :   return makeArrayRef(TargetFlags);
    4893             : }
    4894             : 
    4895             : ArrayRef<std::pair<unsigned, const char *>>
    4896        1277 : AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
    4897             :   using namespace AArch64II;
    4898             : 
    4899             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4900             :       {MO_COFFSTUB, "aarch64-coffstub"},
    4901             :       {MO_GOT, "aarch64-got"},   {MO_NC, "aarch64-nc"},
    4902             :       {MO_TLS, "aarch64-tls"},   {MO_DLLIMPORT, "aarch64-dllimport"}};
    4903        1277 :   return makeArrayRef(TargetFlags);
    4904             : }
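                     : // In MIR the direct and bitmask names above appear together as, e.g.,
                     : // target-flags(aarch64-pageoff, aarch64-nc) on a global-value operand.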
    4905             : 
    4906             : ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
    4907          42 : AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
    4908             :   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
    4909             :       {{MOSuppressPair, "aarch64-suppress-pair"},
    4910             :        {MOStridedAccess, "aarch64-strided-access"}};
    4911          42 :   return makeArrayRef(TargetFlags);
    4912             : }
    4913             : 
    4914             : /// Constants defining how certain sequences should be outlined.
    4915             : /// This encompasses how an outlined function should be called, and what kind of
    4916             : /// frame should be emitted for that outlined function.
    4917             : ///
    4918             : /// \p MachineOutlinerDefault implies that the function should be called with
    4919             : /// a save and restore of LR to the stack.
    4920             : ///
    4921             : /// That is,
    4922             : ///
    4923             : /// I1     Save LR                    OUTLINED_FUNCTION:
    4924             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4925             : /// I3     Restore LR                 I2
    4926             : ///                                   I3
    4927             : ///                                   RET
    4928             : ///
    4929             : /// * Call construction overhead: 3 (save + BL + restore)
    4930             : /// * Frame construction overhead: 1 (ret)
    4931             : /// * Requires stack fixups? Yes
    4932             : ///
    4933             : /// \p MachineOutlinerTailCall implies that the function is being created from
    4934             : /// a sequence of instructions ending in a return.
    4935             : ///
    4936             : /// That is,
    4937             : ///
    4938             : /// I1                             OUTLINED_FUNCTION:
    4939             : /// I2 --> B OUTLINED_FUNCTION     I1
    4940             : /// RET                            I2
    4941             : ///                                RET
    4942             : ///
    4943             : /// * Call construction overhead: 1 (B)
    4944             : /// * Frame construction overhead: 0 (Return included in sequence)
    4945             : /// * Requires stack fixups? No
    4946             : ///
    4947             : /// \p MachineOutlinerNoLRSave implies that the function should be called using
    4948             : /// a BL instruction, but doesn't require LR to be saved and restored. This
    4949             : /// happens when LR is known to be dead.
    4950             : ///
    4951             : /// That is,
    4952             : ///
    4953             : /// I1                                OUTLINED_FUNCTION:
    4954             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4955             : /// I3                                I2
    4956             : ///                                   I3
    4957             : ///                                   RET
    4958             : ///
    4959             : /// * Call construction overhead: 1 (BL)
    4960             : /// * Frame construction overhead: 1 (RET)
    4961             : /// * Requires stack fixups? No
    4962             : ///
    4963             : /// \p MachineOutlinerThunk implies that the function is being created from
    4964             : /// a sequence of instructions ending in a call. The outlined function is
    4965             : /// called with a BL instruction, and the outlined function tail-calls the
    4966             : /// original call destination.
    4967             : ///
    4968             : /// That is,
    4969             : ///
    4970             : /// I1                                OUTLINED_FUNCTION:
    4971             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4972             : /// BL f                              I2
    4973             : ///                                   B f
    4974             : /// * Call construction overhead: 1 (BL)
    4975             : /// * Frame construction overhead: 0
    4976             : /// * Requires stack fixups? No
    4977             : ///
    4978             : /// \p MachineOutlinerRegSave implies that the function should be called with a
    4979             : /// save and restore of LR to an available register. This allows us to avoid
    4980             : /// stack fixups. Note that this outlining variant is compatible with the
    4981             : /// NoLRSave case.
    4982             : ///
    4983             : /// That is,
    4984             : ///
    4985             : /// I1     Save LR                    OUTLINED_FUNCTION:
    4986             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4987             : /// I3     Restore LR                 I2
    4988             : ///                                   I3
    4989             : ///                                   RET
    4990             : ///
    4991             : /// * Call construction overhead: 3 (save + BL + restore)
    4992             : /// * Frame construction overhead: 1 (ret)
    4993             : /// * Requires stack fixups? No
    4994             : enum MachineOutlinerClass {
    4995             :   MachineOutlinerDefault,  ///< Emit a save, restore, call, and return.
    4996             :   MachineOutlinerTailCall, ///< Only emit a branch.
    4997             :   MachineOutlinerNoLRSave, ///< Emit a call and return.
    4998             :   MachineOutlinerThunk,    ///< Emit a call and tail-call.
    4999             :   MachineOutlinerRegSave   ///< Same as default, but save to a register.
    5000             : };
    5001             : 
    5002             : enum MachineOutlinerMBBFlags {
    5003             :   LRUnavailableSomewhere = 0x2,
    5004             :   HasCalls = 0x4
    5005             : };
    5006             : 
    5007             : unsigned
    5008         254 : AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
    5009         254 :   MachineFunction *MF = C.getMF();
    5010             :   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
    5011         254 :       MF->getSubtarget().getRegisterInfo());
    5012             : 
    5013             :   // Check if there is an available register across the sequence that we can
    5014             :   // use.
    5015         448 :   for (unsigned Reg : AArch64::GPR64RegClass) {
    5016         443 :     if (!ARI->isReservedReg(*MF, Reg) &&
    5017         856 :         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
    5018         428 :         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
    5019         413 :         Reg != AArch64::X17 && // Ditto for X17.
    5020        1115 :         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
    5021         249 :       return Reg;
    5022             :   }
    5023             : 
    5024             :   // No suitable register. Return 0.
    5025             :   return 0u;
    5026             : }
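                     : // A nonzero result lets the outliner use the MachineOutlinerRegSave
                     : // variant described above, avoiding stack fixups; a result of 0 means
                     : // no register is free and that variant cannot be used here.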
    5027             : 
    5028             : outliner::OutlinedFunction
    5029         182 : AArch64InstrInfo::getOutliningCandidateInfo(
    5030             :     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
    5031         182 :   unsigned SequenceSize = std::accumulate(
    5032         182 :       RepeatedSequenceLocs[0].front(),
    5033         182 :       std::next(RepeatedSequenceLocs[0].back()),
    5034             :       0, [this](unsigned Sum, const MachineInstr &MI) {
    5035         880 :         return Sum + getInstSizeInBytes(MI);
    5036         182 :       });
    5037             : 
    5038             :   // Compute liveness information for each candidate.
    5039         182 :   const TargetRegisterInfo &TRI = getRegisterInfo();
    5040             :   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
    5041         451 :                 [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
    5042             : 
    5043             :   // According to the AArch64 Procedure Call Standard, the following are
    5044             :   // undefined on entry/exit from a function call:
    5045             :   //
    5046             :   // * Registers x16, x17, (and thus w16, w17)
    5047             :   // * Condition codes (and thus the NZCV register)
    5048             :   //
    5049             :   // Because of this, we can't outline any sequence of instructions where
    5050             :   // one of these registers is live into/across it. Thus, we need to
    5051             :   // delete those candidates.
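                     :   //
                     :   // For example, if w16 were live across a candidate, the BL we insert
                     :   // could be routed through a linker-inserted veneer at link time, and
                     :   // veneers are free to clobber x16/x17:
                     :   //
                     :   //   bl OUTLINED_FUNCTION_0  // may become: b <veneer>, where the veneer
                     :   //                           // does: adrp x16, ...; br x16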
    5054             :   auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
    5055             :     LiveRegUnits LRU = C.LRU;
    5056             :     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
    5057             :             !LRU.available(AArch64::NZCV));
    5058             :   };
    5059             : 
    5060             :   // Erase every candidate that violates the restrictions above. (It's
    5061             :   // possible that we still have viable candidates, so it's not worth
    5062             :   // bailing out if, say, only 1 out of 20 candidates violates them.)
    5063             :   RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
    5064             :                                             RepeatedSequenceLocs.end(),
    5065         182 :                                             CantGuaranteeValueAcrossCall),
    5066             :                              RepeatedSequenceLocs.end());
    5067             : 
    5068             :   // If the sequence is empty, we're done.
    5069             :   // If no candidates survived the pruning above, we're done.
    5070             :     return outliner::OutlinedFunction();
    5071             : 
    5072             :   // At this point, we have only "safe" candidates to outline. Figure out
    5073             :   // frame + call instruction information.
    5074             : 
    5075         177 :   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
    5076             : 
    5077             :   // Helper lambda which sets call information for every candidate.
    5078             :   auto SetCandidateCallInfo =
    5079             :       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
    5080         606 :         for (outliner::Candidate &C : RepeatedSequenceLocs)
    5081             :           C.setCallInfo(CallID, NumBytesForCall);
    5082             :       };
    5083             : 
    5084             :   unsigned FrameID = MachineOutlinerDefault;
    5085             :   unsigned NumBytesToCreateFrame = 4;
    5086             : 
    5087             :   bool HasBTI =
    5088             :       std::any_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
    5089             :                   [](outliner::Candidate &C) {
    5090           0 :                     return C.getMF()->getFunction().hasFnAttribute(
    5091             :                         "branch-target-enforcement");
    5092             :                   });
    5093             : 
    5094             :   // If the last instruction in any candidate is a terminator, then we should
    5095             :   // tail call all of the candidates.
    5096         177 :   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    5097             :     FrameID = MachineOutlinerTailCall;
    5098             :     NumBytesToCreateFrame = 0;
    5099             :     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
    5100             :   }
    5101             : 
    5102         142 :   else if (LastInstrOpcode == AArch64::BL ||
    5103         125 :            (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
    5104             :     // FIXME: Do we need to check if the code after this uses the value of LR?
    5105             :     FrameID = MachineOutlinerThunk;
    5106             :     NumBytesToCreateFrame = 0;
    5107             :     SetCandidateCallInfo(MachineOutlinerThunk, 4);
    5108             :   }
    5109             : 
    5110             :   // Make sure that LR isn't live on entry to this candidate. The only
    5111             :   // instructions that use LR that could possibly appear in a repeated sequence
    5112             :   // are calls. Therefore, we only have to check whether LR is dead on entry
    5113             :   // to (or exit from) each candidate.
    5114         120 :   else if (std::all_of(RepeatedSequenceLocs.begin(),
    5115             :                        RepeatedSequenceLocs.end(),
    5116             :                        [](outliner::Candidate &C) {
    5117           0 :                          return C.LRU.available(AArch64::LR);
    5118             :                          })) {
    5119             :     FrameID = MachineOutlinerNoLRSave;
    5120             :     NumBytesToCreateFrame = 4;
    5121             :     SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
    5122             :   }
    5123             : 
    5124             :   // LR is live, so we need to save it. Decide whether it should be saved to
    5125             :   // the stack, or if it can be saved to a register.
    5126             :   else {
    5127         103 :     if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
    5128             :                     [this](outliner::Candidate &C) {
    5129           0 :                       return findRegisterToSaveLRTo(C);
    5130             :                     })) {
    5131             :       // Every candidate has an available callee-saved register for the save.
    5132             :       // We can save LR to a register.
    5133             :       FrameID = MachineOutlinerRegSave;
    5134             :       NumBytesToCreateFrame = 4;
    5135             :       SetCandidateCallInfo(MachineOutlinerRegSave, 12);
    5136             :     }
    5137             : 
    5138             :     else {
    5139             :       // At least one candidate does not have an available callee-saved
    5140             :       // register. We must save LR to the stack.
    5141             :       FrameID = MachineOutlinerDefault;
    5142             :       NumBytesToCreateFrame = 4;
    5143             :       SetCandidateCallInfo(MachineOutlinerDefault, 12);
    5144             :     }
    5145             :   }
    5146             : 
    5147             :   // Check if the range contains a call. These require a save + restore of the
    5148             :   // link register.
    5149         177 :   if (std::any_of(RepeatedSequenceLocs[0].front(),
    5150         177 :                   RepeatedSequenceLocs[0].back(),
    5151             :                   [](const MachineInstr &MI) { return MI.isCall(); }))
    5152           7 :     NumBytesToCreateFrame += 8; // Save + restore the link register.
    5153             : 
    5154             :   // Handle the last instruction separately. If this is a tail call, then the
    5155             :   // last instruction is a call. We don't want to save + restore in this case.
    5156             :   // However, it is possible for the last instruction to be a call even when
    5157             :   // it isn't valid to tail call this sequence, so we handle that case too.
    5158         340 :   else if (FrameID != MachineOutlinerThunk &&
    5159         283 :            FrameID != MachineOutlinerTailCall &&
    5160         113 :            RepeatedSequenceLocs[0].back()->isCall())
    5161           1 :     NumBytesToCreateFrame += 8;
    5162             : 
    5163             :   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
    5164         177 :                                     NumBytesToCreateFrame, FrameID);
    5165             : }
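                     : 
                     : // A rough size sketch for the numbers above (the weighing itself happens
                     : // in the generic outliner, not here): a 12-byte sequence repeated four
                     : // times costs 48 bytes inline. Outlined with MachineOutlinerDefault it
                     : // costs 4 calls * 12 bytes + 12 bytes of body + 4 bytes for the RET =
                     : // 64 bytes, a net loss; with MachineOutlinerTailCall it costs
                     : // 4 * 4 + 12 + 0 = 28 bytes, a 20-byte win. Short sequences thus tend
                     : // to pay off only with the cheaper call variants.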
    5166             : 
    5167         117 : bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    5168             :     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
    5169         117 :   const Function &F = MF.getFunction();
    5170             : 
    5171             :   // Can F be deduplicated by the linker? If it can, don't outline from it.
    5172         117 :   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    5173             :     return false;
    5174             : 
    5175             :   // Don't outline from functions with section markings; the program could
    5176             :   // expect that all the code is in the named section.
    5177             :   // FIXME: Allow outlining from multiple functions with the same section
    5178             :   // marking.
    5179         115 :   if (F.hasSection())
    5180             :     return false;
    5181             : 
    5182             :   // Outlining from functions with redzones is unsafe since the outliner may
    5183             :   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
    5184             :   // outline from it.
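                     :   // For example, red-zone code may keep live data just below SP with
                     :   // "stur x0, [sp, #-16]"; the "str x30, [sp, #-16]!" inserted around an
                     :   // outlined call would silently overwrite that slot.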
    5185         112 :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    5186         336 :   if (!AFI || AFI->hasRedZone().getValueOr(true))
    5187           1 :     return false;
    5188             : 
    5189             :   // It's safe to outline from MF.
    5190             :   return true;
    5191             : }
    5192             : 
    5193             : unsigned
    5194         129 : AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
    5195             :   unsigned Flags = 0x0;
    5196             :   // Check if there's a call inside this MachineBasicBlock. If there is, then
    5197             :   // set a flag.
    5198         129 :   if (std::any_of(MBB.begin(), MBB.end(),
    5199             :                   [](MachineInstr &MI) { return MI.isCall(); }))
    5200             :     Flags |= MachineOutlinerMBBFlags::HasCalls;
    5201             : 
    5202             :   // Check if LR is available through all of the MBB. If it's not, then set
    5203             :   // a flag.
    5204             :   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
    5205             :          "Suitable Machine Function for outlining must track liveness");
    5206         129 :   LiveRegUnits LRU(getRegisterInfo());
    5207         129 :   LRU.addLiveOuts(MBB);
    5208             : 
    5209             :   std::for_each(MBB.rbegin(),
    5210             :                 MBB.rend(),
    5211        1418 :                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
    5212             : 
    5213         129 :   if (!LRU.available(AArch64::LR))
    5214         113 :       Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
    5215             : 
    5216         129 :   return Flags;
    5217             : }
    5218             : 
    5219             : outliner::InstrType
    5220        1289 : AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
    5221             :                                    unsigned Flags) const {
    5222             :   MachineInstr &MI = *MIT;
    5223        1289 :   MachineBasicBlock *MBB = MI.getParent();
    5224        1289 :   MachineFunction *MF = MBB->getParent();
    5225        1289 :   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
    5226             : 
    5227             :   // Don't outline LOHs.
    5228        1289 :   if (FuncInfo->getLOHRelated().count(&MI))
    5229             :     return outliner::InstrType::Illegal;
    5230             : 
    5231             :   // Don't allow debug values to impact outlining type.
    5232             :   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    5233             :     return outliner::InstrType::Invisible;
    5234             : 
    5235             :   // At this point, KILL instructions don't really tell us much so we can go
    5236             :   // ahead and skip over them.
    5237        1283 :   if (MI.isKill())
    5238             :     return outliner::InstrType::Invisible;
    5239             : 
    5240             :   // Is this a terminator for a basic block?
    5241        1282 :   if (MI.isTerminator()) {
    5242             : 
    5243             :     // Is this the end of a function?
    5244         216 :     if (MI.getParent()->succ_empty())
    5245             :       return outliner::InstrType::Legal;
    5246             : 
    5247             :     // It's not, so don't outline it.
    5248           7 :     return outliner::InstrType::Illegal;
    5249             :   }
    5250             : 
    5251             :   // Make sure none of the operands are un-outlinable.
    5252        4917 :   for (const MachineOperand &MOP : MI.operands()) {
    5253        3837 :     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
    5254             :         MOP.isTargetIndex())
    5255             :       return outliner::InstrType::Illegal;
    5256             : 
    5257             :     // If it uses LR or W30 explicitly, then don't touch it.
    5258        3776 :     if (MOP.isReg() && !MOP.isImplicit() &&
    5259        2402 :         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
    5260             :       return outliner::InstrType::Illegal;
    5261             :   }
    5262             : 
    5263             :   // Special cases for instructions that can always be outlined, but will fail
    5264             :   // the later tests. E.g. ADRPs, which are PC-relative, use LR, but can always
    5265             :   // be outlined because they don't require a *specific* value to be in LR.
    5266        2160 :   if (MI.getOpcode() == AArch64::ADRP)
    5267             :     return outliner::InstrType::Legal;
    5268             : 
    5269             :   // If MI is a call we might be able to outline it. We don't want to outline
    5270             :   // any calls that rely on the position of items on the stack. When we outline
    5271             :   // something containing a call, we have to emit a save and restore of LR in
    5272             :   // the outlined function. Currently, this always happens by saving LR to the
    5273             :   // stack. Thus, if we outline, say, half the parameters for a function call
    5274             :   // plus the call, then we'll break the callee's expectations for the layout
    5275             :   // of the stack.
    5276             :   //
    5277             :   // FIXME: Allow calls to functions which construct a stack frame, as long
    5278             :   // as they don't access arguments on the stack.
    5279             :   // FIXME: Figure out some way to analyze functions defined in other modules.
    5280             :   // We should be able to compute the memory usage based on the IR calling
    5281             :   // convention, even if we can't see the definition.
    5282        1022 :   if (MI.isCall()) {
    5283             :     // Get the function associated with the call. Look at each operand and find
    5284             :     // the one that represents the callee and get its name.
    5285             :     const Function *Callee = nullptr;
    5286          56 :     for (const MachineOperand &MOP : MI.operands()) {
    5287          51 :       if (MOP.isGlobal()) {
    5288          16 :         Callee = dyn_cast<Function>(MOP.getGlobal());
    5289             :         break;
    5290             :       }
    5291             :     }
    5292             : 
    5293             :     // Never outline calls to mcount.  There isn't any rule that would require
    5294             :     // this, but the Linux kernel's "ftrace" feature depends on it.
    5295          21 :     if (Callee && Callee->getName() == "\01_mcount")
    5296             :       return outliner::InstrType::Illegal;
    5297             : 
    5298             :     // If we don't know anything about the callee, assume it depends on the
    5299             :     // stack layout of the caller. In that case, it's only legal to outline
    5300             :     // as a tail-call.  Whitelist the call instructions we know about so we
    5301             :     // don't get unexpected results with call pseudo-instructions.
    5302             :     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    5303          42 :     if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
    5304             :       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
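                     :     // (LegalTerminator: the call may still be outlined, but only as the
                     :     // final instruction of a candidate, where it is emitted as a tail
                     :     // call and the caller's stack layout is unchanged at the call.)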
    5305             : 
    5306          21 :     if (!Callee)
    5307             :       return UnknownCallOutlineType;
    5308             : 
    5309             :     // We have a function we have information about. Check if it's something
    5310             :     // we can safely outline.
    5311          16 :     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
    5312             : 
    5313             :     // We don't know what's going on with the callee at all. Don't touch it.
    5314          16 :     if (!CalleeMF)
    5315             :       return UnknownCallOutlineType;
    5316             : 
    5317             :     // Check if we know anything about the callee saves on the function. If we
    5318             :     // don't, then don't touch it, since that implies that we haven't
    5319             :     // computed anything about its stack frame yet.
    5320          10 :     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    5321          10 :     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
    5322             :         MFI.getNumObjects() > 0)
    5323           2 :       return UnknownCallOutlineType;
    5324             : 
    5325             :     // At this point, we can say that CalleeMF ought to not pass anything on the
    5326             :     // stack. Therefore, we can outline it.
    5327             :     return outliner::InstrType::Legal;
    5328             :   }
    5329             : 
    5330             :   // Don't outline position-marking instructions (labels, CFI instructions).
    5331             :   if (MI.isPosition())
    5332             :     return outliner::InstrType::Illegal;
    5333             : 
    5334             :   // Don't touch the link register or W30.
    5335        2002 :   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
    5336             :       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    5337           0 :     return outliner::InstrType::Illegal;
    5338             : 
    5339             :   // Does this use the stack?
    5340        1951 :   if (MI.modifiesRegister(AArch64::SP, &RI) ||
    5341             :       MI.readsRegister(AArch64::SP, &RI)) {
    5342             :     // True if there is no chance that any outlined candidate from this range
    5343             :     // could require stack fixups. That is, both
    5344             :     // * LR is available in the range (No save/restore around call)
    5345             :     // * The range doesn't include calls (No save/restore in outlined frame)
    5346             :     // are true.
    5347             :     // FIXME: This is very restrictive; the flags check the whole block,
    5348             :     // not just the bit we will try to outline.
    5349             :     bool MightNeedStackFixUp =
    5350         157 :         (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
    5351             :                   MachineOutlinerMBBFlags::HasCalls));
    5352             : 
    5353             :     // If this instruction is in a range where it *never* needs to be fixed
    5354             :     // up, then we can *always* outline it. This is true even if it's not
    5355             :     // possible to fix that instruction up.
    5356             :     //
    5357             :     // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
    5358             :     // use SP. Suppose that I1 sits within a range that definitely doesn't
    5359             :     // need stack fixups, while I2 sits in a range that does.
    5360             :     //
    5361             :     // First, I1 can be outlined as long as we *never* fix up the stack in
    5362             :     // any sequence containing it. I1 is already a safe instruction in the
    5363             :     // original program, so as long as we don't modify it we're good to go.
    5364             :     // So this leaves us with showing that outlining I2 won't break our
    5365             :     // program.
    5366             :     //
    5367             :     // Suppose I1 and I2 belong to equivalent candidate sequences. When we
    5368             :     // look at I2, we need to see if it can be fixed up. Suppose I2 (and
    5369             :     // thus I1) cannot be fixed up. Then I2 will be assigned a unique
    5370             :     // integer label; thus, I2 cannot belong to any candidate sequence (a
    5371             :     // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
    5372             :     // as well, so we're good. Thus, I1 is always safe to outline.
    5373             :     //
    5374             :     // This gives us two things: first off, it buys us some more instructions
    5375             :     // for our search space by deeming stack instructions illegal only when
    5376             :     // they can't be fixed up AND we might have to fix them up. Second off,
    5377             :     // This allows us to catch tricky instructions like, say,
    5378             :     // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
    5379             :     // be paired with later SUBXris, which might *not* end up being outlined.
    5380             :     // If we mess with the stack to save something, then an ADDXri messes with
    5381             :     // it *after*, then we aren't going to restore the right something from
    5382             :     // the stack if we don't outline the corresponding SUBXri first. ADDXris and
    5383             :     // SUBXris are extremely common in prologue/epilogue code, so supporting
    5384             :     // them in the outliner can be a pretty big win!
    5385         157 :     if (!MightNeedStackFixUp)
    5386             :       return outliner::InstrType::Legal;
    5387             : 
    5388             :     // Any modification of SP will break our code to save/restore LR.
    5389             :     // FIXME: We could handle some instructions which add a constant offset to
    5390             :     // SP, with a bit more work.
    5391         155 :     if (MI.modifiesRegister(AArch64::SP, &RI))
    5392             :       return outliner::InstrType::Illegal;
    5393             : 
    5394             :     // At this point, we have a stack instruction that we might need to fix
    5395             :     // up. We'll handle it if it's a load or store.
    5396         104 :     if (MI.mayLoadOrStore()) {
    5397             :       unsigned Base;  // Filled with the base register of MI.
    5398             :       int64_t Offset; // Filled with the offset of MI.
    5399             :       unsigned DummyWidth;
    5400             : 
    5401             :       // Does it allow us to offset the base register and is the base SP?
    5402          97 :       if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
    5403          97 :           Base != AArch64::SP)
    5404             :         return outliner::InstrType::Illegal;
    5405             : 
    5406             :       // Find the minimum/maximum offset for this instruction and check if
    5407             :       // fixing it up would be in range.
    5408             :       int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
    5409             :       unsigned Scale;               // The scale to multiply the offsets by.
    5410         194 :       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
    5411             : 
    5412             :       // TODO: We should really test what happens if an instruction overflows.
    5413             :       // This is tricky to test with IR tests, but when the outliner is moved
    5414             :       // to a MIR test, it really ought to be checked.
    5415          97 :       Offset += 16; // Update the offset to what it would be if we outlined.
    5416          97 :       if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
    5417             :         return outliner::InstrType::Illegal;
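                     :       // E.g. "str x0, [sp, #32760]" (STRXui: Scale = 8, MaxOffset = 4095)
                     :       // is already at the largest encodable offset; 32760 + 16 exceeds
                     :       // 4095 * 8, so it is rejected here rather than outlined.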
    5418             : 
    5419             :       // It's in range, so we can outline it.
    5420          97 :       return outliner::InstrType::Legal;
    5421             :     }
    5422             : 
    5423             :     // FIXME: Add handling for instructions like "add x0, sp, #8".
    5424             : 
    5425             :     // We can't fix it up, so don't outline it.
    5426             :     return outliner::InstrType::Illegal;
    5427             :   }
    5428             : 
    5429             :   return outliner::InstrType::Legal;
    5430             : }
    5431             : 
    5432           3 : void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
    5433          21 :   for (MachineInstr &MI : MBB) {
    5434             :     unsigned Base, Width;
    5435             :     int64_t Offset;
    5436             : 
    5437             :     // Is this a load or store with an immediate offset with SP as the base?
    5438          18 :     if (!MI.mayLoadOrStore() ||
    5439          18 :         !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
    5440           0 :         Base != AArch64::SP)
    5441          18 :       continue;
    5442             : 
    5443             :     // It is, so we have to fix it up.
    5444             :     unsigned Scale;
    5445             :     int64_t Dummy1, Dummy2;
    5446             : 
    5447           0 :     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    5448             :     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    5449           0 :     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    5450             :     assert(Scale != 0 && "Unexpected opcode!");
    5451             : 
    5452             :     // We've pushed the return address to the stack, so add 16 to the offset.
    5453             :     // This is safe, since we already checked if it would overflow when we
    5454             :     // checked if this instruction was legal to outline.
    5455           0 :     int64_t NewImm = (Offset + 16) / Scale;
    5456             :     StackOffsetOperand.setImm(NewImm);
    5457             :   }
    5458           3 : }
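                     : 
                     : // A worked example of the fixup above: "ldr x0, [sp, #8]" (LDRXui,
                     : // Scale = 8) carries immediate operand 1 and reports Offset = 8 bytes.
                     : // With LR pushed, its slot is 16 bytes further from SP, so
                     : // NewImm = (8 + 16) / 8 = 3, producing "ldr x0, [sp, #24]".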
    5459             : 
    5460          26 : void AArch64InstrInfo::buildOutlinedFrame(
    5461             :     MachineBasicBlock &MBB, MachineFunction &MF,
    5462             :     const outliner::OutlinedFunction &OF) const {
    5463             :   // For thunk outlining, rewrite the last instruction from a call to a
    5464             :   // tail-call.
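                     :   // E.g. a candidate ending in "bl callee" becomes a thunk whose final
                     :   // instruction is "b callee": TCRETURNdi/TCRETURNriALL lower to plain
                     :   // branches, so callee returns directly to the outlined function's caller.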
    5465          26 :   if (OF.FrameConstructionID == MachineOutlinerThunk) {
    5466             :     MachineInstr *Call = &*--MBB.instr_end();
    5467             :     unsigned TailOpcode;
    5468          10 :     if (Call->getOpcode() == AArch64::BL) {
    5469             :       TailOpcode = AArch64::TCRETURNdi;
    5470             :     } else {
    5471             :       assert(Call->getOpcode() == AArch64::BLR);
    5472             :       TailOpcode = AArch64::TCRETURNriALL;
    5473             :     }
    5474          15 :     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
    5475           5 :                             .add(Call->getOperand(0))
    5476             :                             .addImm(0);
    5477             :     MBB.insert(MBB.end(), TC);
    5478           5 :     Call->eraseFromParent();
    5479             :   }
    5480             : 
    5481             :   // Is there a call in the outlined range?
    5482             :   auto IsNonTailCall = [](MachineInstr &MI) {
    5483             :     return MI.isCall() && !MI.isReturn();
    5484             :   };
    5485          26 :   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    5486             :     // Fix up the instructions in the range, since we're going to modify the
    5487             :     // stack.
    5488             :     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
    5489             :            "Can only fix up stack references once");
    5490           2 :     fixupPostOutline(MBB);
    5491             : 
    5492             :     // LR has to be a live-in so that we can save it.
    5493             :     MBB.addLiveIn(AArch64::LR);
    5494             : 
    5495             :     MachineBasicBlock::iterator It = MBB.begin();
    5496             :     MachineBasicBlock::iterator Et = MBB.end();
    5497             : 
    5498           2 :     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
    5499             :         OF.FrameConstructionID == MachineOutlinerThunk)
    5500           0 :       Et = std::prev(MBB.end());
    5501             : 
    5502             :     // Insert a save before the outlined region.
    5503           6 :     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
    5504           2 :                                 .addReg(AArch64::SP, RegState::Define)
    5505           2 :                                 .addReg(AArch64::LR)
    5506           2 :                                 .addReg(AArch64::SP)
    5507           2 :                                 .addImm(-16);
    5508             :     It = MBB.insert(It, STRXpre);
    5509             : 
    5510           2 :     const TargetSubtargetInfo &STI = MF.getSubtarget();
    5511           2 :     const MCRegisterInfo *MRI = STI.getRegisterInfo();
    5512           2 :     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
    5513             : 
    5514             :     // Add a CFI saying the stack was moved 16 B down.
    5515             :     int64_t StackPosEntry =
    5516           2 :         MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    5517           4 :     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
    5518             :         .addCFIIndex(StackPosEntry)
    5519             :         .setMIFlags(MachineInstr::FrameSetup);
    5520             : 
    5521             :     // Add a CFI saying that the LR that we want to find is now 16 B higher than
    5522             :     // before.
    5523             :     int64_t LRPosEntry =
    5524           2 :         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    5525           4 :     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
    5526             :         .addCFIIndex(LRPosEntry)
    5527             :         .setMIFlags(MachineInstr::FrameSetup);
    5528             : 
    5529             :     // Insert a restore before the terminator for the function.
    5530           6 :     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
    5531           2 :                                  .addReg(AArch64::SP, RegState::Define)
    5532           2 :                                  .addReg(AArch64::LR, RegState::Define)
    5533           2 :                                  .addReg(AArch64::SP)
    5534           2 :                                  .addImm(16);
    5535             :     Et = MBB.insert(Et, LDRXpost);
    5536             :   }
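                     : 
                     :   // The frame built above, roughly:
                     :   //   str x30, [sp, #-16]!          // spill LR (STRXpre)
                     :   //   <CFI for the SP move and the saved LR>
                     :   //   ... outlined instructions ...
                     :   //   ldr x30, [sp], #16            // reload LR (LDRXpost)
                     :   //   ret                           // appended below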
    5537             : 
    5538             :   // If this is a tail call outlined function, then there's already a return.
    5539          26 :   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
    5540             :       OF.FrameConstructionID == MachineOutlinerThunk)
    5541             :     return;
    5542             : 
    5543             :   // It's not a tail call, so we have to insert the return ourselves.
    5544          42 :   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
    5545          14 :                           .addReg(AArch64::LR, RegState::Undef);
    5546             :   MBB.insert(MBB.end(), ret);
    5547             : 
    5548             :   // Did we have to modify the stack by saving the link register?
    5549          14 :   if (OF.FrameConstructionID != MachineOutlinerDefault)
    5550             :     return;
    5551             : 
    5552             :   // We modified the stack.
    5553             :   // Walk over the basic block and fix up all the stack accesses.
    5554           1 :   fixupPostOutline(MBB);
    5555             : }
    5556             : 
    5557          75 : MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    5558             :     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    5559             :     MachineFunction &MF, const outliner::Candidate &C) const {
    5560             : 
    5561             :   // Are we tail calling?
    5562          75 :   if (C.CallConstructionID == MachineOutlinerTailCall) {
    5563             :     // If yes, then we can just branch to the label.
    5564          52 :     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
    5565          26 :                             .addGlobalAddress(M.getNamedValue(MF.getName()))
    5566          26 :                             .addImm(0));
    5567          26 :     return It;
    5568             :   }
    5569             : 
    5570             :   // Are we saving the link register?
    5571          49 :   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
    5572             :       C.CallConstructionID == MachineOutlinerThunk) {
    5573             :     // No, so just insert the call.
    5574          48 :     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
    5575          48 :                             .addGlobalAddress(M.getNamedValue(MF.getName())));
    5576          24 :     return It;
    5577             :   }
    5578             : 
    5579             :   // We want to return the spot where we inserted the call.
    5580             :   MachineBasicBlock::iterator CallPt;
    5581             : 
    5582             :   // Instructions for saving and restoring LR around the call instruction we're
    5583             :   // going to insert.
    5584             :   MachineInstr *Save;
    5585             :   MachineInstr *Restore;
    5586             :   // Can we save to a register?
    5587          25 :   if (C.CallConstructionID == MachineOutlinerRegSave) {
    5588             :     // FIXME: This logic should be sunk into a target-specific interface so that
    5589             :     // we don't have to recompute the register.
    5590          22 :     unsigned Reg = findRegisterToSaveLRTo(C);
    5591             :     assert(Reg != 0 && "No callee-saved register available?");
    5592             : 
    5593             :     // Save and restore LR from that register.
    5594          44 :     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
    5595          22 :                .addReg(AArch64::XZR)
    5596          22 :                .addReg(AArch64::LR)
    5597          22 :                .addImm(0);
    5598          66 :     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
    5599          22 :                 .addReg(AArch64::XZR)
    5600          22 :                 .addReg(Reg)
    5601          22 :                 .addImm(0);
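                     :     // (ORRXrs Rd, XZR, Rm, #0 is the canonical "mov Rd, Rm": the pair
                     :     // above is "mov xN, lr" before the call and "mov lr, xN" after.)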
    5602             :   } else {
    5603             :     // We have the default case. Save and restore from SP.
    5604           6 :     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
    5605           3 :                .addReg(AArch64::SP, RegState::Define)
    5606           3 :                .addReg(AArch64::LR)
    5607           3 :                .addReg(AArch64::SP)
    5608           3 :                .addImm(-16);
    5609           9 :     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
    5610           3 :                   .addReg(AArch64::SP, RegState::Define)
    5611           3 :                   .addReg(AArch64::LR, RegState::Define)
    5612           3 :                   .addReg(AArch64::SP)
    5613           3 :                   .addImm(16);
    5614             :   }
    5615             : 
    5616             :   It = MBB.insert(It, Save);
    5617             :   It++;
    5618             : 
    5619             :   // Insert the call.
    5620          50 :   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
    5621          50 :                           .addGlobalAddress(M.getNamedValue(MF.getName())));
    5622          25 :   CallPt = It;
    5623             :   It++;
    5624             : 
    5625          25 :   It = MBB.insert(It, Restore);
    5626          25 :   return CallPt;
    5627             : }
    5628             : 
    5629       14017 : bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
    5630             :   MachineFunction &MF) const {
    5631       14017 :   return MF.getFunction().optForMinSize();
    5632             : }

Generated by: LCOV version 1.13