LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64InstrInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1738 2048 84.9 %
Date: 2017-09-14 15:23:50 Functions: 87 88 98.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains the AArch64 implementation of the TargetInstrInfo class.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64InstrInfo.h"
      15             : #include "AArch64MachineFunctionInfo.h"
      16             : #include "AArch64Subtarget.h"
      17             : #include "MCTargetDesc/AArch64AddressingModes.h"
      18             : #include "Utils/AArch64BaseInfo.h"
      19             : #include "llvm/ADT/ArrayRef.h"
      20             : #include "llvm/ADT/STLExtras.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/CodeGen/MachineBasicBlock.h"
      23             : #include "llvm/CodeGen/MachineFrameInfo.h"
      24             : #include "llvm/CodeGen/MachineFunction.h"
      25             : #include "llvm/CodeGen/MachineInstr.h"
      26             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      27             : #include "llvm/CodeGen/MachineMemOperand.h"
      28             : #include "llvm/CodeGen/MachineOperand.h"
      29             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      30             : #include "llvm/CodeGen/StackMaps.h"
      31             : #include "llvm/IR/DebugLoc.h"
      32             : #include "llvm/IR/GlobalValue.h"
      33             : #include "llvm/MC/MCInst.h"
      34             : #include "llvm/MC/MCInstrDesc.h"
      35             : #include "llvm/Support/Casting.h"
      36             : #include "llvm/Support/CodeGen.h"
      37             : #include "llvm/Support/CommandLine.h"
      38             : #include "llvm/Support/Compiler.h"
      39             : #include "llvm/Support/ErrorHandling.h"
      40             : #include "llvm/Support/MathExtras.h"
      41             : #include "llvm/Target/TargetMachine.h"
      42             : #include "llvm/Target/TargetOptions.h"
      43             : #include "llvm/Target/TargetRegisterInfo.h"
      44             : #include "llvm/Target/TargetSubtargetInfo.h"
      45             : #include <cassert>
      46             : #include <cstdint>
      47             : #include <iterator>
      48             : #include <utility>
      49             : 
      50             : using namespace llvm;
      51             : 
      52             : #define GET_INSTRINFO_CTOR_DTOR
      53             : #include "AArch64GenInstrInfo.inc"
      54             : 
      55       72306 : static cl::opt<unsigned> TBZDisplacementBits(
      56      216918 :     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
      57      289224 :     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
      58             : 
      59       72306 : static cl::opt<unsigned> CBZDisplacementBits(
      60      216918 :     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
      61      289224 :     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
      62             : 
      63             : static cl::opt<unsigned>
      64      289224 :     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
      65      289224 :                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
      66             : 
      67        1217 : AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
      68             :     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      69        2434 :       RI(STI.getTargetTriple()), Subtarget(STI) {}
      70             : 
      71             : /// GetInstSize - Return the number of bytes of code the specified
      72             : /// instruction may be.  This returns the maximum number of bytes.
      73       71661 : unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
      74       71661 :   const MachineBasicBlock &MBB = *MI.getParent();
      75       71661 :   const MachineFunction *MF = MBB.getParent();
      76       71661 :   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
      77             : 
      78      143322 :   if (MI.getOpcode() == AArch64::INLINEASM)
      79         194 :     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
      80             : 
      81             :   // FIXME: We currently only handle pseudoinstructions that don't get expanded
      82             :   //        before the assembly printer.
      83       71467 :   unsigned NumBytes = 0;
      84       71467 :   const MCInstrDesc &Desc = MI.getDesc();
      85       71467 :   switch (Desc.getOpcode()) {
      86             :   default:
      87             :     // Anything not explicitly designated otherwise is a normal 4-byte insn.
      88             :     NumBytes = 4;
      89             :     break;
      90        2715 :   case TargetOpcode::DBG_VALUE:
      91             :   case TargetOpcode::EH_LABEL:
      92             :   case TargetOpcode::IMPLICIT_DEF:
      93             :   case TargetOpcode::KILL:
      94        2715 :     NumBytes = 0;
      95        2715 :     break;
      96          17 :   case TargetOpcode::STACKMAP:
      97             :     // The upper bound for a stackmap intrinsic is the full length of its shadow
      98          34 :     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
      99             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     100          17 :     break;
     101          48 :   case TargetOpcode::PATCHPOINT:
     102             :     // The size of the patchpoint intrinsic is the number of bytes requested
     103          96 :     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
     104             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     105          48 :     break;
     106          25 :   case AArch64::TLSDESC_CALLSEQ:
     107             :     // This gets lowered to an instruction sequence which takes 16 bytes
     108          25 :     NumBytes = 16;
     109          25 :     break;
     110             :   }
     111             : 
     112             :   return NumBytes;
     113             : }
     114             : 
     115       35773 : static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
     116             :                             SmallVectorImpl<MachineOperand> &Cond) {
     117             :   // Block ends with fall-through condbranch.
     118       71546 :   switch (LastInst->getOpcode()) {
     119           0 :   default:
     120           0 :     llvm_unreachable("Unknown branch instruction?");
     121       16643 :   case AArch64::Bcc:
     122       16643 :     Target = LastInst->getOperand(1).getMBB();
     123       16643 :     Cond.push_back(LastInst->getOperand(0));
     124       16643 :     break;
     125       11787 :   case AArch64::CBZW:
     126             :   case AArch64::CBZX:
     127             :   case AArch64::CBNZW:
     128             :   case AArch64::CBNZX:
     129       11787 :     Target = LastInst->getOperand(1).getMBB();
     130       23574 :     Cond.push_back(MachineOperand::CreateImm(-1));
     131       35361 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     132       11787 :     Cond.push_back(LastInst->getOperand(0));
     133       11787 :     break;
     134        7343 :   case AArch64::TBZW:
     135             :   case AArch64::TBZX:
     136             :   case AArch64::TBNZW:
     137             :   case AArch64::TBNZX:
     138        7343 :     Target = LastInst->getOperand(2).getMBB();
     139       14686 :     Cond.push_back(MachineOperand::CreateImm(-1));
     140       22029 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     141        7343 :     Cond.push_back(LastInst->getOperand(0));
     142       14686 :     Cond.push_back(LastInst->getOperand(1));
     143             :   }
     144       35773 : }
     145             : 
     146             : static unsigned getBranchDisplacementBits(unsigned Opc) {
     147        1312 :   switch (Opc) {
     148           0 :   default:
     149           0 :     llvm_unreachable("unexpected opcode!");
     150             :   case AArch64::B:
     151             :     return 64;
     152         190 :   case AArch64::TBNZW:
     153             :   case AArch64::TBZW:
     154             :   case AArch64::TBNZX:
     155             :   case AArch64::TBZX:
     156         190 :     return TBZDisplacementBits;
     157         304 :   case AArch64::CBNZW:
     158             :   case AArch64::CBZW:
     159             :   case AArch64::CBNZX:
     160             :   case AArch64::CBZX:
     161         304 :     return CBZDisplacementBits;
     162         586 :   case AArch64::Bcc:
     163         586 :     return BCCDisplacementBits;
     164             :   }
     165             : }
     166             : 
     167        1312 : bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
     168             :                                              int64_t BrOffset) const {
     169        1080 :   unsigned Bits = getBranchDisplacementBits(BranchOp);
     170             :   assert(Bits >= 3 && "max branch displacement must be enough to jump"
     171             :                       "over conditional branch expansion");
     172        2392 :   return isIntN(Bits, BrOffset / 4);
     173             : }
     174             : 
     175             : MachineBasicBlock *
     176        1386 : AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
     177        2772 :   switch (MI.getOpcode()) {
     178           0 :   default:
     179           0 :     llvm_unreachable("unexpected opcode!");
     180         232 :   case AArch64::B:
     181         232 :     return MI.getOperand(0).getMBB();
     182         205 :   case AArch64::TBZW:
     183             :   case AArch64::TBNZW:
     184             :   case AArch64::TBZX:
     185             :   case AArch64::TBNZX:
     186         205 :     return MI.getOperand(2).getMBB();
     187         949 :   case AArch64::CBZW:
     188             :   case AArch64::CBNZW:
     189             :   case AArch64::CBZX:
     190             :   case AArch64::CBNZX:
     191             :   case AArch64::Bcc:
     192         949 :     return MI.getOperand(1).getMBB();
     193             :   }
     194             : }
     195             : 
     196             : // Branch analysis.
     197      304649 : bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     198             :                                      MachineBasicBlock *&TBB,
     199             :                                      MachineBasicBlock *&FBB,
     200             :                                      SmallVectorImpl<MachineOperand> &Cond,
     201             :                                      bool AllowModify) const {
     202             :   // If the block has no terminators, it just falls into the block after it.
     203      304649 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     204      609298 :   if (I == MBB.end())
     205             :     return false;
     206             : 
     207      606398 :   if (!isUnpredicatedTerminator(*I))
     208             :     return false;
     209             : 
     210             :   // Get the last instruction in the block.
     211      281227 :   MachineInstr *LastInst = &*I;
     212             : 
     213             :   // If there is only one terminator instruction, process it.
     214      281227 :   unsigned LastOpc = LastInst->getOpcode();
     215     1098322 :   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     216      265183 :     if (isUncondBranchOpcode(LastOpc)) {
     217        6316 :       TBB = LastInst->getOperand(0).getMBB();
     218        6316 :       return false;
     219             :     }
     220      278836 :     if (isCondBranchOpcode(LastOpc)) {
     221             :       // Block ends with fall-through condbranch.
     222       19969 :       parseCondBranch(LastInst, TBB, Cond);
     223       19969 :       return false;
     224             :     }
     225             :     return true; // Can't handle indirect branch.
     226             :   }
     227             : 
     228             :   // Get the instruction before it if it is a terminator.
     229       16044 :   MachineInstr *SecondLastInst = &*I;
     230       16044 :   unsigned SecondLastOpc = SecondLastInst->getOpcode();
     231             : 
     232             :   // If AllowModify is true and the block ends with two or more unconditional
     233             :   // branches, delete all but the first unconditional branch.
     234       18316 :   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
     235        2264 :     while (isUncondBranchOpcode(SecondLastOpc)) {
     236           2 :       LastInst->eraseFromParent();
     237           2 :       LastInst = SecondLastInst;
     238           4 :       LastOpc = LastInst->getOpcode();
     239           8 :       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     240             :         // Return now the only terminator is an unconditional branch.
     241           2 :         TBB = LastInst->getOperand(0).getMBB();
     242           2 :         return false;
     243             :       } else {
     244           0 :         SecondLastInst = &*I;
     245           0 :         SecondLastOpc = SecondLastInst->getOpcode();
     246             :       }
     247             :     }
     248             :   }
     249             : 
     250             :   // If there are three terminators, we don't know what sort of block this is.
     251       77878 :   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
     252             :     return true;
     253             : 
     254             :   // If the block ends with a B and a Bcc, handle it.
     255       31688 :   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     256       15804 :     parseCondBranch(SecondLastInst, TBB, Cond);
     257       15804 :     FBB = LastInst->getOperand(0).getMBB();
     258       15804 :     return false;
     259             :   }
     260             : 
     261             :   // If the block ends with two unconditional branches, handle it.  The second
     262             :   // one is not executed, so remove it.
     263          60 :   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     264           0 :     TBB = SecondLastInst->getOperand(0).getMBB();
     265           0 :     I = LastInst;
     266           0 :     if (AllowModify)
     267           0 :       I->eraseFromParent();
     268             :     return false;
     269             :   }
     270             : 
     271             :   // ...likewise if it ends with an indirect branch followed by an unconditional
     272             :   // branch.
     273          60 :   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     274           0 :     I = LastInst;
     275           0 :     if (AllowModify)
     276           0 :       I->eraseFromParent();
     277             :     return true;
     278             :   }
     279             : 
     280             :   // Otherwise, can't handle this.
     281             :   return true;
     282             : }
     283             : 
     284        1916 : bool AArch64InstrInfo::reverseBranchCondition(
     285             :     SmallVectorImpl<MachineOperand> &Cond) const {
     286        3832 :   if (Cond[0].getImm() != -1) {
     287             :     // Regular Bcc
     288        2480 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
     289        3720 :     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
     290             :   } else {
     291             :     // Folded compare-and-branch
     292        1352 :     switch (Cond[1].getImm()) {
     293           0 :     default:
     294           0 :       llvm_unreachable("Unknown conditional branch!");
     295         176 :     case AArch64::CBZW:
     296         352 :       Cond[1].setImm(AArch64::CBNZW);
     297             :       break;
     298         169 :     case AArch64::CBNZW:
     299         338 :       Cond[1].setImm(AArch64::CBZW);
     300             :       break;
     301          37 :     case AArch64::CBZX:
     302          74 :       Cond[1].setImm(AArch64::CBNZX);
     303             :       break;
     304          37 :     case AArch64::CBNZX:
     305          74 :       Cond[1].setImm(AArch64::CBZX);
     306             :       break;
     307         108 :     case AArch64::TBZW:
     308         216 :       Cond[1].setImm(AArch64::TBNZW);
     309             :       break;
     310         107 :     case AArch64::TBNZW:
     311         214 :       Cond[1].setImm(AArch64::TBZW);
     312             :       break;
     313          24 :     case AArch64::TBZX:
     314          48 :       Cond[1].setImm(AArch64::TBNZX);
     315             :       break;
     316          18 :     case AArch64::TBNZX:
     317          36 :       Cond[1].setImm(AArch64::TBZX);
     318             :       break;
     319             :     }
     320             :   }
     321             : 
     322        1916 :   return false;
     323             : }
     324             : 
     325        3552 : unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
     326             :                                         int *BytesRemoved) const {
     327        3552 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     328        7104 :   if (I == MBB.end())
     329             :     return 0;
     330             : 
     331        7104 :   if (!isUncondBranchOpcode(I->getOpcode()) &&
     332        1697 :       !isCondBranchOpcode(I->getOpcode()))
     333             :     return 0;
     334             : 
     335             :   // Remove the branch.
     336        3355 :   I->eraseFromParent();
     337             : 
     338        3355 :   I = MBB.end();
     339             : 
     340        6710 :   if (I == MBB.begin()) {
     341         378 :     if (BytesRemoved)
     342           2 :       *BytesRemoved = 4;
     343             :     return 1;
     344             :   }
     345        2977 :   --I;
     346        8931 :   if (!isCondBranchOpcode(I->getOpcode())) {
     347        1928 :     if (BytesRemoved)
     348           6 :       *BytesRemoved = 4;
     349             :     return 1;
     350             :   }
     351             : 
     352             :   // Remove the branch.
     353        1049 :   I->eraseFromParent();
     354        1049 :   if (BytesRemoved)
     355           2 :     *BytesRemoved = 8;
     356             : 
     357             :   return 2;
     358             : }
     359             : 
     360        2511 : void AArch64InstrInfo::instantiateCondBranch(
     361             :     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
     362             :     ArrayRef<MachineOperand> Cond) const {
     363        2511 :   if (Cond[0].getImm() != -1) {
     364             :     // Regular Bcc
     365        7615 :     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
     366             :   } else {
     367             :     // Folded compare-and-branch
     368             :     // Note that we use addOperand instead of addReg to keep the flags.
     369             :     const MachineInstrBuilder MIB =
     370        4940 :         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
     371         988 :     if (Cond.size() > 3)
     372         348 :       MIB.addImm(Cond[3].getImm());
     373         988 :     MIB.addMBB(TBB);
     374             :   }
     375        2511 : }
     376             : 
     377        3388 : unsigned AArch64InstrInfo::insertBranch(
     378             :     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
     379             :     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
     380             :   // Shouldn't be a fall through.
     381             :   assert(TBB && "insertBranch must not be told to insert a fallthrough");
     382             : 
     383        3388 :   if (!FBB) {
     384        3326 :     if (Cond.empty()) // Unconditional branch?
     385        3508 :       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
     386             :     else
     387        2449 :       instantiateCondBranch(MBB, DL, TBB, Cond);
     388             : 
     389        3326 :     if (BytesAdded)
     390           2 :       *BytesAdded = 4;
     391             : 
     392             :     return 1;
     393             :   }
     394             : 
     395             :   // Two-way conditional branch.
     396          62 :   instantiateCondBranch(MBB, DL, TBB, Cond);
     397         248 :   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
     398             : 
     399          62 :   if (BytesAdded)
     400          10 :     *BytesAdded = 8;
     401             : 
     402             :   return 2;
     403             : }
     404             : 
     405             : // Find the original register that VReg is copied from.
     406         240 : static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
     407         732 :   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
     408         397 :     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     409         246 :     if (!DefMI->isFullCopy())
     410             :       return VReg;
     411         246 :     VReg = DefMI->getOperand(1).getReg();
     412             :   }
     413             :   return VReg;
     414             : }
     415             : 
     416             : // Determine if VReg is defined by an instruction that can be folded into a
     417             : // csel instruction. If so, return the folded opcode, and the replacement
     418             : // register.
     419         224 : static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     420             :                                 unsigned *NewVReg = nullptr) {
     421         224 :   VReg = removeCopies(MRI, VReg);
     422         224 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
     423             :     return 0;
     424             : 
     425         302 :   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
     426         151 :   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     427         151 :   unsigned Opc = 0;
     428         151 :   unsigned SrcOpNum = 0;
     429         302 :   switch (DefMI->getOpcode()) {
     430           0 :   case AArch64::ADDSXri:
     431             :   case AArch64::ADDSWri:
     432             :     // if NZCV is used, do not fold.
     433           0 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     434             :       return 0;
     435             :     // fall-through to ADDXri and ADDWri.
     436             :     LLVM_FALLTHROUGH;
     437             :   case AArch64::ADDXri:
     438             :   case AArch64::ADDWri:
     439             :     // add x, 1 -> csinc.
     440         202 :     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
     441          12 :         DefMI->getOperand(3).getImm() != 0)
     442             :       return 0;
     443          12 :     SrcOpNum = 1;
     444          12 :     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     445             :     break;
     446             : 
     447           8 :   case AArch64::ORNXrr:
     448             :   case AArch64::ORNWrr: {
     449             :     // not x -> csinv, represented as orn dst, xzr, src.
     450           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     451           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     452             :       return 0;
     453           8 :     SrcOpNum = 2;
     454           8 :     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     455             :     break;
     456             :   }
     457             : 
     458           8 :   case AArch64::SUBSXrr:
     459             :   case AArch64::SUBSWrr:
     460             :     // if NZCV is used, do not fold.
     461           8 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     462             :       return 0;
     463             :     // fall-through to SUBXrr and SUBWrr.
     464             :     LLVM_FALLTHROUGH;
     465             :   case AArch64::SUBXrr:
     466             :   case AArch64::SUBWrr: {
     467             :     // neg x -> csneg, represented as sub dst, xzr, src.
     468           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     469           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     470             :       return 0;
     471           8 :     SrcOpNum = 2;
     472           8 :     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     473             :     break;
     474             :   }
     475             :   default:
     476             :     return 0;
     477             :   }
     478             :   assert(Opc && SrcOpNum && "Missing parameters");
     479             : 
     480          28 :   if (NewVReg)
     481          28 :     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
     482             :   return Opc;
     483             : }
     484             : 
     485          97 : bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
     486             :                                        ArrayRef<MachineOperand> Cond,
     487             :                                        unsigned TrueReg, unsigned FalseReg,
     488             :                                        int &CondCycles, int &TrueCycles,
     489             :                                        int &FalseCycles) const {
     490             :   // Check register classes.
     491          97 :   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     492             :   const TargetRegisterClass *RC =
     493         291 :       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
     494          97 :   if (!RC)
     495             :     return false;
     496             : 
     497             :   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
     498          97 :   unsigned ExtraCondLat = Cond.size() != 1;
     499             : 
     500             :   // GPRs are handled by csel.
     501             :   // FIXME: Fold in x+1, -x, and ~x when applicable.
     502         235 :   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
     503          82 :       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
     504             :     // Single-cycle csel, csinc, csinv, and csneg.
     505          94 :     CondCycles = 1 + ExtraCondLat;
     506          94 :     TrueCycles = FalseCycles = 1;
     507          94 :     if (canFoldIntoCSel(MRI, TrueReg))
     508           6 :       TrueCycles = 0;
     509          88 :     else if (canFoldIntoCSel(MRI, FalseReg))
     510           8 :       FalseCycles = 0;
     511             :     return true;
     512             :   }
     513             : 
     514             :   // Scalar floating point is handled by fcsel.
     515             :   // FIXME: Form fabs, fmin, and fmax when applicable.
     516           9 :   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
     517           6 :       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
     518           0 :     CondCycles = 5 + ExtraCondLat;
     519           0 :     TrueCycles = FalseCycles = 2;
     520           0 :     return true;
     521             :   }
     522             : 
     523             :   // Can't do vectors.
     524             :   return false;
     525             : }
     526             : 
     527          24 : void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
     528             :                                     MachineBasicBlock::iterator I,
     529             :                                     const DebugLoc &DL, unsigned DstReg,
     530             :                                     ArrayRef<MachineOperand> Cond,
     531             :                                     unsigned TrueReg, unsigned FalseReg) const {
     532          24 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     533             : 
     534             :   // Parse the condition code, see parseCondBranch() above.
     535             :   AArch64CC::CondCode CC;
     536          24 :   switch (Cond.size()) {
     537           0 :   default:
     538           0 :     llvm_unreachable("Unknown condition opcode in Cond");
     539          15 :   case 1: // b.cc
     540          15 :     CC = AArch64CC::CondCode(Cond[0].getImm());
     541          15 :     break;
     542           5 :   case 3: { // cbz/cbnz
     543             :     // We must insert a compare against 0.
     544             :     bool Is64Bit;
     545           5 :     switch (Cond[1].getImm()) {
     546           0 :     default:
     547           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     548             :     case AArch64::CBZW:
     549             :       Is64Bit = false;
     550             :       CC = AArch64CC::EQ;
     551             :       break;
     552           1 :     case AArch64::CBZX:
     553           1 :       Is64Bit = true;
     554           1 :       CC = AArch64CC::EQ;
     555           1 :       break;
     556           2 :     case AArch64::CBNZW:
     557           2 :       Is64Bit = false;
     558           2 :       CC = AArch64CC::NE;
     559           2 :       break;
     560           1 :     case AArch64::CBNZX:
     561           1 :       Is64Bit = true;
     562           1 :       CC = AArch64CC::NE;
     563           1 :       break;
     564             :     }
     565           5 :     unsigned SrcReg = Cond[2].getReg();
     566           5 :     if (Is64Bit) {
     567             :       // cmp reg, #0 is actually subs xzr, reg, #0.
     568           2 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
     569           6 :       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
     570           2 :           .addReg(SrcReg)
     571           2 :           .addImm(0)
     572           2 :           .addImm(0);
     573             :     } else {
     574           3 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
     575           9 :       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
     576           3 :           .addReg(SrcReg)
     577           3 :           .addImm(0)
     578           3 :           .addImm(0);
     579             :     }
     580             :     break;
     581             :   }
     582           4 :   case 4: { // tbz/tbnz
     583             :     // We must insert a tst instruction.
     584           4 :     switch (Cond[1].getImm()) {
     585           0 :     default:
     586           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     587             :     case AArch64::TBZW:
     588             :     case AArch64::TBZX:
     589             :       CC = AArch64CC::EQ;
     590             :       break;
     591           2 :     case AArch64::TBNZW:
     592             :     case AArch64::TBNZX:
     593           2 :       CC = AArch64CC::NE;
     594           2 :       break;
     595             :     }
     596             :     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
     597           4 :     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
     598           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
     599           2 :           .addReg(Cond[2].getReg())
     600           2 :           .addImm(
     601           4 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
     602             :     else
     603           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
     604           2 :           .addReg(Cond[2].getReg())
     605           2 :           .addImm(
     606           4 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
     607             :     break;
     608             :   }
     609             :   }
     610             : 
     611          24 :   unsigned Opc = 0;
     612          24 :   const TargetRegisterClass *RC = nullptr;
     613          24 :   bool TryFold = false;
     614          24 :   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
     615             :     RC = &AArch64::GPR64RegClass;
     616             :     Opc = AArch64::CSELXr;
     617             :     TryFold = true;
     618          14 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
     619             :     RC = &AArch64::GPR32RegClass;
     620             :     Opc = AArch64::CSELWr;
     621             :     TryFold = true;
     622           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
     623             :     RC = &AArch64::FPR64RegClass;
     624             :     Opc = AArch64::FCSELDrrr;
     625           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
     626           0 :     RC = &AArch64::FPR32RegClass;
     627           0 :     Opc = AArch64::FCSELSrrr;
     628             :   }
     629             :   assert(RC && "Unsupported regclass");
     630             : 
     631             :   // Try folding simple instructions into the csel.
     632          24 :   if (TryFold) {
     633          24 :     unsigned NewVReg = 0;
     634          24 :     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
     635          24 :     if (FoldedOpc) {
     636             :       // The folded opcodes csinc, csinc and csneg apply the operation to
     637             :       // FalseReg, so we need to invert the condition.
     638           6 :       CC = AArch64CC::getInvertedCondCode(CC);
     639           6 :       TrueReg = FalseReg;
     640             :     } else
     641          18 :       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
     642             : 
     643             :     // Fold the operation. Leave any dead instructions for DCE to clean up.
     644          24 :     if (FoldedOpc) {
     645          14 :       FalseReg = NewVReg;
     646          14 :       Opc = FoldedOpc;
     647             :       // The extends the live range of NewVReg.
     648          14 :       MRI.clearKillFlags(NewVReg);
     649             :     }
     650             :   }
     651             : 
     652             :   // Pull all virtual register into the appropriate class.
     653          24 :   MRI.constrainRegClass(TrueReg, RC);
     654          24 :   MRI.constrainRegClass(FalseReg, RC);
     655             : 
     656             :   // Insert the csel.
     657          72 :   BuildMI(MBB, I, DL, get(Opc), DstReg)
     658          24 :       .addReg(TrueReg)
     659          24 :       .addReg(FalseReg)
     660          48 :       .addImm(CC);
     661          24 : }
     662             : 
     663             : /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
     664             : static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
     665          30 :   uint64_t Imm = MI.getOperand(1).getImm();
     666          30 :   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
     667             :   uint64_t Encoding;
     668          30 :   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
     669             : }
     670             : 
     671             : // FIXME: this implementation should be micro-architecture dependent, so a
     672             : // micro-architecture target hook should be introduced here in future.
     673       11041 : bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
     674       11041 :   if (!Subtarget.hasCustomCheapAsMoveHandling())
     675       10524 :     return MI.isAsCheapAsAMove();
     676         823 :   if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
     677         306 :       isExynosShiftLeftFast(MI))
     678             :     return true;
     679             : 
     680        1022 :   switch (MI.getOpcode()) {
     681             :   default:
     682             :     return false;
     683             : 
     684             :   // add/sub on register without shift
     685          17 :   case AArch64::ADDWri:
     686             :   case AArch64::ADDXri:
     687             :   case AArch64::SUBWri:
     688             :   case AArch64::SUBXri:
     689          17 :     return (MI.getOperand(3).getImm() == 0);
     690             : 
     691             :   // logical ops on immediate
     692           0 :   case AArch64::ANDWri:
     693             :   case AArch64::ANDXri:
     694             :   case AArch64::EORWri:
     695             :   case AArch64::EORXri:
     696             :   case AArch64::ORRWri:
     697             :   case AArch64::ORRXri:
     698           0 :     return true;
     699             : 
     700             :   // logical ops on register without shift
     701           0 :   case AArch64::ANDWrr:
     702             :   case AArch64::ANDXrr:
     703             :   case AArch64::BICWrr:
     704             :   case AArch64::BICXrr:
     705             :   case AArch64::EONWrr:
     706             :   case AArch64::EONXrr:
     707             :   case AArch64::EORWrr:
     708             :   case AArch64::EORXrr:
     709             :   case AArch64::ORNWrr:
     710             :   case AArch64::ORNXrr:
     711             :   case AArch64::ORRWrr:
     712             :   case AArch64::ORRXrr:
     713           0 :     return true;
     714             : 
     715             :   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
     716             :   // ORRXri, it is as cheap as MOV
     717          30 :   case AArch64::MOVi32imm:
     718          30 :     return canBeExpandedToORR(MI, 32);
     719           0 :   case AArch64::MOVi64imm:
     720           0 :     return canBeExpandedToORR(MI, 64);
     721             : 
     722             :   // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
     723             :   // feature.
     724          16 :   case AArch64::FMOVH0:
     725             :   case AArch64::FMOVS0:
     726             :   case AArch64::FMOVD0:
     727          16 :     return Subtarget.hasZeroCycleZeroing();
     728           0 :   case TargetOpcode::COPY:
     729           0 :     return (Subtarget.hasZeroCycleZeroing() &&
     730           0 :             (MI.getOperand(1).getReg() == AArch64::WZR ||
     731           0 :              MI.getOperand(1).getReg() == AArch64::XZR));
     732             :   }
     733             : 
     734             :   llvm_unreachable("Unknown opcode to check as cheap as a move!");
     735             : }
     736             : 
     737         314 : bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
     738             :   unsigned Imm, Shift;
     739             : 
     740         628 :   switch (MI.getOpcode()) {
     741             :   default:
     742             :     return false;
     743             : 
     744             :   // WriteI
     745           6 :   case AArch64::ADDSWri:
     746             :   case AArch64::ADDSXri:
     747             :   case AArch64::ADDWri:
     748             :   case AArch64::ADDXri:
     749             :   case AArch64::SUBSWri:
     750             :   case AArch64::SUBSXri:
     751             :   case AArch64::SUBWri:
     752             :   case AArch64::SUBXri:
     753           6 :     return true;
     754             : 
     755             :   // WriteISReg
     756           8 :   case AArch64::ADDSWrs:
     757             :   case AArch64::ADDSXrs:
     758             :   case AArch64::ADDWrs:
     759             :   case AArch64::ADDXrs:
     760             :   case AArch64::ANDSWrs:
     761             :   case AArch64::ANDSXrs:
     762             :   case AArch64::ANDWrs:
     763             :   case AArch64::ANDXrs:
     764             :   case AArch64::BICSWrs:
     765             :   case AArch64::BICSXrs:
     766             :   case AArch64::BICWrs:
     767             :   case AArch64::BICXrs:
     768             :   case AArch64::EONWrs:
     769             :   case AArch64::EONXrs:
     770             :   case AArch64::EORWrs:
     771             :   case AArch64::EORXrs:
     772             :   case AArch64::ORNWrs:
     773             :   case AArch64::ORNXrs:
     774             :   case AArch64::ORRWrs:
     775             :   case AArch64::ORRXrs:
     776             :   case AArch64::SUBSWrs:
     777             :   case AArch64::SUBSXrs:
     778             :   case AArch64::SUBWrs:
     779             :   case AArch64::SUBXrs:
     780           8 :     Imm = MI.getOperand(3).getImm();
     781           8 :     Shift = AArch64_AM::getShiftValue(Imm);
     782           8 :     return (Shift == 0 ||
     783           0 :             (Shift <= 3 && AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL));
     784             : 
     785             :   // WriteIEReg
     786           0 :   case AArch64::ADDSWrx:
     787             :   case AArch64::ADDSXrx:
     788             :   case AArch64::ADDSXrx64:
     789             :   case AArch64::ADDWrx:
     790             :   case AArch64::ADDXrx:
     791             :   case AArch64::ADDXrx64:
     792             :   case AArch64::SUBSWrx:
     793             :   case AArch64::SUBSXrx:
     794             :   case AArch64::SUBSXrx64:
     795             :   case AArch64::SUBWrx:
     796             :   case AArch64::SUBXrx:
     797             :   case AArch64::SUBXrx64:
     798           0 :     Imm = MI.getOperand(3).getImm();
     799           0 :     Shift = AArch64_AM::getArithShiftValue(Imm);
     800           0 :     return (Shift == 0 ||
     801           0 :             (Shift <= 3 && AArch64_AM::getExtendType(Imm) == AArch64_AM::UXTX));
     802             :   }
     803             : }
     804             : 
     805          92 : bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
     806         184 :   switch (MI.getOpcode()) {
     807             :   default:
     808             :     return false;
     809             : 
     810           6 :   case AArch64::ADDWrs:
     811             :   case AArch64::ADDXrs:
     812             :   case AArch64::ADDSWrs:
     813             :   case AArch64::ADDSXrs: {
     814           6 :     unsigned Imm = MI.getOperand(3).getImm();
     815           6 :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     816           6 :     if (ShiftVal == 0)
     817             :       return true;
     818           0 :     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
     819             :   }
     820             : 
     821          27 :   case AArch64::ADDWrx:
     822             :   case AArch64::ADDXrx:
     823             :   case AArch64::ADDXrx64:
     824             :   case AArch64::ADDSWrx:
     825             :   case AArch64::ADDSXrx:
     826             :   case AArch64::ADDSXrx64: {
     827          27 :     unsigned Imm = MI.getOperand(3).getImm();
     828          27 :     switch (AArch64_AM::getArithExtendType(Imm)) {
     829             :     default:
     830             :       return false;
     831          27 :     case AArch64_AM::UXTB:
     832             :     case AArch64_AM::UXTH:
     833             :     case AArch64_AM::UXTW:
     834             :     case AArch64_AM::UXTX:
     835          27 :       return AArch64_AM::getArithShiftValue(Imm) <= 4;
     836             :     }
     837             :   }
     838             : 
     839          10 :   case AArch64::SUBWrs:
     840             :   case AArch64::SUBSWrs: {
     841          10 :     unsigned Imm = MI.getOperand(3).getImm();
     842          10 :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     843          10 :     return ShiftVal == 0 ||
     844           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
     845             :   }
     846             : 
     847           0 :   case AArch64::SUBXrs:
     848             :   case AArch64::SUBSXrs: {
     849           0 :     unsigned Imm = MI.getOperand(3).getImm();
     850           0 :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     851           0 :     return ShiftVal == 0 ||
     852           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
     853             :   }
     854             : 
     855           0 :   case AArch64::SUBWrx:
     856             :   case AArch64::SUBXrx:
     857             :   case AArch64::SUBXrx64:
     858             :   case AArch64::SUBSWrx:
     859             :   case AArch64::SUBSXrx:
     860             :   case AArch64::SUBSXrx64: {
     861           0 :     unsigned Imm = MI.getOperand(3).getImm();
     862           0 :     switch (AArch64_AM::getArithExtendType(Imm)) {
     863             :     default:
     864             :       return false;
     865           0 :     case AArch64_AM::UXTB:
     866             :     case AArch64_AM::UXTH:
     867             :     case AArch64_AM::UXTW:
     868             :     case AArch64_AM::UXTX:
     869           0 :       return AArch64_AM::getArithShiftValue(Imm) == 0;
     870             :     }
     871             :   }
     872             : 
     873          49 :   case AArch64::LDRBBroW:
     874             :   case AArch64::LDRBBroX:
     875             :   case AArch64::LDRBroW:
     876             :   case AArch64::LDRBroX:
     877             :   case AArch64::LDRDroW:
     878             :   case AArch64::LDRDroX:
     879             :   case AArch64::LDRHHroW:
     880             :   case AArch64::LDRHHroX:
     881             :   case AArch64::LDRHroW:
     882             :   case AArch64::LDRHroX:
     883             :   case AArch64::LDRQroW:
     884             :   case AArch64::LDRQroX:
     885             :   case AArch64::LDRSBWroW:
     886             :   case AArch64::LDRSBWroX:
     887             :   case AArch64::LDRSBXroW:
     888             :   case AArch64::LDRSBXroX:
     889             :   case AArch64::LDRSHWroW:
     890             :   case AArch64::LDRSHWroX:
     891             :   case AArch64::LDRSHXroW:
     892             :   case AArch64::LDRSHXroX:
     893             :   case AArch64::LDRSWroW:
     894             :   case AArch64::LDRSWroX:
     895             :   case AArch64::LDRSroW:
     896             :   case AArch64::LDRSroX:
     897             :   case AArch64::LDRWroW:
     898             :   case AArch64::LDRWroX:
     899             :   case AArch64::LDRXroW:
     900             :   case AArch64::LDRXroX:
     901             :   case AArch64::PRFMroW:
     902             :   case AArch64::PRFMroX:
     903             :   case AArch64::STRBBroW:
     904             :   case AArch64::STRBBroX:
     905             :   case AArch64::STRBroW:
     906             :   case AArch64::STRBroX:
     907             :   case AArch64::STRDroW:
     908             :   case AArch64::STRDroX:
     909             :   case AArch64::STRHHroW:
     910             :   case AArch64::STRHHroX:
     911             :   case AArch64::STRHroW:
     912             :   case AArch64::STRHroX:
     913             :   case AArch64::STRQroW:
     914             :   case AArch64::STRQroX:
     915             :   case AArch64::STRSroW:
     916             :   case AArch64::STRSroX:
     917             :   case AArch64::STRWroW:
     918             :   case AArch64::STRWroX:
     919             :   case AArch64::STRXroW:
     920             :   case AArch64::STRXroX: {
     921          49 :     unsigned IsSigned = MI.getOperand(3).getImm();
     922          49 :     return !IsSigned;
     923             :   }
     924             :   }
     925             : }
     926             : 
     927       92920 : bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
     928             :                                              unsigned &SrcReg, unsigned &DstReg,
     929             :                                              unsigned &SubIdx) const {
     930      185840 :   switch (MI.getOpcode()) {
     931             :   default:
     932             :     return false;
     933         573 :   case AArch64::SBFMXri: // aka sxtw
     934             :   case AArch64::UBFMXri: // aka uxtw
     935             :     // Check for the 32 -> 64 bit extension case, these instructions can do
     936             :     // much more.
     937         573 :     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
     938             :       return false;
     939             :     // This is a signed or unsigned 32 -> 64 bit extension.
     940          35 :     SrcReg = MI.getOperand(1).getReg();
     941          35 :     DstReg = MI.getOperand(0).getReg();
     942          35 :     SubIdx = AArch64::sub_32;
     943          35 :     return true;
     944             :   }
     945             : }
     946             : 
     947        7472 : bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
     948             :     MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
     949        7472 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
     950        7472 :   unsigned BaseRegA = 0, BaseRegB = 0;
     951        7472 :   int64_t OffsetA = 0, OffsetB = 0;
     952        7472 :   unsigned WidthA = 0, WidthB = 0;
     953             : 
     954             :   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
     955             :   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
     956             : 
     957       22410 :   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
     958       22404 :       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
     959             :     return false;
     960             : 
     961             :   // Retrieve the base register, offset from the base register and width. Width
     962             :   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
     963             :   // base registers are identical, and the offset of a lower memory access +
     964             :   // the width doesn't overlap the offset of a higher memory access,
     965             :   // then the memory accesses are different.
     966       12187 :   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
     967        4804 :       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
     968        4252 :     if (BaseRegA == BaseRegB) {
     969        2702 :       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
     970        2702 :       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
     971        2702 :       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
     972        2702 :       if (LowOffset + LowWidth <= HighOffset)
     973             :         return true;
     974             :     }
     975             :   }
     976             :   return false;
     977             : }
     978             : 
     979             : /// analyzeCompare - For a comparison instruction, return the source registers
     980             : /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
     981             : /// Return true if the comparison instruction can be analyzed.
     982        1143 : bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
     983             :                                       unsigned &SrcReg2, int &CmpMask,
     984             :                                       int &CmpValue) const {
     985        2286 :   switch (MI.getOpcode()) {
     986             :   default:
     987             :     break;
     988         567 :   case AArch64::SUBSWrr:
     989             :   case AArch64::SUBSWrs:
     990             :   case AArch64::SUBSWrx:
     991             :   case AArch64::SUBSXrr:
     992             :   case AArch64::SUBSXrs:
     993             :   case AArch64::SUBSXrx:
     994             :   case AArch64::ADDSWrr:
     995             :   case AArch64::ADDSWrs:
     996             :   case AArch64::ADDSWrx:
     997             :   case AArch64::ADDSXrr:
     998             :   case AArch64::ADDSXrs:
     999             :   case AArch64::ADDSXrx:
    1000             :     // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1001         567 :     SrcReg = MI.getOperand(1).getReg();
    1002         567 :     SrcReg2 = MI.getOperand(2).getReg();
    1003         567 :     CmpMask = ~0;
    1004         567 :     CmpValue = 0;
    1005         567 :     return true;
    1006         548 :   case AArch64::SUBSWri:
    1007             :   case AArch64::ADDSWri:
    1008             :   case AArch64::SUBSXri:
    1009             :   case AArch64::ADDSXri:
    1010         548 :     SrcReg = MI.getOperand(1).getReg();
    1011         548 :     SrcReg2 = 0;
    1012         548 :     CmpMask = ~0;
    1013             :     // FIXME: In order to convert CmpValue to 0 or 1
    1014         548 :     CmpValue = MI.getOperand(2).getImm() != 0;
    1015         548 :     return true;
    1016          28 :   case AArch64::ANDSWri:
    1017             :   case AArch64::ANDSXri:
    1018             :     // ANDS does not use the same encoding scheme as the others xxxS
    1019             :     // instructions.
    1020          28 :     SrcReg = MI.getOperand(1).getReg();
    1021          28 :     SrcReg2 = 0;
    1022          28 :     CmpMask = ~0;
    1023             :     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
    1024             :     // while the type of CmpValue is int. When converting uint64_t to int,
    1025             :     // the high 32 bits of uint64_t will be lost.
    1026             :     // In fact it causes a bug in spec2006-483.xalancbmk
    1027             :     // CmpValue is only used to compare with zero in OptimizeCompareInstr
    1028          84 :     CmpValue = AArch64_AM::decodeLogicalImmediate(
    1029          28 :                    MI.getOperand(2).getImm(),
    1030          56 :                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    1031          28 :     return true;
    1032             :   }
    1033             : 
    1034             :   return false;
    1035             : }
    1036             : 
    1037         304 : static bool UpdateOperandRegClass(MachineInstr &Instr) {
    1038         304 :   MachineBasicBlock *MBB = Instr.getParent();
    1039             :   assert(MBB && "Can't get MachineBasicBlock here");
    1040         304 :   MachineFunction *MF = MBB->getParent();
    1041             :   assert(MF && "Can't get MachineFunction here");
    1042         304 :   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    1043         304 :   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
    1044         304 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    1045             : 
    1046        1400 :   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
    1047             :        ++OpIdx) {
    1048        2192 :     MachineOperand &MO = Instr.getOperand(OpIdx);
    1049             :     const TargetRegisterClass *OpRegCstraints =
    1050        1096 :         Instr.getRegClassConstraint(OpIdx, TII, TRI);
    1051             : 
    1052             :     // If there's no constraint, there's nothing to do.
    1053        1096 :     if (!OpRegCstraints)
    1054         315 :       continue;
    1055             :     // If the operand is a frame index, there's nothing to do here.
    1056             :     // A frame index operand will resolve correctly during PEI.
    1057         781 :     if (MO.isFI())
    1058           2 :       continue;
    1059             : 
    1060             :     assert(MO.isReg() &&
    1061             :            "Operand has register constraints without being a register!");
    1062             : 
    1063         779 :     unsigned Reg = MO.getReg();
    1064         779 :     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    1065           0 :       if (!OpRegCstraints->contains(Reg))
    1066             :         return false;
    1067        1651 :     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
    1068          93 :                !MRI->constrainRegClass(Reg, OpRegCstraints))
    1069             :       return false;
    1070             :   }
    1071             : 
    1072             :   return true;
    1073             : }
    1074             : 
    1075             : /// \brief Return the opcode that does not set flags when possible - otherwise
    1076             : /// return the original opcode. The caller is responsible to do the actual
    1077             : /// substitution and legality checking.
    1078         743 : static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
    1079             :   // Don't convert all compare instructions, because for some the zero register
    1080             :   // encoding becomes the sp register.
    1081         743 :   bool MIDefinesZeroReg = false;
    1082        1486 :   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    1083             :     MIDefinesZeroReg = true;
    1084             : 
    1085        1486 :   switch (MI.getOpcode()) {
    1086           0 :   default:
    1087           0 :     return MI.getOpcode();
    1088             :   case AArch64::ADDSWrr:
    1089             :     return AArch64::ADDWrr;
    1090           0 :   case AArch64::ADDSWri:
    1091           0 :     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
    1092           0 :   case AArch64::ADDSWrs:
    1093           0 :     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
    1094           0 :   case AArch64::ADDSWrx:
    1095           0 :     return AArch64::ADDWrx;
    1096           0 :   case AArch64::ADDSXrr:
    1097           0 :     return AArch64::ADDXrr;
    1098           0 :   case AArch64::ADDSXri:
    1099           0 :     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
    1100           0 :   case AArch64::ADDSXrs:
    1101           0 :     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
    1102           0 :   case AArch64::ADDSXrx:
    1103           0 :     return AArch64::ADDXrx;
    1104         234 :   case AArch64::SUBSWrr:
    1105         234 :     return AArch64::SUBWrr;
    1106         100 :   case AArch64::SUBSWri:
    1107         100 :     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
    1108          23 :   case AArch64::SUBSWrs:
    1109          23 :     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
    1110           8 :   case AArch64::SUBSWrx:
    1111           8 :     return AArch64::SUBWrx;
    1112         152 :   case AArch64::SUBSXrr:
    1113         152 :     return AArch64::SUBXrr;
    1114         204 :   case AArch64::SUBSXri:
    1115         204 :     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
    1116          10 :   case AArch64::SUBSXrs:
    1117          10 :     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
    1118          12 :   case AArch64::SUBSXrx:
    1119          12 :     return AArch64::SUBXrx;
    1120             :   }
    1121             : }
    1122             : 
    1123             : enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
    1124             : 
    1125             : /// True when condition flags are accessed (either by writing or reading)
    1126             : /// on the instruction trace starting at From and ending at To.
    1127             : ///
    1128             : /// Note: If From and To are from different blocks it's assumed CC are accessed
    1129             : ///       on the path.
    1130          32 : static bool areCFlagsAccessedBetweenInstrs(
    1131             :     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    1132             :     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
    1133             :   // Early exit if To is at the beginning of the BB.
    1134          96 :   if (To == To->getParent()->begin())
    1135             :     return true;
    1136             : 
    1137             :   // Check whether the instructions are in the same basic block
    1138             :   // If not, assume the condition flags might get modified somewhere.
    1139          64 :   if (To->getParent() != From->getParent())
    1140             :     return true;
    1141             : 
    1142             :   // From must be above To.
    1143             :   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
    1144             :                       [From](MachineInstr &MI) {
    1145             :                         return MI.getIterator() == From;
    1146             :                       }) != To->getParent()->rend());
    1147             : 
    1148             :   // We iterate backward starting \p To until we hit \p From.
    1149         129 :   for (--To; To != From; --To) {
    1150          97 :     const MachineInstr &Instr = *To;
    1151             : 
    1152         194 :     if (((AccessToCheck & AK_Write) &&
    1153         291 :          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
    1154         194 :         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
    1155             :       return true;
    1156             :   }
    1157             :   return false;
    1158             : }
    1159             : 
    1160             : /// Try to optimize a compare instruction. A compare instruction is an
    1161             : /// instruction which produces AArch64::NZCV. It can be truly compare
    1162             : /// instruction
    1163             : /// when there are no uses of its destination register.
    1164             : ///
    1165             : /// The following steps are tried in order:
    1166             : /// 1. Convert CmpInstr into an unconditional version.
    1167             : /// 2. Remove CmpInstr if above there is an instruction producing a needed
    1168             : ///    condition code or an instruction which can be converted into such an
    1169             : ///    instruction.
    1170             : ///    Only comparison with zero is supported.
    1171        1137 : bool AArch64InstrInfo::optimizeCompareInstr(
    1172             :     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    1173             :     int CmpValue, const MachineRegisterInfo *MRI) const {
    1174             :   assert(CmpInstr.getParent());
    1175             :   assert(MRI);
    1176             : 
    1177             :   // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1178        1137 :   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
    1179        1137 :   if (DeadNZCVIdx != -1) {
    1180         598 :     if (CmpInstr.definesRegister(AArch64::WZR) ||
    1181         299 :         CmpInstr.definesRegister(AArch64::XZR)) {
    1182           0 :       CmpInstr.eraseFromParent();
    1183           0 :       return true;
    1184             :     }
    1185         598 :     unsigned Opc = CmpInstr.getOpcode();
    1186         299 :     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    1187         299 :     if (NewOpc == Opc)
    1188             :       return false;
    1189         598 :     const MCInstrDesc &MCID = get(NewOpc);
    1190         598 :     CmpInstr.setDesc(MCID);
    1191         299 :     CmpInstr.RemoveOperand(DeadNZCVIdx);
    1192         299 :     bool succeeded = UpdateOperandRegClass(CmpInstr);
    1193             :     (void)succeeded;
    1194             :     assert(succeeded && "Some operands reg class are incompatible!");
    1195         299 :     return true;
    1196             :   }
    1197             : 
    1198             :   // Continue only if we have a "ri" where immediate is zero.
    1199             :   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
    1200             :   // function.
    1201             :   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
    1202         838 :   if (CmpValue != 0 || SrcReg2 != 0)
    1203             :     return false;
    1204             : 
    1205             :   // CmpInstr is a Compare instruction if destination register is not used.
    1206         254 :   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    1207             :     return false;
    1208             : 
    1209         118 :   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
    1210             : }
    1211             : 
    1212             : /// Get opcode of S version of Instr.
    1213             : /// If Instr is S version its opcode is returned.
    1214             : /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
    1215             : /// or we are not interested in it.
    1216         149 : static unsigned sForm(MachineInstr &Instr) {
    1217         298 :   switch (Instr.getOpcode()) {
    1218             :   default:
    1219             :     return AArch64::INSTRUCTION_LIST_END;
    1220             : 
    1221           0 :   case AArch64::ADDSWrr:
    1222             :   case AArch64::ADDSWri:
    1223             :   case AArch64::ADDSXrr:
    1224             :   case AArch64::ADDSXri:
    1225             :   case AArch64::SUBSWrr:
    1226             :   case AArch64::SUBSWri:
    1227             :   case AArch64::SUBSXrr:
    1228             :   case AArch64::SUBSXri:
    1229           0 :     return Instr.getOpcode();
    1230             : 
    1231           0 :   case AArch64::ADDWrr:
    1232           0 :     return AArch64::ADDSWrr;
    1233           2 :   case AArch64::ADDWri:
    1234           2 :     return AArch64::ADDSWri;
    1235           0 :   case AArch64::ADDXrr:
    1236           0 :     return AArch64::ADDSXrr;
    1237           4 :   case AArch64::ADDXri:
    1238           4 :     return AArch64::ADDSXri;
    1239           0 :   case AArch64::ADCWr:
    1240           0 :     return AArch64::ADCSWr;
    1241           0 :   case AArch64::ADCXr:
    1242           0 :     return AArch64::ADCSXr;
    1243           6 :   case AArch64::SUBWrr:
    1244           6 :     return AArch64::SUBSWrr;
    1245           0 :   case AArch64::SUBWri:
    1246           0 :     return AArch64::SUBSWri;
    1247           4 :   case AArch64::SUBXrr:
    1248           4 :     return AArch64::SUBSXrr;
    1249          44 :   case AArch64::SUBXri:
    1250          44 :     return AArch64::SUBSXri;
    1251           0 :   case AArch64::SBCWr:
    1252           0 :     return AArch64::SBCSWr;
    1253           0 :   case AArch64::SBCXr:
    1254           0 :     return AArch64::SBCSXr;
    1255           2 :   case AArch64::ANDWri:
    1256           2 :     return AArch64::ANDSWri;
    1257           0 :   case AArch64::ANDXri:
    1258           0 :     return AArch64::ANDSXri;
    1259             :   }
    1260             : }
    1261             : 
    1262             : /// Check if AArch64::NZCV should be alive in successors of MBB.
    1263          31 : static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
    1264         104 :   for (auto *BB : MBB->successors())
    1265          42 :     if (BB->isLiveIn(AArch64::NZCV))
    1266             :       return true;
    1267             :   return false;
    1268             : }
    1269             : 
    1270             : namespace {
    1271             : 
    1272             : struct UsedNZCV {
    1273             :   bool N = false;
    1274             :   bool Z = false;
    1275             :   bool C = false;
    1276             :   bool V = false;
    1277             : 
    1278             :   UsedNZCV() = default;
    1279             : 
    1280             :   UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    1281          35 :     this->N |= UsedFlags.N;
    1282          35 :     this->Z |= UsedFlags.Z;
    1283          35 :     this->C |= UsedFlags.C;
    1284          35 :     this->V |= UsedFlags.V;
    1285             :     return *this;
    1286             :   }
    1287             : };
    1288             : 
    1289             : } // end anonymous namespace
    1290             : 
    1291             : /// Find a condition code used by the instruction.
    1292             : /// Returns AArch64CC::Invalid if either the instruction does not use condition
    1293             : /// codes or we don't optimize CmpInstr in the presence of such instructions.
    1294          35 : static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
    1295          70 :   switch (Instr.getOpcode()) {
    1296             :   default:
    1297             :     return AArch64CC::Invalid;
    1298             : 
    1299          20 :   case AArch64::Bcc: {
    1300          20 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1301             :     assert(Idx >= 2);
    1302          40 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
    1303             :   }
    1304             : 
    1305          15 :   case AArch64::CSINVWr:
    1306             :   case AArch64::CSINVXr:
    1307             :   case AArch64::CSINCWr:
    1308             :   case AArch64::CSINCXr:
    1309             :   case AArch64::CSELWr:
    1310             :   case AArch64::CSELXr:
    1311             :   case AArch64::CSNEGWr:
    1312             :   case AArch64::CSNEGXr:
    1313             :   case AArch64::FCSELSrrr:
    1314             :   case AArch64::FCSELDrrr: {
    1315          15 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1316             :     assert(Idx >= 1);
    1317          30 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
    1318             :   }
    1319             :   }
    1320             : }
    1321             : 
    1322             : static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
    1323             :   assert(CC != AArch64CC::Invalid);
    1324          35 :   UsedNZCV UsedFlags;
    1325             :   switch (CC) {
    1326             :   default:
    1327             :     break;
    1328             : 
    1329             :   case AArch64CC::EQ: // Z set
    1330             :   case AArch64CC::NE: // Z clear
    1331             :     UsedFlags.Z = true;
    1332             :     break;
    1333             : 
    1334             :   case AArch64CC::HI: // Z clear and C set
    1335             :   case AArch64CC::LS: // Z set   or  C clear
    1336             :     UsedFlags.Z = true;
    1337             :     LLVM_FALLTHROUGH;
    1338             :   case AArch64CC::HS: // C set
    1339             :   case AArch64CC::LO: // C clear
    1340             :     UsedFlags.C = true;
    1341             :     break;
    1342             : 
    1343             :   case AArch64CC::MI: // N set
    1344             :   case AArch64CC::PL: // N clear
    1345             :     UsedFlags.N = true;
    1346             :     break;
    1347             : 
    1348             :   case AArch64CC::VS: // V set
    1349             :   case AArch64CC::VC: // V clear
    1350             :     UsedFlags.V = true;
    1351             :     break;
    1352             : 
    1353             :   case AArch64CC::GT: // Z clear, N and V the same
    1354             :   case AArch64CC::LE: // Z set,   N and V differ
    1355             :     UsedFlags.Z = true;
    1356             :     LLVM_FALLTHROUGH;
    1357             :   case AArch64CC::GE: // N and V the same
    1358             :   case AArch64CC::LT: // N and V differ
    1359             :     UsedFlags.N = true;
    1360             :     UsedFlags.V = true;
    1361             :     break;
    1362             :   }
    1363             :   return UsedFlags;
    1364             : }
    1365             : 
    1366             : static bool isADDSRegImm(unsigned Opcode) {
    1367          31 :   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
    1368             : }
    1369             : 
    1370             : static bool isSUBSRegImm(unsigned Opcode) {
    1371          31 :   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
    1372             : }
    1373             : 
    1374             : /// Check if CmpInstr can be substituted by MI.
    1375             : ///
    1376             : /// CmpInstr can be substituted:
    1377             : /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
    1378             : /// - and, MI and CmpInstr are from the same MachineBB
    1379             : /// - and, condition flags are not alive in successors of the CmpInstr parent
    1380             : /// - and, if MI opcode is the S form there must be no defs of flags between
    1381             : ///        MI and CmpInstr
    1382             : ///        or if MI opcode is not the S form there must be neither defs of flags
    1383             : ///        nor uses of flags between MI and CmpInstr.
    1384             : /// - and  C/V flags are not used after CmpInstr
    1385          31 : static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
    1386             :                                        const TargetRegisterInfo *TRI) {
    1387             :   assert(MI);
    1388             :   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
    1389             :   assert(CmpInstr);
    1390             : 
    1391          62 :   const unsigned CmpOpcode = CmpInstr->getOpcode();
    1392          62 :   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    1393             :     return false;
    1394             : 
    1395          31 :   if (MI->getParent() != CmpInstr->getParent())
    1396             :     return false;
    1397             : 
    1398          31 :   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    1399             :     return false;
    1400             : 
    1401          31 :   AccessKind AccessToCheck = AK_Write;
    1402          62 :   if (sForm(*MI) != MI->getOpcode())
    1403          31 :     AccessToCheck = AK_All;
    1404          62 :   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    1405             :     return false;
    1406             : 
    1407          31 :   UsedNZCV NZCVUsedAfterCmp;
    1408          93 :   for (auto I = std::next(CmpInstr->getIterator()),
    1409          62 :             E = CmpInstr->getParent()->instr_end();
    1410         115 :        I != E; ++I) {
    1411          85 :     const MachineInstr &Instr = *I;
    1412          85 :     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
    1413          35 :       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
    1414          35 :       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
    1415             :         return false;
    1416          70 :       NZCVUsedAfterCmp |= getUsedNZCV(CC);
    1417             :     }
    1418             : 
    1419          85 :     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
    1420             :       break;
    1421             :   }
    1422             : 
    1423          31 :   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
    1424             : }
    1425             : 
    1426             : /// Substitute an instruction comparing to zero with another instruction
    1427             : /// which produces needed condition flags.
    1428             : ///
    1429             : /// Return true on success.
    1430         118 : bool AArch64InstrInfo::substituteCmpToZero(
    1431             :     MachineInstr &CmpInstr, unsigned SrcReg,
    1432             :     const MachineRegisterInfo *MRI) const {
    1433             :   assert(MRI);
    1434             :   // Get the unique definition of SrcReg.
    1435         118 :   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    1436         118 :   if (!MI)
    1437             :     return false;
    1438             : 
    1439         118 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1440             : 
    1441         118 :   unsigned NewOpc = sForm(*MI);
    1442         118 :   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    1443             :     return false;
    1444             : 
    1445          31 :   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    1446             :     return false;
    1447             : 
    1448             :   // Update the instruction to set NZCV.
    1449          15 :   MI->setDesc(get(NewOpc));
    1450           5 :   CmpInstr.eraseFromParent();
    1451           5 :   bool succeeded = UpdateOperandRegClass(*MI);
    1452             :   (void)succeeded;
    1453             :   assert(succeeded && "Some operands reg class are incompatible!");
    1454           5 :   MI->addRegisterDefined(AArch64::NZCV, TRI);
    1455           5 :   return true;
    1456             : }
    1457             : 
    1458        9144 : bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    1459       18288 :   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    1460             :     return false;
    1461             : 
    1462          19 :   MachineBasicBlock &MBB = *MI.getParent();
    1463          38 :   DebugLoc DL = MI.getDebugLoc();
    1464          19 :   unsigned Reg = MI.getOperand(0).getReg();
    1465             :   const GlobalValue *GV =
    1466          57 :       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
    1467          19 :   const TargetMachine &TM = MBB.getParent()->getTarget();
    1468          19 :   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
    1469          19 :   const unsigned char MO_NC = AArch64II::MO_NC;
    1470             : 
    1471          19 :   if ((OpFlags & AArch64II::MO_GOT) != 0) {
    1472          39 :     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
    1473          13 :         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    1474          39 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1475          13 :         .addReg(Reg, RegState::Kill)
    1476          13 :         .addImm(0)
    1477          26 :         .addMemOperand(*MI.memoperands_begin());
    1478           6 :   } else if (TM.getCodeModel() == CodeModel::Large) {
    1479           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
    1480           2 :         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
    1481           2 :         .addImm(0);
    1482           6 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1483           2 :         .addReg(Reg, RegState::Kill)
    1484           2 :         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
    1485           2 :         .addImm(16);
    1486           6 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1487           2 :         .addReg(Reg, RegState::Kill)
    1488           2 :         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
    1489           2 :         .addImm(32);
    1490           6 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1491           2 :         .addReg(Reg, RegState::Kill)
    1492           2 :         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
    1493           2 :         .addImm(48);
    1494           6 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1495           2 :         .addReg(Reg, RegState::Kill)
    1496           2 :         .addImm(0)
    1497           4 :         .addMemOperand(*MI.memoperands_begin());
    1498             :   } else {
    1499          12 :     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
    1500           8 :         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    1501           4 :     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    1502          12 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1503           4 :         .addReg(Reg, RegState::Kill)
    1504           8 :         .addGlobalAddress(GV, 0, LoFlags)
    1505           8 :         .addMemOperand(*MI.memoperands_begin());
    1506             :   }
    1507             : 
    1508          19 :   MBB.erase(MI);
    1509             : 
    1510          19 :   return true;
    1511             : }
    1512             : 
    1513             : /// Return true if this is this instruction has a non-zero immediate
    1514         428 : bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
    1515         856 :   switch (MI.getOpcode()) {
    1516             :   default:
    1517             :     break;
    1518         364 :   case AArch64::ADDSWrs:
    1519             :   case AArch64::ADDSXrs:
    1520             :   case AArch64::ADDWrs:
    1521             :   case AArch64::ADDXrs:
    1522             :   case AArch64::ANDSWrs:
    1523             :   case AArch64::ANDSXrs:
    1524             :   case AArch64::ANDWrs:
    1525             :   case AArch64::ANDXrs:
    1526             :   case AArch64::BICSWrs:
    1527             :   case AArch64::BICSXrs:
    1528             :   case AArch64::BICWrs:
    1529             :   case AArch64::BICXrs:
    1530             :   case AArch64::EONWrs:
    1531             :   case AArch64::EONXrs:
    1532             :   case AArch64::EORWrs:
    1533             :   case AArch64::EORXrs:
    1534             :   case AArch64::ORNWrs:
    1535             :   case AArch64::ORNXrs:
    1536             :   case AArch64::ORRWrs:
    1537             :   case AArch64::ORRXrs:
    1538             :   case AArch64::SUBSWrs:
    1539             :   case AArch64::SUBSXrs:
    1540             :   case AArch64::SUBWrs:
    1541             :   case AArch64::SUBXrs:
    1542         728 :     if (MI.getOperand(3).isImm()) {
    1543         364 :       unsigned val = MI.getOperand(3).getImm();
    1544         364 :       return (val != 0);
    1545           0 :     }
    1546             :     break;
    1547             :   }
    1548             :   return false;
    1549             : }
    1550             : 
    1551             : /// Return true if this is this instruction has a non-zero immediate
    1552          15 : bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
    1553          30 :   switch (MI.getOpcode()) {
    1554             :   default:
    1555             :     break;
    1556          15 :   case AArch64::ADDSWrx:
    1557             :   case AArch64::ADDSXrx:
    1558             :   case AArch64::ADDSXrx64:
    1559             :   case AArch64::ADDWrx:
    1560             :   case AArch64::ADDXrx:
    1561             :   case AArch64::ADDXrx64:
    1562             :   case AArch64::SUBSWrx:
    1563             :   case AArch64::SUBSXrx:
    1564             :   case AArch64::SUBSXrx64:
    1565             :   case AArch64::SUBWrx:
    1566             :   case AArch64::SUBXrx:
    1567             :   case AArch64::SUBXrx64:
    1568          30 :     if (MI.getOperand(3).isImm()) {
    1569          15 :       unsigned val = MI.getOperand(3).getImm();
    1570          15 :       return (val != 0);
    1571           0 :     }
    1572             :     break;
    1573             :   }
    1574             : 
    1575             :   return false;
    1576             : }
    1577             : 
    1578             : // Return true if this instruction simply sets its single destination register
    1579             : // to zero. This is equivalent to a register rename of the zero-register.
    1580          94 : bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
    1581         188 :   switch (MI.getOpcode()) {
    1582             :   default:
    1583             :     break;
    1584          46 :   case AArch64::MOVZWi:
    1585             :   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    1586          92 :     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
    1587             :       assert(MI.getDesc().getNumOperands() == 3 &&
    1588             :              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
    1589             :       return true;
    1590             :     }
    1591             :     break;
    1592          16 :   case AArch64::ANDWri: // and Rd, Rzr, #imm
    1593          16 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1594          32 :   case AArch64::ANDXri:
    1595          32 :     return MI.getOperand(1).getReg() == AArch64::XZR;
    1596           0 :   case TargetOpcode::COPY:
    1597           0 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1598             :   }
    1599             :   return false;
    1600             : }
    1601             : 
    1602             : // Return true if this instruction simply renames a general register without
    1603             : // modifying bits.
    1604       10146 : bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
    1605       20292 :   switch (MI.getOpcode()) {
    1606             :   default:
    1607             :     break;
    1608        9941 :   case TargetOpcode::COPY: {
    1609             :     // GPR32 copies will by lowered to ORRXrs
    1610        9941 :     unsigned DstReg = MI.getOperand(0).getReg();
    1611       14017 :     return (AArch64::GPR32RegClass.contains(DstReg) ||
    1612       11158 :             AArch64::GPR64RegClass.contains(DstReg));
    1613             :   }
    1614           0 :   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    1615           0 :     if (MI.getOperand(1).getReg() == AArch64::XZR) {
    1616             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1617             :              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
    1618             :       return true;
    1619             :     }
    1620             :     break;
    1621           0 :   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    1622           0 :     if (MI.getOperand(2).getImm() == 0) {
    1623             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1624             :              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
    1625             :       return true;
    1626             :     }
    1627             :     break;
    1628             :   }
    1629             :   return false;
    1630             : }
    1631             : 
    1632             : // Return true if this instruction simply renames a general register without
    1633             : // modifying bits.
    1634        8753 : bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
    1635       17506 :   switch (MI.getOpcode()) {
    1636             :   default:
    1637             :     break;
    1638        8548 :   case TargetOpcode::COPY: {
    1639             :     // FPR64 copies will by lowered to ORR.16b
    1640        8548 :     unsigned DstReg = MI.getOperand(0).getReg();
    1641        9354 :     return (AArch64::FPR64RegClass.contains(DstReg) ||
    1642        8737 :             AArch64::FPR128RegClass.contains(DstReg));
    1643             :   }
    1644           0 :   case AArch64::ORRv16i8:
    1645           0 :     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
    1646             :       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
    1647             :              "invalid ORRv16i8 operands");
    1648             :       return true;
    1649             :     }
    1650             :     break;
    1651             :   }
    1652             :   return false;
    1653             : }
    1654             : 
    1655        8504 : unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
    1656             :                                                int &FrameIndex) const {
    1657       17008 :   switch (MI.getOpcode()) {
    1658             :   default:
    1659             :     break;
    1660         507 :   case AArch64::LDRWui:
    1661             :   case AArch64::LDRXui:
    1662             :   case AArch64::LDRBui:
    1663             :   case AArch64::LDRHui:
    1664             :   case AArch64::LDRSui:
    1665             :   case AArch64::LDRDui:
    1666             :   case AArch64::LDRQui:
    1667        2364 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1668        1515 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1669         336 :       FrameIndex = MI.getOperand(1).getIndex();
    1670         336 :       return MI.getOperand(0).getReg();
    1671             :     }
    1672             :     break;
    1673             :   }
    1674             : 
    1675             :   return 0;
    1676             : }
    1677             : 
    1678        4268 : unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
    1679             :                                               int &FrameIndex) const {
    1680        4268 :   switch (MI.getOpcode()) {
    1681             :   default:
    1682             :     break;
    1683         129 :   case AArch64::STRWui:
    1684             :   case AArch64::STRXui:
    1685             :   case AArch64::STRBui:
    1686             :   case AArch64::STRHui:
    1687             :   case AArch64::STRSui:
    1688             :   case AArch64::STRDui:
    1689             :   case AArch64::STRQui:
    1690         552 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1691         297 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1692          48 :       FrameIndex = MI.getOperand(1).getIndex();
    1693          48 :       return MI.getOperand(0).getReg();
    1694             :     }
    1695             :     break;
    1696             :   }
    1697             :   return 0;
    1698             : }
    1699             : 
    1700             : /// Return true if this is load/store scales or extends its register offset.
    1701             : /// This refers to scaling a dynamic index as opposed to scaled immediates.
    1702             : /// MI should be a memory op that allows scaled addressing.
    1703         597 : bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
    1704        1194 :   switch (MI.getOpcode()) {
    1705             :   default:
    1706             :     break;
    1707         553 :   case AArch64::LDRBBroW:
    1708             :   case AArch64::LDRBroW:
    1709             :   case AArch64::LDRDroW:
    1710             :   case AArch64::LDRHHroW:
    1711             :   case AArch64::LDRHroW:
    1712             :   case AArch64::LDRQroW:
    1713             :   case AArch64::LDRSBWroW:
    1714             :   case AArch64::LDRSBXroW:
    1715             :   case AArch64::LDRSHWroW:
    1716             :   case AArch64::LDRSHXroW:
    1717             :   case AArch64::LDRSWroW:
    1718             :   case AArch64::LDRSroW:
    1719             :   case AArch64::LDRWroW:
    1720             :   case AArch64::LDRXroW:
    1721             :   case AArch64::STRBBroW:
    1722             :   case AArch64::STRBroW:
    1723             :   case AArch64::STRDroW:
    1724             :   case AArch64::STRHHroW:
    1725             :   case AArch64::STRHroW:
    1726             :   case AArch64::STRQroW:
    1727             :   case AArch64::STRSroW:
    1728             :   case AArch64::STRWroW:
    1729             :   case AArch64::STRXroW:
    1730             :   case AArch64::LDRBBroX:
    1731             :   case AArch64::LDRBroX:
    1732             :   case AArch64::LDRDroX:
    1733             :   case AArch64::LDRHHroX:
    1734             :   case AArch64::LDRHroX:
    1735             :   case AArch64::LDRQroX:
    1736             :   case AArch64::LDRSBWroX:
    1737             :   case AArch64::LDRSBXroX:
    1738             :   case AArch64::LDRSHWroX:
    1739             :   case AArch64::LDRSHXroX:
    1740             :   case AArch64::LDRSWroX:
    1741             :   case AArch64::LDRSroX:
    1742             :   case AArch64::LDRWroX:
    1743             :   case AArch64::LDRXroX:
    1744             :   case AArch64::STRBBroX:
    1745             :   case AArch64::STRBroX:
    1746             :   case AArch64::STRDroX:
    1747             :   case AArch64::STRHHroX:
    1748             :   case AArch64::STRHroX:
    1749             :   case AArch64::STRQroX:
    1750             :   case AArch64::STRSroX:
    1751             :   case AArch64::STRWroX:
    1752             :   case AArch64::STRXroX:
    1753             : 
    1754         553 :     unsigned Val = MI.getOperand(3).getImm();
    1755         553 :     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    1756         553 :     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
    1757             :   }
    1758             :   return false;
    1759             : }
    1760             : 
    1761             : /// Check all MachineMemOperands for a hint to suppress pairing.
    1762       18357 : bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
    1763       18357 :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1764       12166 :     return MMO->getFlags() & MOSuppressPair;
    1765       48880 :   });
    1766             : }
    1767             : 
    1768             : /// Set a flag on the first MachineMemOperand to suppress pairing.
    1769          11 : void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
    1770          11 :   if (MI.memoperands_empty())
    1771             :     return;
    1772          11 :   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
    1773             : }
    1774             : 
    1775             : /// Check all MachineMemOperands for a hint that the load/store is strided.
    1776          33 : bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
    1777          33 :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1778          13 :     return MMO->getFlags() & MOStridedAccess;
    1779          79 :   });
    1780             : }
    1781             : 
    1782       30072 : bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
    1783       30072 :   switch (Opc) {
    1784             :   default:
    1785             :     return false;
    1786        1079 :   case AArch64::STURSi:
    1787             :   case AArch64::STURDi:
    1788             :   case AArch64::STURQi:
    1789             :   case AArch64::STURBBi:
    1790             :   case AArch64::STURHHi:
    1791             :   case AArch64::STURWi:
    1792             :   case AArch64::STURXi:
    1793             :   case AArch64::LDURSi:
    1794             :   case AArch64::LDURDi:
    1795             :   case AArch64::LDURQi:
    1796             :   case AArch64::LDURWi:
    1797             :   case AArch64::LDURXi:
    1798             :   case AArch64::LDURSWi:
    1799             :   case AArch64::LDURHHi:
    1800             :   case AArch64::LDURBBi:
    1801             :   case AArch64::LDURSBWi:
    1802             :   case AArch64::LDURSHWi:
    1803        1079 :     return true;
    1804             :   }
    1805             : }
    1806             : 
    1807       14654 : bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
    1808       29308 :   return isUnscaledLdSt(MI.getOpcode());
    1809             : }
    1810             : 
    1811             : // Is this a candidate for ld/st merging or pairing?  For example, we don't
    1812             : // touch volatiles or load/stores that have a hint to avoid pair formation.
    1813        8772 : bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
    1814             :   // If this is a volatile load/store, don't mess with it.
    1815        8772 :   if (MI.hasOrderedMemoryRef())
    1816             :     return false;
    1817             : 
    1818             :   // Make sure this is a reg+imm (as opposed to an address reloc).
    1819             :   assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
    1820       14666 :   if (!MI.getOperand(2).isImm())
    1821             :     return false;
    1822             : 
    1823             :   // Can't merge/pair if the instruction modifies the base register.
    1824             :   // e.g., ldr x0, [x0]
    1825        6726 :   unsigned BaseReg = MI.getOperand(1).getReg();
    1826        6726 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1827        6726 :   if (MI.modifiesRegister(BaseReg, TRI))
    1828             :     return false;
    1829             : 
    1830             :   // Check if this load/store has a hint to avoid pair formation.
    1831             :   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
    1832        6618 :   if (isLdStPairSuppressed(MI))
    1833             :     return false;
    1834             : 
    1835             :   // On some CPUs quad load/store pairs are slower than two single load/stores.
    1836        6603 :   if (Subtarget.isPaired128Slow()) {
    1837         204 :     switch (MI.getOpcode()) {
    1838             :     default:
    1839             :       break;
    1840             :     case AArch64::LDURQi:
    1841             :     case AArch64::STURQi:
    1842             :     case AArch64::LDRQui:
    1843             :     case AArch64::STRQui:
    1844             :       return false;
    1845             :     }
    1846             :   }
    1847             : 
    1848             :   return true;
    1849             : }
    1850             : 
    1851       10956 : bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    1852             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    1853             :     const TargetRegisterInfo *TRI) const {
    1854             :   unsigned Width;
    1855       10956 :   return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
    1856             : }
    1857             : 
    1858       23153 : bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    1859             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    1860             :     const TargetRegisterInfo *TRI) const {
    1861             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    1862             :   // Handle only loads/stores with base register followed by immediate offset.
    1863       23153 :   if (LdSt.getNumExplicitOperands() == 3) {
    1864             :     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    1865       55065 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
    1866             :       return false;
    1867        3655 :   } else if (LdSt.getNumExplicitOperands() == 4) {
    1868             :     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    1869        7567 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
    1870        3650 :         !LdSt.getOperand(3).isImm())
    1871             :       return false;
    1872             :   } else
    1873             :     return false;
    1874             : 
    1875             :   // Get the scaling factor for the instruction and set the width for the
    1876             :   // instruction.
    1877       14340 :   unsigned Scale = 0;
    1878             :   int64_t Dummy1, Dummy2;
    1879             : 
    1880             :   // If this returns false, then it's an instruction we don't want to handle.
    1881       28680 :   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    1882             :     return false;
    1883             : 
    1884             :   // Compute the offset. Offset is calculated as the immediate operand
    1885             :   // multiplied by the scaling factor. Unscaled instructions have scaling factor
    1886             :   // set to 1.
    1887       14022 :   if (LdSt.getNumExplicitOperands() == 3) {
    1888       12966 :     BaseReg = LdSt.getOperand(1).getReg();
    1889       12966 :     Offset = LdSt.getOperand(2).getImm() * Scale;
    1890             :   } else {
    1891             :     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    1892        1056 :     BaseReg = LdSt.getOperand(2).getReg();
    1893        1056 :     Offset = LdSt.getOperand(3).getImm() * Scale;
    1894             :   }
    1895             :   return true;
    1896             : }
    1897             : 
    1898             : MachineOperand &
    1899           0 : AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
    1900             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    1901           0 :   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
    1902             :   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
    1903           0 :   return OfsOp;
    1904             : }
    1905             : 
    1906       14350 : bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
    1907             :                                     unsigned &Width, int64_t &MinOffset,
    1908             :                                     int64_t &MaxOffset) const {
    1909       14350 :   switch (Opcode) {
    1910             :   // Not a memory operation or something we want to handle.
    1911         318 :   default:
    1912         318 :     Scale = Width = 0;
    1913         318 :     MinOffset = MaxOffset = 0;
    1914         318 :     return false;
    1915          19 :   case AArch64::STRWpost:
    1916             :   case AArch64::LDRWpost:
    1917          19 :     Width = 32;
    1918          19 :     Scale = 4;
    1919          19 :     MinOffset = -256;
    1920          19 :     MaxOffset = 255;
    1921          19 :     break;
    1922         122 :   case AArch64::LDURQi:
    1923             :   case AArch64::STURQi:
    1924         122 :     Width = 16;
    1925         122 :     Scale = 1;
    1926         122 :     MinOffset = -256;
    1927         122 :     MaxOffset = 255;
    1928         122 :     break;
    1929         182 :   case AArch64::LDURXi:
    1930             :   case AArch64::LDURDi:
    1931             :   case AArch64::STURXi:
    1932             :   case AArch64::STURDi:
    1933         182 :     Width = 8;
    1934         182 :     Scale = 1;
    1935         182 :     MinOffset = -256;
    1936         182 :     MaxOffset = 255;
    1937         182 :     break;
    1938         211 :   case AArch64::LDURWi:
    1939             :   case AArch64::LDURSi:
    1940             :   case AArch64::LDURSWi:
    1941             :   case AArch64::STURWi:
    1942             :   case AArch64::STURSi:
    1943         211 :     Width = 4;
    1944         211 :     Scale = 1;
    1945         211 :     MinOffset = -256;
    1946         211 :     MaxOffset = 255;
    1947         211 :     break;
    1948         128 :   case AArch64::LDURHi:
    1949             :   case AArch64::LDURHHi:
    1950             :   case AArch64::LDURSHXi:
    1951             :   case AArch64::LDURSHWi:
    1952             :   case AArch64::STURHi:
    1953             :   case AArch64::STURHHi:
    1954         128 :     Width = 2;
    1955         128 :     Scale = 1;
    1956         128 :     MinOffset = -256;
    1957         128 :     MaxOffset = 255;
    1958         128 :     break;
    1959          90 :   case AArch64::LDURBi:
    1960             :   case AArch64::LDURBBi:
    1961             :   case AArch64::LDURSBXi:
    1962             :   case AArch64::LDURSBWi:
    1963             :   case AArch64::STURBi:
    1964             :   case AArch64::STURBBi:
    1965          90 :     Width = 1;
    1966          90 :     Scale = 1;
    1967          90 :     MinOffset = -256;
    1968          90 :     MaxOffset = 255;
    1969          90 :     break;
    1970         243 :   case AArch64::LDPQi:
    1971             :   case AArch64::LDNPQi:
    1972             :   case AArch64::STPQi:
    1973             :   case AArch64::STNPQi:
    1974         243 :     Scale = 16;
    1975         243 :     Width = 32;
    1976         243 :     MinOffset = -64;
    1977         243 :     MaxOffset = 63;
    1978         243 :     break;
    1979        2912 :   case AArch64::LDRQui:
    1980             :   case AArch64::STRQui:
    1981        2912 :     Scale = Width = 16;
    1982        2912 :     MinOffset = 0;
    1983        2912 :     MaxOffset = 4095;
    1984        2912 :     break;
    1985         624 :   case AArch64::LDPXi:
    1986             :   case AArch64::LDPDi:
    1987             :   case AArch64::LDNPXi:
    1988             :   case AArch64::LDNPDi:
    1989             :   case AArch64::STPXi:
    1990             :   case AArch64::STPDi:
    1991             :   case AArch64::STNPXi:
    1992             :   case AArch64::STNPDi:
    1993         624 :     Scale = 8;
    1994         624 :     Width = 16;
    1995         624 :     MinOffset = -64;
    1996         624 :     MaxOffset = 63;
    1997         624 :     break;
    1998        6523 :   case AArch64::LDRXui:
    1999             :   case AArch64::LDRDui:
    2000             :   case AArch64::STRXui:
    2001             :   case AArch64::STRDui:
    2002        6523 :     Scale = Width = 8;
    2003        6523 :     MinOffset = 0;
    2004        6523 :     MaxOffset = 4095;
    2005        6523 :     break;
    2006         178 :   case AArch64::LDPWi:
    2007             :   case AArch64::LDPSi:
    2008             :   case AArch64::LDNPWi:
    2009             :   case AArch64::LDNPSi:
    2010             :   case AArch64::STPWi:
    2011             :   case AArch64::STPSi:
    2012             :   case AArch64::STNPWi:
    2013             :   case AArch64::STNPSi:
    2014         178 :     Scale = 4;
    2015         178 :     Width = 8;
    2016         178 :     MinOffset = -64;
    2017         178 :     MaxOffset = 63;
    2018         178 :     break;
    2019        1889 :   case AArch64::LDRWui:
    2020             :   case AArch64::LDRSui:
    2021             :   case AArch64::LDRSWui:
    2022             :   case AArch64::STRWui:
    2023             :   case AArch64::STRSui:
    2024        1889 :     Scale = Width = 4;
    2025        1889 :     MinOffset = 0;
    2026        1889 :     MaxOffset = 4095;
    2027        1889 :     break;
    2028         241 :   case AArch64::LDRHui:
    2029             :   case AArch64::LDRHHui:
    2030             :   case AArch64::STRHui:
    2031             :   case AArch64::STRHHui:
    2032         241 :     Scale = Width = 2;
    2033         241 :     MinOffset = 0;
    2034         241 :     MaxOffset = 4095;
    2035         241 :     break;
    2036         670 :   case AArch64::LDRBui:
    2037             :   case AArch64::LDRBBui:
    2038             :   case AArch64::STRBui:
    2039             :   case AArch64::STRBBui:
    2040         670 :     Scale = Width = 1;
    2041         670 :     MinOffset = 0;
    2042         670 :     MaxOffset = 4095;
    2043         670 :     break;
    2044             :   }
    2045             : 
    2046             :   return true;
    2047             : }
    2048             : 
    2049             : // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
    2050             : // scaled.
    2051          51 : static bool scaleOffset(unsigned Opc, int64_t &Offset) {
    2052          51 :   unsigned OffsetStride = 1;
    2053          51 :   switch (Opc) {
    2054             :   default:
    2055             :     return false;
    2056             :   case AArch64::LDURQi:
    2057             :   case AArch64::STURQi:
    2058             :     OffsetStride = 16;
    2059             :     break;
    2060          13 :   case AArch64::LDURXi:
    2061             :   case AArch64::LDURDi:
    2062             :   case AArch64::STURXi:
    2063             :   case AArch64::STURDi:
    2064          13 :     OffsetStride = 8;
    2065          13 :     break;
    2066          37 :   case AArch64::LDURWi:
    2067             :   case AArch64::LDURSi:
    2068             :   case AArch64::LDURSWi:
    2069             :   case AArch64::STURWi:
    2070             :   case AArch64::STURSi:
    2071          37 :     OffsetStride = 4;
    2072          37 :     break;
    2073             :   }
    2074             :   // If the byte-offset isn't a multiple of the stride, we can't scale this
    2075             :   // offset.
    2076          51 :   if (Offset % OffsetStride != 0)
    2077             :     return false;
    2078             : 
    2079             :   // Convert the byte-offset used by unscaled into an "element" offset used
    2080             :   // by the scaled pair load/store instructions.
    2081          46 :   Offset /= OffsetStride;
    2082          46 :   return true;
    2083             : }
    2084             : 
    2085             : static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
    2086         523 :   if (FirstOpc == SecondOpc)
    2087             :     return true;
    2088             :   // We can also pair sign-ext and zero-ext instructions.
    2089          49 :   switch (FirstOpc) {
    2090             :   default:
    2091             :     return false;
    2092           3 :   case AArch64::LDRWui:
    2093             :   case AArch64::LDURWi:
    2094           3 :     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
    2095           4 :   case AArch64::LDRSWui:
    2096             :   case AArch64::LDURSWi:
    2097           4 :     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
    2098             :   }
    2099             :   // These instructions can't be paired based on their opcodes.
    2100             :   return false;
    2101             : }
    2102             : 
    2103             : /// Detect opportunities for ldp/stp formation.
    2104             : ///
    2105             : /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
    2106         773 : bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
    2107             :                                            MachineInstr &SecondLdSt,
    2108             :                                            unsigned NumLoads) const {
    2109             :   // Only cluster up to a single pair.
    2110         773 :   if (NumLoads > 1)
    2111             :     return false;
    2112             : 
    2113         594 :   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    2114             :     return false;
    2115             : 
    2116             :   // Can we pair these instructions based on their opcodes?
    2117        1046 :   unsigned FirstOpc = FirstLdSt.getOpcode();
    2118        1046 :   unsigned SecondOpc = SecondLdSt.getOpcode();
    2119           7 :   if (!canPairLdStOpc(FirstOpc, SecondOpc))
    2120             :     return false;
    2121             : 
    2122             :   // Can't merge volatiles or load/stores that have a hint to avoid pair
    2123             :   // formation, for example.
    2124         929 :   if (!isCandidateToMergeOrPair(FirstLdSt) ||
    2125         449 :       !isCandidateToMergeOrPair(SecondLdSt))
    2126             :     return false;
    2127             : 
    2128             :   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
    2129         446 :   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
    2130         446 :   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    2131             :     return false;
    2132             : 
    2133         441 :   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
    2134         441 :   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    2135             :     return false;
    2136             : 
    2137             :   // Pairwise instructions have a 7-bit signed offset field.
    2138         441 :   if (Offset1 > 63 || Offset1 < -64)
    2139             :     return false;
    2140             : 
    2141             :   // The caller should already have ordered First/SecondLdSt by offset.
    2142             :   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
    2143         427 :   return Offset1 + 1 == Offset2;
    2144             : }
    2145             : 
    2146          87 : static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
    2147             :                                             unsigned Reg, unsigned SubIdx,
    2148             :                                             unsigned State,
    2149             :                                             const TargetRegisterInfo *TRI) {
    2150          87 :   if (!SubIdx)
    2151           0 :     return MIB.addReg(Reg, State);
    2152             : 
    2153          87 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
    2154          87 :     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
    2155           0 :   return MIB.addReg(Reg, State, SubIdx);
    2156             : }
    2157             : 
    2158             : static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
    2159             :                                         unsigned NumRegs) {
    2160             :   // We really want the positive remainder mod 32 here, that happens to be
    2161             :   // easily obtainable with a mask.
    2162          11 :   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
    2163             : }
    2164             : 
    2165          11 : void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
    2166             :                                         MachineBasicBlock::iterator I,
    2167             :                                         const DebugLoc &DL, unsigned DestReg,
    2168             :                                         unsigned SrcReg, bool KillSrc,
    2169             :                                         unsigned Opcode,
    2170             :                                         ArrayRef<unsigned> Indices) const {
    2171             :   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
    2172          11 :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2173          22 :   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
    2174          22 :   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
    2175          11 :   unsigned NumRegs = Indices.size();
    2176             : 
    2177          11 :   int SubReg = 0, End = NumRegs, Incr = 1;
    2178          22 :   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    2179           4 :     SubReg = NumRegs - 1;
    2180           4 :     End = -1;
    2181           4 :     Incr = -1;
    2182             :   }
    2183             : 
    2184          69 :   for (; SubReg != End; SubReg += Incr) {
    2185          58 :     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    2186          58 :     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    2187          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    2188          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
    2189             :   }
    2190          11 : }
    2191             : 
    2192        3112 : void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    2193             :                                    MachineBasicBlock::iterator I,
    2194             :                                    const DebugLoc &DL, unsigned DestReg,
    2195             :                                    unsigned SrcReg, bool KillSrc) const {
    2196        6569 :   if (AArch64::GPR32spRegClass.contains(DestReg) &&
    2197        2686 :       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    2198         798 :     const TargetRegisterInfo *TRI = &getRegisterInfo();
    2199             : 
    2200         798 :     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    2201             :       // If either operand is WSP, expand to ADD #0.
    2202           0 :       if (Subtarget.hasZeroCycleRegMove()) {
    2203             :         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
    2204             :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2205           0 :                                                      &AArch64::GPR64spRegClass);
    2206             :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2207           0 :                                                     &AArch64::GPR64spRegClass);
    2208             :         // This instruction is reading and writing X registers.  This may upset
    2209             :         // the register scavenger and machine verifier, so we need to indicate
    2210             :         // that we are reading an undefined value from SrcRegX, but a proper
    2211             :         // value from SrcReg.
    2212           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
    2213           0 :             .addReg(SrcRegX, RegState::Undef)
    2214           0 :             .addImm(0)
    2215           0 :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    2216           0 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2217             :       } else {
    2218           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
    2219           0 :             .addReg(SrcReg, getKillRegState(KillSrc))
    2220           0 :             .addImm(0)
    2221           0 :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2222             :       }
    2223         798 :     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
    2224          72 :       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
    2225          36 :           .addImm(0)
    2226          72 :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2227             :     } else {
    2228         762 :       if (Subtarget.hasZeroCycleRegMove()) {
    2229             :         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
    2230             :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2231         152 :                                                      &AArch64::GPR64spRegClass);
    2232             :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2233         152 :                                                     &AArch64::GPR64spRegClass);
    2234             :         // This instruction is reading and writing X registers.  This may upset
    2235             :         // the register scavenger and machine verifier, so we need to indicate
    2236             :         // that we are reading an undefined value from SrcRegX, but a proper
    2237             :         // value from SrcReg.
    2238         228 :         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
    2239          76 :             .addReg(AArch64::XZR)
    2240          76 :             .addReg(SrcRegX, RegState::Undef)
    2241          76 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2242             :       } else {
    2243             :         // Otherwise, expand to ORR WZR.
    2244        2058 :         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
    2245         686 :             .addReg(AArch64::WZR)
    2246         686 :             .addReg(SrcReg, getKillRegState(KillSrc));
    2247             :       }
    2248             :     }
    2249             :     return;
    2250             :   }
    2251             : 
    2252        5600 :   if (AArch64::GPR64spRegClass.contains(DestReg) &&
    2253        2183 :       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    2254         854 :     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
    2255             :       // If either operand is SP, expand to ADD #0.
    2256         312 :       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
    2257         104 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2258         104 :           .addImm(0)
    2259         208 :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2260         750 :     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
    2261          34 :       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
    2262          17 :           .addImm(0)
    2263          34 :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2264             :     } else {
    2265             :       // Otherwise, expand to ORR XZR.
    2266        2199 :       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
    2267         733 :           .addReg(AArch64::XZR)
    2268         733 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2269             :     }
    2270             :     return;
    2271             :   }
    2272             : 
    2273             :   // Copy a DDDD register quad by copying the individual sub-registers.
    2274        2914 :   if (AArch64::DDDDRegClass.contains(DestReg) &&
    2275           0 :       AArch64::DDDDRegClass.contains(SrcReg)) {
    2276             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2277             :                                        AArch64::dsub2, AArch64::dsub3};
    2278           0 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2279           0 :                      Indices);
    2280           0 :     return;
    2281             :   }
    2282             : 
    2283             :   // Copy a DDD register triple by copying the individual sub-registers.
    2284        2916 :   if (AArch64::DDDRegClass.contains(DestReg) &&
    2285           2 :       AArch64::DDDRegClass.contains(SrcReg)) {
    2286             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2287             :                                        AArch64::dsub2};
    2288           1 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2289           1 :                      Indices);
    2290           1 :     return;
    2291             :   }
    2292             : 
    2293             :   // Copy a DD register pair by copying the individual sub-registers.
    2294        2916 :   if (AArch64::DDRegClass.contains(DestReg) &&
    2295           8 :       AArch64::DDRegClass.contains(SrcReg)) {
    2296             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    2297           4 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2298           4 :                      Indices);
    2299           4 :     return;
    2300             :   }
    2301             : 
    2302             :   // Copy a QQQQ register quad by copying the individual sub-registers.
    2303        2912 :   if (AArch64::QQQQRegClass.contains(DestReg) &&
    2304           4 :       AArch64::QQQQRegClass.contains(SrcReg)) {
    2305             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2306             :                                        AArch64::qsub2, AArch64::qsub3};
    2307           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2308           2 :                      Indices);
    2309           2 :     return;
    2310             :   }
    2311             : 
    2312             :   // Copy a QQQ register triple by copying the individual sub-registers.
    2313        2908 :   if (AArch64::QQQRegClass.contains(DestReg) &&
    2314           4 :       AArch64::QQQRegClass.contains(SrcReg)) {
    2315             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2316             :                                        AArch64::qsub2};
    2317           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2318           2 :                      Indices);
    2319           2 :     return;
    2320             :   }
    2321             : 
    2322             :   // Copy a QQ register pair by copying the individual sub-registers.
    2323        2904 :   if (AArch64::QQRegClass.contains(DestReg) &&
    2324           4 :       AArch64::QQRegClass.contains(SrcReg)) {
    2325             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    2326           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2327           2 :                      Indices);
    2328           2 :     return;
    2329             :   }
    2330             : 
    2331        2440 :   if (AArch64::FPR128RegClass.contains(DestReg) &&
    2332         568 :       AArch64::FPR128RegClass.contains(SrcReg)) {
    2333         284 :     if (Subtarget.hasNEON()) {
    2334         849 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2335         283 :           .addReg(SrcReg)
    2336         283 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2337             :     } else {
    2338           3 :       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
    2339           1 :           .addReg(AArch64::SP, RegState::Define)
    2340           1 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2341           1 :           .addReg(AArch64::SP)
    2342           1 :           .addImm(-16);
    2343           3 :       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
    2344           1 :           .addReg(AArch64::SP, RegState::Define)
    2345           1 :           .addReg(DestReg, RegState::Define)
    2346           1 :           .addReg(AArch64::SP)
    2347           1 :           .addImm(16);
    2348             :     }
    2349             :     return;
    2350             :   }
    2351             : 
    2352        1839 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2353         673 :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2354         267 :     if (Subtarget.hasNEON()) {
    2355         528 :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
    2356             :                                        &AArch64::FPR128RegClass);
    2357         528 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
    2358             :                                       &AArch64::FPR128RegClass);
    2359         792 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2360         264 :           .addReg(SrcReg)
    2361         264 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2362             :     } else {
    2363           9 :       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
    2364           3 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2365             :     }
    2366             :     return;
    2367             :   }
    2368             : 
    2369        1461 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2370         407 :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2371          26 :     if (Subtarget.hasNEON()) {
    2372          50 :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
    2373             :                                        &AArch64::FPR128RegClass);
    2374          50 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
    2375             :                                       &AArch64::FPR128RegClass);
    2376          75 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2377          25 :           .addReg(SrcReg)
    2378          25 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2379             :     } else {
    2380           3 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2381           1 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2382             :     }
    2383             :     return;
    2384             :   }
    2385             : 
    2386        1048 :   if (AArch64::FPR16RegClass.contains(DestReg) &&
    2387          40 :       AArch64::FPR16RegClass.contains(SrcReg)) {
    2388          20 :     if (Subtarget.hasNEON()) {
    2389          40 :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2390             :                                        &AArch64::FPR128RegClass);
    2391          40 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2392             :                                       &AArch64::FPR128RegClass);
    2393          60 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2394          20 :           .addReg(SrcReg)
    2395          20 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2396             :     } else {
    2397           0 :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2398             :                                        &AArch64::FPR32RegClass);
    2399           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2400             :                                       &AArch64::FPR32RegClass);
    2401           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2402           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2403             :     }
    2404             :     return;
    2405             :   }
    2406             : 
    2407         853 :   if (AArch64::FPR8RegClass.contains(DestReg) &&
    2408           0 :       AArch64::FPR8RegClass.contains(SrcReg)) {
    2409           0 :     if (Subtarget.hasNEON()) {
    2410           0 :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2411             :                                        &AArch64::FPR128RegClass);
    2412           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2413             :                                       &AArch64::FPR128RegClass);
    2414           0 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2415           0 :           .addReg(SrcReg)
    2416           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2417             :     } else {
    2418           0 :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2419             :                                        &AArch64::FPR32RegClass);
    2420           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2421             :                                       &AArch64::FPR32RegClass);
    2422           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2423           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2424             :     }
    2425             :     return;
    2426             :   }
    2427             : 
    2428             :   // Copies between GPR64 and FPR64.
    2429        1123 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2430         270 :       AArch64::GPR64RegClass.contains(SrcReg)) {
    2431         405 :     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
    2432         135 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2433         135 :     return;
    2434             :   }
    2435        1560 :   if (AArch64::GPR64RegClass.contains(DestReg) &&
    2436         252 :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2437         375 :     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
    2438         125 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2439         125 :     return;
    2440             :   }
    2441             :   // Copies between GPR32 and FPR32.
    2442        1291 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2443         698 :       AArch64::GPR32RegClass.contains(SrcReg)) {
    2444        1047 :     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
    2445         349 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2446         349 :     return;
    2447             :   }
    2448         726 :   if (AArch64::GPR32RegClass.contains(DestReg) &&
    2449         482 :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2450         723 :     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
    2451         241 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2452         241 :     return;
    2453             :   }
    2454             : 
    2455           2 :   if (DestReg == AArch64::NZCV) {
    2456             :     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    2457           3 :     BuildMI(MBB, I, DL, get(AArch64::MSR))
    2458           1 :         .addImm(AArch64SysReg::NZCV)
    2459           1 :         .addReg(SrcReg, getKillRegState(KillSrc))
    2460           1 :         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    2461           1 :     return;
    2462             :   }
    2463             : 
    2464           1 :   if (SrcReg == AArch64::NZCV) {
    2465             :     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    2466           3 :     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
    2467           1 :         .addImm(AArch64SysReg::NZCV)
    2468           1 :         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    2469           1 :     return;
    2470             :   }
    2471             : 
    2472           0 :   llvm_unreachable("unimplemented reg-to-reg copy");
    2473             : }
    2474             : 
    2475         983 : void AArch64InstrInfo::storeRegToStackSlot(
    2476             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    2477             :     bool isKill, int FI, const TargetRegisterClass *RC,
    2478             :     const TargetRegisterInfo *TRI) const {
    2479        1966 :   DebugLoc DL;
    2480        1966 :   if (MBBI != MBB.end())
    2481         884 :     DL = MBBI->getDebugLoc();
    2482         983 :   MachineFunction &MF = *MBB.getParent();
    2483         983 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2484         983 :   unsigned Align = MFI.getObjectAlignment(FI);
    2485             : 
    2486         983 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2487        1966 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2488         983 :       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
    2489         983 :   unsigned Opc = 0;
    2490         983 :   bool Offset = true;
    2491        1966 :   switch (TRI->getSpillSize(*RC)) {
    2492           0 :   case 1:
    2493           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2494           0 :       Opc = AArch64::STRBui;
    2495             :     break;
    2496           0 :   case 2:
    2497           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2498           0 :       Opc = AArch64::STRHui;
    2499             :     break;
    2500         166 :   case 4:
    2501         332 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2502         154 :       Opc = AArch64::STRWui;
    2503         154 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2504          27 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    2505             :       else
    2506             :         assert(SrcReg != AArch64::WSP);
    2507          24 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2508          12 :       Opc = AArch64::STRSui;
    2509             :     break;
    2510         552 :   case 8:
    2511        1104 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2512         408 :       Opc = AArch64::STRXui;
    2513         408 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2514          71 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    2515             :       else
    2516             :         assert(SrcReg != AArch64::SP);
    2517         288 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
    2518         144 :       Opc = AArch64::STRDui;
    2519             :     break;
    2520         255 :   case 16:
    2521         510 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2522             :       Opc = AArch64::STRQui;
    2523           0 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2524             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2525           0 :       Opc = AArch64::ST1Twov1d;
    2526           0 :       Offset = false;
    2527             :     }
    2528             :     break;
    2529           0 :   case 24:
    2530           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2531             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2532           0 :       Opc = AArch64::ST1Threev1d;
    2533           0 :       Offset = false;
    2534             :     }
    2535             :     break;
    2536           4 :   case 32:
    2537           8 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2538             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2539             :       Opc = AArch64::ST1Fourv1d;
    2540             :       Offset = false;
    2541           8 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2542             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2543           4 :       Opc = AArch64::ST1Twov2d;
    2544           4 :       Offset = false;
    2545             :     }
    2546             :     break;
    2547           3 :   case 48:
    2548           6 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2549             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2550           3 :       Opc = AArch64::ST1Threev2d;
    2551           3 :       Offset = false;
    2552             :     }
    2553             :     break;
    2554           3 :   case 64:
    2555           6 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2556             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2557           3 :       Opc = AArch64::ST1Fourv2d;
    2558           3 :       Offset = false;
    2559             :     }
    2560             :     break;
    2561             :   }
    2562             :   assert(Opc && "Unknown register class");
    2563             : 
    2564        2949 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
    2565         983 :                                      .addReg(SrcReg, getKillRegState(isKill))
    2566         983 :                                      .addFrameIndex(FI);
    2567             : 
    2568         983 :   if (Offset)
    2569             :     MI.addImm(0);
    2570         983 :   MI.addMemOperand(MMO);
    2571         983 : }
    2572             : 
    2573         880 : void AArch64InstrInfo::loadRegFromStackSlot(
    2574             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    2575             :     int FI, const TargetRegisterClass *RC,
    2576             :     const TargetRegisterInfo *TRI) const {
    2577        1760 :   DebugLoc DL;
    2578        1760 :   if (MBBI != MBB.end())
    2579         880 :     DL = MBBI->getDebugLoc();
    2580         880 :   MachineFunction &MF = *MBB.getParent();
    2581         880 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2582         880 :   unsigned Align = MFI.getObjectAlignment(FI);
    2583         880 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2584        1760 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2585         880 :       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
    2586             : 
    2587         880 :   unsigned Opc = 0;
    2588         880 :   bool Offset = true;
    2589        1760 :   switch (TRI->getSpillSize(*RC)) {
    2590           0 :   case 1:
    2591           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2592           0 :       Opc = AArch64::LDRBui;
    2593             :     break;
    2594           0 :   case 2:
    2595           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2596           0 :       Opc = AArch64::LDRHui;
    2597             :     break;
    2598          79 :   case 4:
    2599         158 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2600          71 :       Opc = AArch64::LDRWui;
    2601          71 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2602          28 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
    2603             :       else
    2604             :         assert(DestReg != AArch64::WSP);
    2605          16 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2606           8 :       Opc = AArch64::LDRSui;
    2607             :     break;
    2608         438 :   case 8:
    2609         876 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2610         335 :       Opc = AArch64::LDRXui;
    2611         335 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2612          61 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
    2613             :       else
    2614             :         assert(DestReg != AArch64::SP);
    2615         206 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
    2616         103 :       Opc = AArch64::LDRDui;
    2617             :     break;
    2618         353 :   case 16:
    2619         706 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2620             :       Opc = AArch64::LDRQui;
    2621           0 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2622             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2623           0 :       Opc = AArch64::LD1Twov1d;
    2624           0 :       Offset = false;
    2625             :     }
    2626             :     break;
    2627           0 :   case 24:
    2628           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2629             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2630           0 :       Opc = AArch64::LD1Threev1d;
    2631           0 :       Offset = false;
    2632             :     }
    2633             :     break;
    2634           4 :   case 32:
    2635           8 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2636             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2637             :       Opc = AArch64::LD1Fourv1d;
    2638             :       Offset = false;
    2639           8 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2640             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2641           4 :       Opc = AArch64::LD1Twov2d;
    2642           4 :       Offset = false;
    2643             :     }
    2644             :     break;
    2645           3 :   case 48:
    2646           6 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2647             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2648           3 :       Opc = AArch64::LD1Threev2d;
    2649           3 :       Offset = false;
    2650             :     }
    2651             :     break;
    2652           3 :   case 64:
    2653           6 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2654             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2655           3 :       Opc = AArch64::LD1Fourv2d;
    2656           3 :       Offset = false;
    2657             :     }
    2658             :     break;
    2659             :   }
    2660             :   assert(Opc && "Unknown register class");
    2661             : 
    2662        2640 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
    2663         880 :                                      .addReg(DestReg, getDefRegState(true))
    2664         880 :                                      .addFrameIndex(FI);
    2665         880 :   if (Offset)
    2666             :     MI.addImm(0);
    2667         880 :   MI.addMemOperand(MMO);
    2668         880 : }
    2669             : 
    2670       13051 : void llvm::emitFrameOffset(MachineBasicBlock &MBB,
    2671             :                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    2672             :                            unsigned DestReg, unsigned SrcReg, int Offset,
    2673             :                            const TargetInstrInfo *TII,
    2674             :                            MachineInstr::MIFlag Flag, bool SetNZCV) {
    2675       13051 :   if (DestReg == SrcReg && Offset == 0)
    2676             :     return;
    2677             : 
    2678             :   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
    2679             :          "SP increment/decrement not 16-byte aligned");
    2680             : 
    2681        1943 :   bool isSub = Offset < 0;
    2682        1943 :   if (isSub)
    2683         764 :     Offset = -Offset;
    2684             : 
    2685             :   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
    2686             :   // scratch register.  If DestReg is a virtual register, use it as the
    2687             :   // scratch register; otherwise, create a new virtual register (to be
    2688             :   // replaced by the scavenger at the end of PEI).  That case can be optimized
    2689             :   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
    2690             :   // register can be loaded with offset%8 and the add/sub can use an extending
    2691             :   // instruction with LSL#3.
    2692             :   // Currently the function handles any offsets but generates a poor sequence
    2693             :   // of code.
    2694             :   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
    2695             : 
    2696             :   unsigned Opc;
    2697        1943 :   if (SetNZCV)
    2698           2 :     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
    2699             :   else
    2700        1941 :     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
    2701        1943 :   const unsigned MaxEncoding = 0xfff;
    2702        1943 :   const unsigned ShiftSize = 12;
    2703        1943 :   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
    2704        1975 :   while (((unsigned)Offset) >= (1 << ShiftSize)) {
    2705             :     unsigned ThisVal;
    2706          34 :     if (((unsigned)Offset) > MaxEncodableValue) {
    2707             :       ThisVal = MaxEncodableValue;
    2708             :     } else {
    2709          24 :       ThisVal = Offset & MaxEncodableValue;
    2710             :     }
    2711             :     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
    2712             :            "Encoding cannot handle value that big");
    2713         102 :     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    2714          34 :         .addReg(SrcReg)
    2715          68 :         .addImm(ThisVal >> ShiftSize)
    2716          68 :         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
    2717          34 :         .setMIFlag(Flag);
    2718             : 
    2719          34 :     SrcReg = DestReg;
    2720          34 :     Offset -= ThisVal;
    2721          34 :     if (Offset == 0)
    2722             :       return;
    2723             :   }
    2724        5823 :   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    2725        1941 :       .addReg(SrcReg)
    2726        3882 :       .addImm(Offset)
    2727        3882 :       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    2728        1941 :       .setMIFlag(Flag);
    2729             : }
    2730             : 
    2731         714 : MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    2732             :     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    2733             :     MachineBasicBlock::iterator InsertPt, int FrameIndex,
    2734             :     LiveIntervals *LIS) const {
    2735             :   // This is a bit of a hack. Consider this instruction:
    2736             :   //
    2737             :   //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
    2738             :   //
    2739             :   // We explicitly chose GPR64all for the virtual register so such a copy might
    2740             :   // be eliminated by RegisterCoalescer. However, that may not be possible, and
    2741             :   // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
    2742             :   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
    2743             :   //
    2744             :   // To prevent that, we are going to constrain the %vreg0 register class here.
    2745             :   //
    2746             :   // <rdar://problem/11522048>
    2747             :   //
    2748         354 :   if (MI.isFullCopy()) {
    2749         354 :     unsigned DstReg = MI.getOperand(0).getReg();
    2750         354 :     unsigned SrcReg = MI.getOperand(1).getReg();
    2751         355 :     if (SrcReg == AArch64::SP &&
    2752           1 :         TargetRegisterInfo::isVirtualRegister(DstReg)) {
    2753           1 :       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
    2754           1 :       return nullptr;
    2755             :     }
    2756         354 :     if (DstReg == AArch64::SP &&
    2757           1 :         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
    2758           1 :       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    2759           1 :       return nullptr;
    2760             :     }
    2761             :   }
    2762             : 
    2763             :   // Handle the case where a copy is being spilled or filled but the source
    2764             :   // and destination register class don't match.  For example:
    2765             :   //
    2766             :   //   %vreg0<def> = COPY %XZR; GPR64common:%vreg0
    2767             :   //
    2768             :   // In this case we can still safely fold away the COPY and generate the
    2769             :   // following spill code:
    2770             :   //
    2771             :   //   STRXui %XZR, <fi#0>
    2772             :   //
    2773             :   // This also eliminates spilled cross register class COPYs (e.g. between x and
    2774             :   // d regs) of the same size.  For example:
    2775             :   //
    2776             :   //   %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
    2777             :   //
    2778             :   // will be filled as
    2779             :   //
    2780             :   //   LDRDui %vreg0, fi<#0>
    2781             :   //
    2782             :   // instead of
    2783             :   //
    2784             :   //   LDRXui %vregTemp, fi<#0>
    2785             :   //   %vreg0 = FMOV %vregTemp
    2786             :   //
    2787        1218 :   if (MI.isCopy() && Ops.size() == 1 &&
    2788             :       // Make sure we're only folding the explicit COPY defs/uses.
    2789         781 :       (Ops[0] == 0 || Ops[0] == 1)) {
    2790         506 :     bool IsSpill = Ops[0] == 0;
    2791         506 :     bool IsFill = !IsSpill;
    2792         506 :     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    2793         506 :     const MachineRegisterInfo &MRI = MF.getRegInfo();
    2794         506 :     MachineBasicBlock &MBB = *MI.getParent();
    2795         506 :     const MachineOperand &DstMO = MI.getOperand(0);
    2796        1012 :     const MachineOperand &SrcMO = MI.getOperand(1);
    2797         506 :     unsigned DstReg = DstMO.getReg();
    2798         506 :     unsigned SrcReg = SrcMO.getReg();
    2799             :     // This is slightly expensive to compute for physical regs since
    2800             :     // getMinimalPhysRegClass is slow.
    2801         352 :     auto getRegClass = [&](unsigned Reg) {
    2802         352 :       return TargetRegisterInfo::isVirtualRegister(Reg)
    2803         704 :                  ? MRI.getRegClass(Reg)
    2804         630 :                  : TRI.getMinimalPhysRegClass(Reg);
    2805         858 :     };
    2806             : 
    2807         936 :     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
    2808             :       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
    2809             :                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
    2810             :              "Mismatched register size in non subreg COPY");
    2811         352 :       if (IsSpill)
    2812         314 :         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
    2813             :                             getRegClass(SrcReg), &TRI);
    2814             :       else
    2815         195 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
    2816             :                              getRegClass(DstReg), &TRI);
    2817        1132 :       return &*--InsertPt;
    2818             :     }
    2819             : 
    2820             :     // Handle cases like spilling def of:
    2821             :     //
    2822             :     //   %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
    2823             :     //
    2824             :     // where the physical register source can be widened and stored to the full
    2825             :     // virtual reg destination stack slot, in this case producing:
    2826             :     //
    2827             :     //   STRXui %XZR, <fi#0>
    2828             :     //
    2829         302 :     if (IsSpill && DstMO.isUndef() &&
    2830          74 :         TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    2831             :       assert(SrcMO.getSubReg() == 0 &&
    2832             :              "Unexpected subreg on physical register");
    2833             :       const TargetRegisterClass *SpillRC;
    2834             :       unsigned SpillSubreg;
    2835          74 :       switch (DstMO.getSubReg()) {
    2836             :       default:
    2837             :         SpillRC = nullptr;
    2838             :         break;
    2839          47 :       case AArch64::sub_32:
    2840             :       case AArch64::ssub:
    2841          94 :         if (AArch64::GPR32RegClass.contains(SrcReg)) {
    2842             :           SpillRC = &AArch64::GPR64RegClass;
    2843             :           SpillSubreg = AArch64::sub_32;
    2844          88 :         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
    2845             :           SpillRC = &AArch64::FPR64RegClass;
    2846             :           SpillSubreg = AArch64::ssub;
    2847             :         } else
    2848             :           SpillRC = nullptr;
    2849             :         break;
    2850          27 :       case AArch64::dsub:
    2851          54 :         if (AArch64::FPR64RegClass.contains(SrcReg)) {
    2852             :           SpillRC = &AArch64::FPR128RegClass;
    2853             :           SpillSubreg = AArch64::dsub;
    2854             :         } else
    2855             :           SpillRC = nullptr;
    2856             :         break;
    2857             :       }
    2858             : 
    2859             :       if (SpillRC)
    2860          74 :         if (unsigned WidenedSrcReg =
    2861         148 :                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
    2862          74 :           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
    2863             :                               FrameIndex, SpillRC, &TRI);
    2864         148 :           return &*--InsertPt;
    2865             :         }
    2866             :     }
    2867             : 
    2868             :     // Handle cases like filling use of:
    2869             :     //
    2870             :     //   %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
    2871             :     //
    2872             :     // where we can load the full virtual reg source stack slot, into the subreg
    2873             :     // destination, in this case producing:
    2874             :     //
    2875             :     //   LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
    2876             :     //
    2877         162 :     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
    2878             :       const TargetRegisterClass *FillRC;
    2879           2 :       switch (DstMO.getSubReg()) {
    2880             :       default:
    2881             :         FillRC = nullptr;
    2882             :         break;
    2883             :       case AArch64::sub_32:
    2884             :         FillRC = &AArch64::GPR32RegClass;
    2885             :         break;
    2886           1 :       case AArch64::ssub:
    2887           1 :         FillRC = &AArch64::FPR32RegClass;
    2888           1 :         break;
    2889           0 :       case AArch64::dsub:
    2890           0 :         FillRC = &AArch64::FPR64RegClass;
    2891           0 :         break;
    2892             :       }
    2893             : 
    2894             :       if (FillRC) {
    2895             :         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
    2896             :                    TRI.getRegSizeInBits(*FillRC) &&
    2897             :                "Mismatched regclass size on folded subreg COPY");
    2898           2 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
    2899           4 :         MachineInstr &LoadMI = *--InsertPt;
    2900           2 :         MachineOperand &LoadDst = LoadMI.getOperand(0);
    2901             :         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
    2902           4 :         LoadDst.setSubReg(DstMO.getSubReg());
    2903           2 :         LoadDst.setIsUndef();
    2904           2 :         return &LoadMI;
    2905             :       }
    2906             :     }
    2907             :   }
    2908             : 
    2909             :   // Cannot fold.
    2910             :   return nullptr;
    2911             : }
    2912             : 
    2913        6966 : int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
    2914             :                                     bool *OutUseUnscaledOp,
    2915             :                                     unsigned *OutUnscaledOp,
    2916             :                                     int *EmittableOffset) {
    2917        6966 :   int Scale = 1;
    2918        6966 :   bool IsSigned = false;
    2919             :   // The ImmIdx should be changed case by case if it is not 2.
    2920        6966 :   unsigned ImmIdx = 2;
    2921        6966 :   unsigned UnscaledOp = 0;
    2922             :   // Set output values in case of early exit.
    2923        6966 :   if (EmittableOffset)
    2924        2974 :     *EmittableOffset = 0;
    2925        6966 :   if (OutUseUnscaledOp)
    2926        2974 :     *OutUseUnscaledOp = false;
    2927        6966 :   if (OutUnscaledOp)
    2928        2974 :     *OutUnscaledOp = 0;
    2929       13932 :   switch (MI.getOpcode()) {
    2930           0 :   default:
    2931           0 :     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
    2932             :   // Vector spills/fills can't take an immediate offset.
    2933             :   case AArch64::LD1Twov2d:
    2934             :   case AArch64::LD1Threev2d:
    2935             :   case AArch64::LD1Fourv2d:
    2936             :   case AArch64::LD1Twov1d:
    2937             :   case AArch64::LD1Threev1d:
    2938             :   case AArch64::LD1Fourv1d:
    2939             :   case AArch64::ST1Twov2d:
    2940             :   case AArch64::ST1Threev2d:
    2941             :   case AArch64::ST1Fourv2d:
    2942             :   case AArch64::ST1Twov1d:
    2943             :   case AArch64::ST1Threev1d:
    2944             :   case AArch64::ST1Fourv1d:
    2945             :     return AArch64FrameOffsetCannotUpdate;
    2946             :   case AArch64::PRFMui:
    2947             :     Scale = 8;
    2948             :     UnscaledOp = AArch64::PRFUMi;
    2949             :     break;
    2950        1399 :   case AArch64::LDRXui:
    2951        1399 :     Scale = 8;
    2952        1399 :     UnscaledOp = AArch64::LDURXi;
    2953        1399 :     break;
    2954         421 :   case AArch64::LDRWui:
    2955         421 :     Scale = 4;
    2956         421 :     UnscaledOp = AArch64::LDURWi;
    2957         421 :     break;
    2958           0 :   case AArch64::LDRBui:
    2959           0 :     Scale = 1;
    2960           0 :     UnscaledOp = AArch64::LDURBi;
    2961           0 :     break;
    2962           4 :   case AArch64::LDRHui:
    2963           4 :     Scale = 2;
    2964           4 :     UnscaledOp = AArch64::LDURHi;
    2965           4 :     break;
    2966          85 :   case AArch64::LDRSui:
    2967          85 :     Scale = 4;
    2968          85 :     UnscaledOp = AArch64::LDURSi;
    2969          85 :     break;
    2970         366 :   case AArch64::LDRDui:
    2971         366 :     Scale = 8;
    2972         366 :     UnscaledOp = AArch64::LDURDi;
    2973         366 :     break;
    2974         800 :   case AArch64::LDRQui:
    2975         800 :     Scale = 16;
    2976         800 :     UnscaledOp = AArch64::LDURQi;
    2977         800 :     break;
    2978          48 :   case AArch64::LDRBBui:
    2979          48 :     Scale = 1;
    2980          48 :     UnscaledOp = AArch64::LDURBBi;
    2981          48 :     break;
    2982          33 :   case AArch64::LDRHHui:
    2983          33 :     Scale = 2;
    2984          33 :     UnscaledOp = AArch64::LDURHHi;
    2985          33 :     break;
    2986           0 :   case AArch64::LDRSBXui:
    2987           0 :     Scale = 1;
    2988           0 :     UnscaledOp = AArch64::LDURSBXi;
    2989           0 :     break;
    2990          15 :   case AArch64::LDRSBWui:
    2991          15 :     Scale = 1;
    2992          15 :     UnscaledOp = AArch64::LDURSBWi;
    2993          15 :     break;
    2994           0 :   case AArch64::LDRSHXui:
    2995           0 :     Scale = 2;
    2996           0 :     UnscaledOp = AArch64::LDURSHXi;
    2997           0 :     break;
    2998          12 :   case AArch64::LDRSHWui:
    2999          12 :     Scale = 2;
    3000          12 :     UnscaledOp = AArch64::LDURSHWi;
    3001          12 :     break;
    3002           6 :   case AArch64::LDRSWui:
    3003           6 :     Scale = 4;
    3004           6 :     UnscaledOp = AArch64::LDURSWi;
    3005           6 :     break;
    3006             : 
    3007        1515 :   case AArch64::STRXui:
    3008        1515 :     Scale = 8;
    3009        1515 :     UnscaledOp = AArch64::STURXi;
    3010        1515 :     break;
    3011         710 :   case AArch64::STRWui:
    3012         710 :     Scale = 4;
    3013         710 :     UnscaledOp = AArch64::STURWi;
    3014         710 :     break;
    3015           0 :   case AArch64::STRBui:
    3016           0 :     Scale = 1;
    3017           0 :     UnscaledOp = AArch64::STURBi;
    3018           0 :     break;
    3019           0 :   case AArch64::STRHui:
    3020           0 :     Scale = 2;
    3021           0 :     UnscaledOp = AArch64::STURHi;
    3022           0 :     break;
    3023          42 :   case AArch64::STRSui:
    3024          42 :     Scale = 4;
    3025          42 :     UnscaledOp = AArch64::STURSi;
    3026          42 :     break;
    3027         426 :   case AArch64::STRDui:
    3028         426 :     Scale = 8;
    3029         426 :     UnscaledOp = AArch64::STURDi;
    3030         426 :     break;
    3031         941 :   case AArch64::STRQui:
    3032         941 :     Scale = 16;
    3033         941 :     UnscaledOp = AArch64::STURQi;
    3034         941 :     break;
    3035          44 :   case AArch64::STRBBui:
    3036          44 :     Scale = 1;
    3037          44 :     UnscaledOp = AArch64::STURBBi;
    3038          44 :     break;
    3039          27 :   case AArch64::STRHHui:
    3040          27 :     Scale = 2;
    3041          27 :     UnscaledOp = AArch64::STURHHi;
    3042          27 :     break;
    3043             : 
    3044           6 :   case AArch64::LDPXi:
    3045             :   case AArch64::LDPDi:
    3046             :   case AArch64::STPXi:
    3047             :   case AArch64::STPDi:
    3048             :   case AArch64::LDNPXi:
    3049             :   case AArch64::LDNPDi:
    3050             :   case AArch64::STNPXi:
    3051             :   case AArch64::STNPDi:
    3052           6 :     ImmIdx = 3;
    3053           6 :     IsSigned = true;
    3054           6 :     Scale = 8;
    3055           6 :     break;
    3056           0 :   case AArch64::LDPQi:
    3057             :   case AArch64::STPQi:
    3058             :   case AArch64::LDNPQi:
    3059             :   case AArch64::STNPQi:
    3060           0 :     ImmIdx = 3;
    3061           0 :     IsSigned = true;
    3062           0 :     Scale = 16;
    3063           0 :     break;
    3064           0 :   case AArch64::LDPWi:
    3065             :   case AArch64::LDPSi:
    3066             :   case AArch64::STPWi:
    3067             :   case AArch64::STPSi:
    3068             :   case AArch64::LDNPWi:
    3069             :   case AArch64::LDNPSi:
    3070             :   case AArch64::STNPWi:
    3071             :   case AArch64::STNPSi:
    3072           0 :     ImmIdx = 3;
    3073           0 :     IsSigned = true;
    3074           0 :     Scale = 4;
    3075           0 :     break;
    3076             : 
    3077          10 :   case AArch64::LDURXi:
    3078             :   case AArch64::LDURWi:
    3079             :   case AArch64::LDURBi:
    3080             :   case AArch64::LDURHi:
    3081             :   case AArch64::LDURSi:
    3082             :   case AArch64::LDURDi:
    3083             :   case AArch64::LDURQi:
    3084             :   case AArch64::LDURHHi:
    3085             :   case AArch64::LDURBBi:
    3086             :   case AArch64::LDURSBXi:
    3087             :   case AArch64::LDURSBWi:
    3088             :   case AArch64::LDURSHXi:
    3089             :   case AArch64::LDURSHWi:
    3090             :   case AArch64::LDURSWi:
    3091             :   case AArch64::STURXi:
    3092             :   case AArch64::STURWi:
    3093             :   case AArch64::STURBi:
    3094             :   case AArch64::STURHi:
    3095             :   case AArch64::STURSi:
    3096             :   case AArch64::STURDi:
    3097             :   case AArch64::STURQi:
    3098             :   case AArch64::STURBBi:
    3099             :   case AArch64::STURHHi:
    3100          10 :     Scale = 1;
    3101          10 :     break;
    3102             :   }
    3103             : 
    3104       13868 :   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
    3105             : 
    3106        6934 :   bool useUnscaledOp = false;
    3107             :   // If the offset doesn't match the scale, we rewrite the instruction to
    3108             :   // use the unscaled instruction instead. Likewise, if we have a negative
    3109             :   // offset (and have an unscaled op to use).
    3110        6934 :   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    3111         342 :     useUnscaledOp = true;
    3112             : 
    3113             :   // Use an unscaled addressing mode if the instruction has a negative offset
    3114             :   // (or if the instruction is already using an unscaled addressing mode).
    3115             :   unsigned MaskBits;
    3116        6934 :   if (IsSigned) {
    3117             :     // ldp/stp instructions.
    3118           6 :     MaskBits = 7;
    3119           6 :     Offset /= Scale;
    3120        6928 :   } else if (UnscaledOp == 0 || useUnscaledOp) {
    3121             :     MaskBits = 9;
    3122             :     IsSigned = true;
    3123             :     Scale = 1;
    3124             :   } else {
    3125        6576 :     MaskBits = 12;
    3126        6576 :     IsSigned = false;
    3127        6576 :     Offset /= Scale;
    3128             :   }
    3129             : 
    3130             :   // Attempt to fold address computation.
    3131        6934 :   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
    3132        6934 :   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
    3133        6934 :   if (Offset >= MinOff && Offset <= MaxOff) {
    3134        6756 :     if (EmittableOffset)
    3135        2952 :       *EmittableOffset = Offset;
    3136        6756 :     Offset = 0;
    3137             :   } else {
    3138         178 :     int NewOff = Offset < 0 ? MinOff : MaxOff;
    3139         178 :     if (EmittableOffset)
    3140           1 :       *EmittableOffset = NewOff;
    3141         178 :     Offset = (Offset - NewOff) * Scale;
    3142             :   }
    3143        6934 :   if (OutUseUnscaledOp)
    3144        2953 :     *OutUseUnscaledOp = useUnscaledOp;
    3145        6934 :   if (OutUnscaledOp)
    3146        2953 :     *OutUnscaledOp = UnscaledOp;
    3147        6934 :   return AArch64FrameOffsetCanUpdate |
    3148        6934 :          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
    3149             : }
    3150             : 
    3151        3152 : bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
    3152             :                                     unsigned FrameReg, int &Offset,
    3153             :                                     const AArch64InstrInfo *TII) {
    3154        6304 :   unsigned Opcode = MI.getOpcode();
    3155        3152 :   unsigned ImmIdx = FrameRegIdx + 1;
    3156             : 
    3157        3152 :   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    3158         356 :     Offset += MI.getOperand(ImmIdx).getImm();
    3159         712 :     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
    3160         178 :                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
    3161             :                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    3162         178 :     MI.eraseFromParent();
    3163         178 :     Offset = 0;
    3164         178 :     return true;
    3165             :   }
    3166             : 
    3167             :   int NewOffset;
    3168             :   unsigned UnscaledOp;
    3169             :   bool UseUnscaledOp;
    3170             :   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
    3171        2974 :                                          &UnscaledOp, &NewOffset);
    3172        2974 :   if (Status & AArch64FrameOffsetCanUpdate) {
    3173        2953 :     if (Status & AArch64FrameOffsetIsLegal)
    3174             :       // Replace the FrameIndex with FrameReg.
    3175        5904 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    3176        2953 :     if (UseUnscaledOp)
    3177         306 :       MI.setDesc(TII->get(UnscaledOp));
    3178             : 
    3179        5906 :     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    3180        2953 :     return Offset == 0;
    3181             :   }
    3182             : 
    3183             :   return false;
    3184             : }
    3185             : 
    3186           1 : void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
    3187           2 :   NopInst.setOpcode(AArch64::HINT);
    3188           2 :   NopInst.addOperand(MCOperand::createImm(0));
    3189           1 : }
    3190             : 
    3191             : // AArch64 supports MachineCombiner.
    3192       11003 : bool AArch64InstrInfo::useMachineCombiner() const { return true; }
    3193             : 
    3194             : // True when Opc sets flag
    3195             : static bool isCombineInstrSettingFlag(unsigned Opc) {
    3196        2358 :   switch (Opc) {
    3197             :   case AArch64::ADDSWrr:
    3198             :   case AArch64::ADDSWri:
    3199             :   case AArch64::ADDSXrr:
    3200             :   case AArch64::ADDSXri:
    3201             :   case AArch64::SUBSWrr:
    3202             :   case AArch64::SUBSXrr:
    3203             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3204             :   case AArch64::SUBSWri:
    3205             :   case AArch64::SUBSXri:
    3206             :     return true;
    3207             :   default:
    3208             :     break;
    3209             :   }
    3210             :   return false;
    3211             : }
    3212             : 
    3213             : // 32b Opcodes that can be combined with a MUL
    3214             : static bool isCombineInstrCandidate32(unsigned Opc) {
    3215       95988 :   switch (Opc) {
    3216             :   case AArch64::ADDWrr:
    3217             :   case AArch64::ADDWri:
    3218             :   case AArch64::SUBWrr:
    3219             :   case AArch64::ADDSWrr:
    3220             :   case AArch64::ADDSWri:
    3221             :   case AArch64::SUBSWrr:
    3222             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3223             :   case AArch64::SUBWri:
    3224             :   case AArch64::SUBSWri:
    3225             :     return true;
    3226             :   default:
    3227             :     break;
    3228             :   }
    3229             :   return false;
    3230             : }
    3231             : 
    3232             : // 64b Opcodes that can be combined with a MUL
    3233             : static bool isCombineInstrCandidate64(unsigned Opc) {
    3234       94822 :   switch (Opc) {
    3235             :   case AArch64::ADDXrr:
    3236             :   case AArch64::ADDXri:
    3237             :   case AArch64::SUBXrr:
    3238             :   case AArch64::ADDSXrr:
    3239             :   case AArch64::ADDSXri:
    3240             :   case AArch64::SUBSXrr:
    3241             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3242             :   case AArch64::SUBXri:
    3243             :   case AArch64::SUBSXri:
    3244             :     return true;
    3245             :   default:
    3246             :     break;
    3247             :   }
    3248             :   return false;
    3249             : }
    3250             : 
    3251             : // FP Opcodes that can be combined with a FMUL
    3252       95796 : static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
    3253      191592 :   switch (Inst.getOpcode()) {
    3254             :   default:
    3255       95284 :     break;
    3256         512 :   case AArch64::FADDSrr:
    3257             :   case AArch64::FADDDrr:
    3258             :   case AArch64::FADDv2f32:
    3259             :   case AArch64::FADDv2f64:
    3260             :   case AArch64::FADDv4f32:
    3261             :   case AArch64::FSUBSrr:
    3262             :   case AArch64::FSUBDrr:
    3263             :   case AArch64::FSUBv2f32:
    3264             :   case AArch64::FSUBv2f64:
    3265             :   case AArch64::FSUBv4f32:
    3266        1024 :     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    3267         901 :     return (Options.UnsafeFPMath ||
    3268        1413 :             Options.AllowFPOpFusion == FPOpFusion::Fast);
    3269             :   }
    3270       95284 :   return false;
    3271             : }
    3272             : 
    3273             : // Opcodes that can be combined with a MUL
    3274       95988 : static bool isCombineInstrCandidate(unsigned Opc) {
    3275      190810 :   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
    3276             : }
    3277             : 
    3278             : //
    3279             : // Utility routine that checks if \param MO is defined by an
    3280             : // \param CombineOpc instruction in the basic block \param MBB
    3281        3096 : static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
    3282             :                        unsigned CombineOpc, unsigned ZeroReg = 0,
    3283             :                        bool CheckZeroReg = false) {
    3284        3096 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    3285        3096 :   MachineInstr *MI = nullptr;
    3286             : 
    3287        6066 :   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3288        2898 :     MI = MRI.getUniqueVRegDef(MO.getReg());
    3289             :   // And it needs to be in the trace (otherwise, it won't have a depth).
    3290        5601 :   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    3291             :     return false;
    3292             :   // Must only used by the user we combine with.
    3293         245 :   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    3294             :     return false;
    3295             : 
    3296         231 :   if (CheckZeroReg) {
    3297             :     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
    3298             :            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
    3299             :            MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
    3300             :     // The third input reg must be zero.
    3301         200 :     if (MI->getOperand(3).getReg() != ZeroReg)
    3302             :       return false;
    3303             :   }
    3304             : 
    3305             :   return true;
    3306             : }
    3307             : 
    3308             : //
    3309             : // Is \param MO defined by an integer multiply and can be combined?
    3310             : static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3311             :                               unsigned MulOpc, unsigned ZeroReg) {
    3312        2524 :   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
    3313             : }
    3314             : 
    3315             : //
    3316             : // Is \param MO defined by a floating-point multiply and can be combined?
    3317             : static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3318             :                                unsigned MulOpc) {
    3319         572 :   return canCombine(MBB, MO, MulOpc);
    3320             : }
    3321             : 
    3322             : // TODO: There are many more machine instruction opcodes to match:
    3323             : //       1. Other data types (integer, vectors)
    3324             : //       2. Other math / logic operations (xor, or)
    3325             : //       3. Other forms of the same operation (intrinsics and other variants)
    3326       95766 : bool AArch64InstrInfo::isAssociativeAndCommutative(
    3327             :     const MachineInstr &Inst) const {
    3328      191532 :   switch (Inst.getOpcode()) {
    3329         781 :   case AArch64::FADDDrr:
    3330             :   case AArch64::FADDSrr:
    3331             :   case AArch64::FADDv2f32:
    3332             :   case AArch64::FADDv2f64:
    3333             :   case AArch64::FADDv4f32:
    3334             :   case AArch64::FMULDrr:
    3335             :   case AArch64::FMULSrr:
    3336             :   case AArch64::FMULX32:
    3337             :   case AArch64::FMULX64:
    3338             :   case AArch64::FMULXv2f32:
    3339             :   case AArch64::FMULXv2f64:
    3340             :   case AArch64::FMULXv4f32:
    3341             :   case AArch64::FMULv2f32:
    3342             :   case AArch64::FMULv2f64:
    3343             :   case AArch64::FMULv4f32:
    3344         781 :     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
    3345             :   default:
    3346             :     return false;
    3347             :   }
    3348             : }
    3349             : 
    3350             : /// Find instructions that can be turned into madd.
    3351       95988 : static bool getMaddPatterns(MachineInstr &Root,
    3352             :                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3353      191976 :   unsigned Opc = Root.getOpcode();
    3354       95988 :   MachineBasicBlock &MBB = *Root.getParent();
    3355       95988 :   bool Found = false;
    3356             : 
    3357       95988 :   if (!isCombineInstrCandidate(Opc))
    3358             :     return false;
    3359        1128 :   if (isCombineInstrSettingFlag(Opc)) {
    3360        1128 :     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    3361             :     // When NZCV is live bail out.
    3362        1128 :     if (Cmp_NZCV == -1)
    3363             :       return false;
    3364         444 :     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    3365             :     // When opcode can't change bail out.
    3366             :     // CHECKME: do we miss any cases for opcode conversion?
    3367         444 :     if (NewOpc == Opc)
    3368             :       return false;
    3369             :     Opc = NewOpc;
    3370             :   }
    3371             : 
    3372        1674 :   switch (Opc) {
    3373             :   default:
    3374             :     break;
    3375         273 :   case AArch64::ADDWrr:
    3376             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3377             :            "ADDWrr does not have register operands");
    3378         819 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3379             :                           AArch64::WZR)) {
    3380           1 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
    3381           1 :       Found = true;
    3382             :     }
    3383         819 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3384             :                           AArch64::WZR)) {
    3385           1 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
    3386           1 :       Found = true;
    3387             :     }
    3388             :     break;
    3389         235 :   case AArch64::ADDXrr:
    3390         705 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3391             :                           AArch64::XZR)) {
    3392           7 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
    3393           7 :       Found = true;
    3394             :     }
    3395         705 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3396             :                           AArch64::XZR)) {
    3397          38 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
    3398          38 :       Found = true;
    3399             :     }
    3400             :     break;
    3401         230 :   case AArch64::SUBWrr:
    3402         690 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3403             :                           AArch64::WZR)) {
    3404           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
    3405           0 :       Found = true;
    3406             :     }
    3407         690 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3408             :                           AArch64::WZR)) {
    3409         126 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
    3410         126 :       Found = true;
    3411             :     }
    3412             :     break;
    3413         112 :   case AArch64::SUBXrr:
    3414         336 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3415             :                           AArch64::XZR)) {
    3416           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
    3417           0 :       Found = true;
    3418             :     }
    3419         336 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3420             :                           AArch64::XZR)) {
    3421          15 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
    3422          15 :       Found = true;
    3423             :     }
    3424             :     break;
    3425         141 :   case AArch64::ADDWri:
    3426         423 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3427             :                           AArch64::WZR)) {
    3428           2 :       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
    3429           2 :       Found = true;
    3430             :     }
    3431             :     break;
    3432         503 :   case AArch64::ADDXri:
    3433        1509 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3434             :                           AArch64::XZR)) {
    3435           1 :       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
    3436           1 :       Found = true;
    3437             :     }
    3438             :     break;
    3439          59 :   case AArch64::SUBWri:
    3440         177 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3441             :                           AArch64::WZR)) {
    3442           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
    3443           0 :       Found = true;
    3444             :     }
    3445             :     break;
    3446         121 :   case AArch64::SUBXri:
    3447         363 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3448             :                           AArch64::XZR)) {
    3449           1 :       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
    3450           1 :       Found = true;
    3451             :     }
    3452             :     break;
    3453             :   }
    3454             :   return Found;
    3455             : }
    3456             : /// Floating-Point Support
    3457             : 
    3458             : /// Find instructions that can be turned into madd.
    3459       95796 : static bool getFMAPatterns(MachineInstr &Root,
    3460             :                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3461             : 
    3462       95796 :   if (!isCombineInstrCandidateFP(Root))
    3463             :     return false;
    3464             : 
    3465         155 :   MachineBasicBlock &MBB = *Root.getParent();
    3466         155 :   bool Found = false;
    3467             : 
    3468         310 :   switch (Root.getOpcode()) {
    3469             :   default:
    3470             :     assert(false && "Unsupported FP instruction in combiner\n");
    3471             :     break;
    3472          52 :   case AArch64::FADDSrr:
    3473             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3474             :            "FADDWrr does not have register operands");
    3475         156 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3476           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
    3477           1 :       Found = true;
    3478         153 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3479             :                                   AArch64::FMULv1i32_indexed)) {
    3480           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
    3481           1 :       Found = true;
    3482             :     }
    3483         156 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3484           0 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
    3485           0 :       Found = true;
    3486         156 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3487             :                                   AArch64::FMULv1i32_indexed)) {
    3488           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
    3489           0 :       Found = true;
    3490             :     }
    3491             :     break;
    3492          29 :   case AArch64::FADDDrr:
    3493          87 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3494           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
    3495           1 :       Found = true;
    3496          84 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3497             :                                   AArch64::FMULv1i64_indexed)) {
    3498           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
    3499           1 :       Found = true;
    3500             :     }
    3501          87 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3502           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
    3503           1 :       Found = true;
    3504          84 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3505             :                                   AArch64::FMULv1i64_indexed)) {
    3506           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
    3507           0 :       Found = true;
    3508             :     }
    3509             :     break;
    3510           5 :   case AArch64::FADDv2f32:
    3511          15 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3512             :                            AArch64::FMULv2i32_indexed)) {
    3513           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
    3514           1 :       Found = true;
    3515          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3516             :                                   AArch64::FMULv2f32)) {
    3517           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
    3518           1 :       Found = true;
    3519             :     }
    3520          15 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3521             :                            AArch64::FMULv2i32_indexed)) {
    3522           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
    3523           0 :       Found = true;
    3524          15 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3525             :                                   AArch64::FMULv2f32)) {
    3526           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
    3527           0 :       Found = true;
    3528             :     }
    3529             :     break;
    3530          10 :   case AArch64::FADDv2f64:
    3531          30 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3532             :                            AArch64::FMULv2i64_indexed)) {
    3533           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
    3534           1 :       Found = true;
    3535          27 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3536             :                                   AArch64::FMULv2f64)) {
    3537           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
    3538           1 :       Found = true;
    3539             :     }
    3540          30 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3541             :                            AArch64::FMULv2i64_indexed)) {
    3542           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
    3543           0 :       Found = true;
    3544          30 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3545             :                                   AArch64::FMULv2f64)) {
    3546           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
    3547           0 :       Found = true;
    3548             :     }
    3549             :     break;
    3550          31 :   case AArch64::FADDv4f32:
    3551          93 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3552             :                            AArch64::FMULv4i32_indexed)) {
    3553           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
    3554           1 :       Found = true;
    3555          90 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3556             :                                   AArch64::FMULv4f32)) {
    3557           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
    3558           1 :       Found = true;
    3559             :     }
    3560          93 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3561             :                            AArch64::FMULv4i32_indexed)) {
    3562           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
    3563           0 :       Found = true;
    3564          93 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3565             :                                   AArch64::FMULv4f32)) {
    3566           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
    3567           0 :       Found = true;
    3568             :     }
    3569             :     break;
    3570             : 
    3571           6 :   case AArch64::FSUBSrr:
    3572          18 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3573           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
    3574           0 :       Found = true;
    3575             :     }
    3576          18 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3577           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
    3578           0 :       Found = true;
    3579          18 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3580             :                                   AArch64::FMULv1i32_indexed)) {
    3581           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
    3582           2 :       Found = true;
    3583             :     }
    3584          18 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
    3585           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
    3586           2 :       Found = true;
    3587             :     }
    3588             :     break;
    3589           4 :   case AArch64::FSUBDrr:
    3590          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3591           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
    3592           0 :       Found = true;
    3593             :     }
    3594          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3595           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
    3596           0 :       Found = true;
    3597          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3598             :                                   AArch64::FMULv1i64_indexed)) {
    3599           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
    3600           2 :       Found = true;
    3601             :     }
    3602          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
    3603           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
    3604           2 :       Found = true;
    3605             :     }
    3606             :     break;
    3607           6 :   case AArch64::FSUBv2f32:
    3608          18 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3609             :                            AArch64::FMULv2i32_indexed)) {
    3610           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
    3611           2 :       Found = true;
    3612          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3613             :                                   AArch64::FMULv2f32)) {
    3614           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
    3615           2 :       Found = true;
    3616             :     }
    3617             :     break;
    3618           8 :   case AArch64::FSUBv2f64:
    3619          24 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3620             :                            AArch64::FMULv2i64_indexed)) {
    3621           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
    3622           2 :       Found = true;
    3623          18 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3624             :                                   AArch64::FMULv2f64)) {
    3625           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
    3626           2 :       Found = true;
    3627             :     }
    3628             :     break;
    3629           4 :   case AArch64::FSUBv4f32:
    3630          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3631             :                            AArch64::FMULv4i32_indexed)) {
    3632           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
    3633           2 :       Found = true;
    3634           6 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3635             :                                   AArch64::FMULv4f32)) {
    3636           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
    3637           2 :       Found = true;
    3638             :     }
    3639             :     break;
    3640             :   }
    3641             :   return Found;
    3642             : }
    3643             : 
    3644             : /// Return true when a code sequence can improve throughput. It
    3645             : /// should be called only for instructions in loops.
    3646             : /// \param Pattern - combiner pattern
    3647          38 : bool AArch64InstrInfo::isThroughputPattern(
    3648             :     MachineCombinerPattern Pattern) const {
    3649          38 :   switch (Pattern) {
    3650             :   default:
    3651             :     break;
    3652             :   case MachineCombinerPattern::FMULADDS_OP1:
    3653             :   case MachineCombinerPattern::FMULADDS_OP2:
    3654             :   case MachineCombinerPattern::FMULSUBS_OP1:
    3655             :   case MachineCombinerPattern::FMULSUBS_OP2:
    3656             :   case MachineCombinerPattern::FMULADDD_OP1:
    3657             :   case MachineCombinerPattern::FMULADDD_OP2:
    3658             :   case MachineCombinerPattern::FMULSUBD_OP1:
    3659             :   case MachineCombinerPattern::FMULSUBD_OP2:
    3660             :   case MachineCombinerPattern::FNMULSUBS_OP1:
    3661             :   case MachineCombinerPattern::FNMULSUBD_OP1:
    3662             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    3663             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    3664             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    3665             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    3666             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    3667             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    3668             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    3669             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    3670             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    3671             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    3672             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    3673             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    3674             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    3675             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    3676             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    3677             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    3678             :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    3679             :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    3680             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    3681             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    3682             :   case MachineCombinerPattern::FMLSv2f32_OP2:
    3683             :   case MachineCombinerPattern::FMLSv2f64_OP2:
    3684             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    3685             :   case MachineCombinerPattern::FMLSv4f32_OP2:
    3686             :     return true;
    3687             :   } // end switch (Pattern)
    3688          14 :   return false;
    3689             : }
    3690             : /// Return true when there is potentially a faster code sequence for an
    3691             : /// instruction chain ending in \p Root. All potential patterns are listed in
    3692             : /// the \p Pattern vector. Pattern should be sorted in priority order since the
    3693             : /// pattern evaluator stops checking as soon as it finds a faster sequence.
    3694             : 
    3695       95988 : bool AArch64InstrInfo::getMachineCombinerPatterns(
    3696             :     MachineInstr &Root,
    3697             :     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
    3698             :   // Integer patterns
    3699       95988 :   if (getMaddPatterns(Root, Patterns))
    3700             :     return true;
    3701             :   // Floating point patterns
    3702       95796 :   if (getFMAPatterns(Root, Patterns))
    3703             :     return true;
    3704             : 
    3705       95766 :   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
    3706             : }
    3707             : 
    3708             : enum class FMAInstKind { Default, Indexed, Accumulator };
    3709             : /// genFusedMultiply - Generate fused multiply instructions.
    3710             : /// This function supports both integer and floating point instructions.
    3711             : /// A typical example:
    3712             : ///  F|MUL I=A,B,0
    3713             : ///  F|ADD R,I,C
    3714             : ///  ==> F|MADD R,A,B,C
    3715             : /// \param MF Containing MachineFunction
    3716             : /// \param MRI Register information
    3717             : /// \param TII Target information
    3718             : /// \param Root is the F|ADD instruction
    3719             : /// \param [out] InsInstrs is a vector of machine instructions and will
    3720             : /// contain the generated madd instruction
    3721             : /// \param IdxMulOpd is index of operand in Root that is the result of
    3722             : /// the F|MUL. In the example above IdxMulOpd is 1.
    3723             : /// \param MaddOpc the opcode fo the f|madd instruction
    3724             : /// \param RC Register class of operands
    3725             : /// \param kind of fma instruction (addressing mode) to be generated
    3726             : static MachineInstr *
    3727         218 : genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
    3728             :                  const TargetInstrInfo *TII, MachineInstr &Root,
    3729             :                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
    3730             :                  unsigned MaddOpc, const TargetRegisterClass *RC,
    3731             :                  FMAInstKind kind = FMAInstKind::Default) {
    3732             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    3733             : 
    3734         218 :   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
    3735         436 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    3736         218 :   unsigned ResultReg = Root.getOperand(0).getReg();
    3737         218 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    3738         436 :   bool Src0IsKill = MUL->getOperand(1).isKill();
    3739         218 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    3740         436 :   bool Src1IsKill = MUL->getOperand(2).isKill();
    3741         436 :   unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    3742         436 :   bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
    3743             : 
    3744         218 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    3745         218 :     MRI.constrainRegClass(ResultReg, RC);
    3746         218 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    3747         218 :     MRI.constrainRegClass(SrcReg0, RC);
    3748         218 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    3749         218 :     MRI.constrainRegClass(SrcReg1, RC);
    3750         218 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    3751         218 :     MRI.constrainRegClass(SrcReg2, RC);
    3752             : 
    3753         218 :   MachineInstrBuilder MIB;
    3754         218 :   if (kind == FMAInstKind::Default)
    3755         776 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    3756         194 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    3757         194 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    3758         194 :               .addReg(SrcReg2, getKillRegState(Src2IsKill));
    3759          24 :   else if (kind == FMAInstKind::Indexed)
    3760          60 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    3761          15 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    3762          15 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    3763          15 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    3764          30 :               .addImm(MUL->getOperand(3).getImm());
    3765           9 :   else if (kind == FMAInstKind::Accumulator)
    3766          36 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    3767           9 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    3768           9 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    3769           9 :               .addReg(SrcReg1, getKillRegState(Src1IsKill));
    3770             :   else
    3771             :     assert(false && "Invalid FMA instruction kind \n");
    3772             :   // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
    3773         218 :   InsInstrs.push_back(MIB);
    3774         218 :   return MUL;
    3775             : }
    3776             : 
    3777             : /// genMaddR - Generate madd instruction and combine mul and add using
    3778             : /// an extra virtual register
    3779             : /// Example - an ADD intermediate needs to be stored in a register:
    3780             : ///   MUL I=A,B,0
    3781             : ///   ADD R,I,Imm
    3782             : ///   ==> ORR  V, ZR, Imm
    3783             : ///   ==> MADD R,A,B,V
    3784             : /// \param MF Containing MachineFunction
    3785             : /// \param MRI Register information
    3786             : /// \param TII Target information
    3787             : /// \param Root is the ADD instruction
    3788             : /// \param [out] InsInstrs is a vector of machine instructions and will
    3789             : /// contain the generated madd instruction
    3790             : /// \param IdxMulOpd is index of operand in Root that is the result of
    3791             : /// the MUL. In the example above IdxMulOpd is 1.
    3792             : /// \param MaddOpc the opcode fo the madd instruction
    3793             : /// \param VR is a virtual register that holds the value of an ADD operand
    3794             : /// (V in the example above).
    3795             : /// \param RC Register class of operands
    3796           3 : static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
    3797             :                               const TargetInstrInfo *TII, MachineInstr &Root,
    3798             :                               SmallVectorImpl<MachineInstr *> &InsInstrs,
    3799             :                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
    3800             :                               const TargetRegisterClass *RC) {
    3801             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    3802             : 
    3803           6 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    3804           3 :   unsigned ResultReg = Root.getOperand(0).getReg();
    3805           3 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    3806           6 :   bool Src0IsKill = MUL->getOperand(1).isKill();
    3807           3 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    3808           6 :   bool Src1IsKill = MUL->getOperand(2).isKill();
    3809             : 
    3810           3 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    3811           3 :     MRI.constrainRegClass(ResultReg, RC);
    3812           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    3813           3 :     MRI.constrainRegClass(SrcReg0, RC);
    3814           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    3815           3 :     MRI.constrainRegClass(SrcReg1, RC);
    3816           3 :   if (TargetRegisterInfo::isVirtualRegister(VR))
    3817           3 :     MRI.constrainRegClass(VR, RC);
    3818             : 
    3819             :   MachineInstrBuilder MIB =
    3820          12 :       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    3821           3 :           .addReg(SrcReg0, getKillRegState(Src0IsKill))
    3822           3 :           .addReg(SrcReg1, getKillRegState(Src1IsKill))
    3823           3 :           .addReg(VR);
    3824             :   // Insert the MADD
    3825           3 :   InsInstrs.push_back(MIB);
    3826           3 :   return MUL;
    3827             : }
    3828             : 
    3829             : /// When getMachineCombinerPatterns() finds potential patterns,
    3830             : /// this function generates the instructions that could replace the
    3831             : /// original code sequence
    3832         339 : void AArch64InstrInfo::genAlternativeCodeSequence(
    3833             :     MachineInstr &Root, MachineCombinerPattern Pattern,
    3834             :     SmallVectorImpl<MachineInstr *> &InsInstrs,
    3835             :     SmallVectorImpl<MachineInstr *> &DelInstrs,
    3836             :     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
    3837         339 :   MachineBasicBlock &MBB = *Root.getParent();
    3838         339 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    3839         339 :   MachineFunction &MF = *MBB.getParent();
    3840         339 :   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    3841             : 
    3842             :   MachineInstr *MUL;
    3843             :   const TargetRegisterClass *RC;
    3844             :   unsigned Opc;
    3845         339 :   switch (Pattern) {
    3846         117 :   default:
    3847             :     // Reassociate instructions.
    3848         117 :     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
    3849             :                                                 DelInstrs, InstrIdxForVirtReg);
    3850         117 :     return;
    3851           8 :   case MachineCombinerPattern::MULADDW_OP1:
    3852             :   case MachineCombinerPattern::MULADDX_OP1:
    3853             :     // MUL I=A,B,0
    3854             :     // ADD R,I,C
    3855             :     // ==> MADD R,A,B,C
    3856             :     // --- Create(MADD);
    3857           8 :     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
    3858             :       Opc = AArch64::MADDWrrr;
    3859             :       RC = &AArch64::GPR32RegClass;
    3860             :     } else {
    3861           7 :       Opc = AArch64::MADDXrrr;
    3862           7 :       RC = &AArch64::GPR64RegClass;
    3863             :     }
    3864           8 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    3865           8 :     break;
    3866          39 :   case MachineCombinerPattern::MULADDW_OP2:
    3867             :   case MachineCombinerPattern::MULADDX_OP2:
    3868             :     // MUL I=A,B,0
    3869             :     // ADD R,C,I
    3870             :     // ==> MADD R,A,B,C
    3871             :     // --- Create(MADD);
    3872          39 :     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
    3873             :       Opc = AArch64::MADDWrrr;
    3874             :       RC = &AArch64::GPR32RegClass;
    3875             :     } else {
    3876          38 :       Opc = AArch64::MADDXrrr;
    3877          38 :       RC = &AArch64::GPR64RegClass;
    3878             :     }
    3879          39 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    3880          39 :     break;
    3881           3 :   case MachineCombinerPattern::MULADDWI_OP1:
    3882             :   case MachineCombinerPattern::MULADDXI_OP1: {
    3883             :     // MUL I=A,B,0
    3884             :     // ADD R,I,Imm
    3885             :     // ==> ORR  V, ZR, Imm
    3886             :     // ==> MADD R,A,B,V
    3887             :     // --- Create(MADD);
    3888             :     const TargetRegisterClass *OrrRC;
    3889             :     unsigned BitSize, OrrOpc, ZeroReg;
    3890           3 :     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
    3891             :       OrrOpc = AArch64::ORRWri;
    3892             :       OrrRC = &AArch64::GPR32spRegClass;
    3893             :       BitSize = 32;
    3894             :       ZeroReg = AArch64::WZR;
    3895             :       Opc = AArch64::MADDWrrr;
    3896             :       RC = &AArch64::GPR32RegClass;
    3897             :     } else {
    3898           1 :       OrrOpc = AArch64::ORRXri;
    3899           1 :       OrrRC = &AArch64::GPR64spRegClass;
    3900           1 :       BitSize = 64;
    3901           1 :       ZeroReg = AArch64::XZR;
    3902           1 :       Opc = AArch64::MADDXrrr;
    3903           1 :       RC = &AArch64::GPR64RegClass;
    3904             :     }
    3905           3 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    3906           3 :     uint64_t Imm = Root.getOperand(2).getImm();
    3907             : 
    3908           6 :     if (Root.getOperand(3).isImm()) {
    3909           3 :       unsigned Val = Root.getOperand(3).getImm();
    3910           3 :       Imm = Imm << Val;
    3911             :     }
    3912           3 :     uint64_t UImm = SignExtend64(Imm, BitSize);
    3913             :     uint64_t Encoding;
    3914           3 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    3915             :       MachineInstrBuilder MIB1 =
    3916           8 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    3917           2 :               .addReg(ZeroReg)
    3918           4 :               .addImm(Encoding);
    3919           2 :       InsInstrs.push_back(MIB1);
    3920           8 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    3921           2 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    3922             :     }
    3923             :     break;
    3924             :   }
    3925           0 :   case MachineCombinerPattern::MULSUBW_OP1:
    3926             :   case MachineCombinerPattern::MULSUBX_OP1: {
    3927             :     // MUL I=A,B,0
    3928             :     // SUB R,I, C
    3929             :     // ==> SUB  V, 0, C
    3930             :     // ==> MADD R,A,B,V // = -C + A*B
    3931             :     // --- Create(MADD);
    3932             :     const TargetRegisterClass *SubRC;
    3933             :     unsigned SubOpc, ZeroReg;
    3934           0 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
    3935             :       SubOpc = AArch64::SUBWrr;
    3936             :       SubRC = &AArch64::GPR32spRegClass;
    3937             :       ZeroReg = AArch64::WZR;
    3938             :       Opc = AArch64::MADDWrrr;
    3939             :       RC = &AArch64::GPR32RegClass;
    3940             :     } else {
    3941           0 :       SubOpc = AArch64::SUBXrr;
    3942           0 :       SubRC = &AArch64::GPR64spRegClass;
    3943           0 :       ZeroReg = AArch64::XZR;
    3944           0 :       Opc = AArch64::MADDXrrr;
    3945           0 :       RC = &AArch64::GPR64RegClass;
    3946             :     }
    3947           0 :     unsigned NewVR = MRI.createVirtualRegister(SubRC);
    3948             :     // SUB NewVR, 0, C
    3949             :     MachineInstrBuilder MIB1 =
    3950           0 :         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
    3951           0 :             .addReg(ZeroReg)
    3952           0 :             .add(Root.getOperand(2));
    3953           0 :     InsInstrs.push_back(MIB1);
    3954           0 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    3955           0 :     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    3956             :     break;
    3957             :   }
    3958         141 :   case MachineCombinerPattern::MULSUBW_OP2:
    3959             :   case MachineCombinerPattern::MULSUBX_OP2:
    3960             :     // MUL I=A,B,0
    3961             :     // SUB R,C,I
    3962             :     // ==> MSUB R,A,B,C (computes C - A*B)
    3963             :     // --- Create(MSUB);
    3964         141 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
    3965             :       Opc = AArch64::MSUBWrrr;
    3966             :       RC = &AArch64::GPR32RegClass;
    3967             :     } else {
    3968          15 :       Opc = AArch64::MSUBXrrr;
    3969          15 :       RC = &AArch64::GPR64RegClass;
    3970             :     }
    3971         141 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    3972         141 :     break;
    3973           1 :   case MachineCombinerPattern::MULSUBWI_OP1:
    3974             :   case MachineCombinerPattern::MULSUBXI_OP1: {
    3975             :     // MUL I=A,B,0
    3976             :     // SUB R,I, Imm
    3977             :     // ==> ORR  V, ZR, -Imm
    3978             :     // ==> MADD R,A,B,V // = -Imm + A*B
    3979             :     // --- Create(MADD);
    3980             :     const TargetRegisterClass *OrrRC;
    3981             :     unsigned BitSize, OrrOpc, ZeroReg;
    3982           1 :     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
    3983             :       OrrOpc = AArch64::ORRWri;
    3984             :       OrrRC = &AArch64::GPR32spRegClass;
    3985             :       BitSize = 32;
    3986             :       ZeroReg = AArch64::WZR;
    3987             :       Opc = AArch64::MADDWrrr;
    3988             :       RC = &AArch64::GPR32RegClass;
    3989             :     } else {
    3990           1 :       OrrOpc = AArch64::ORRXri;
    3991           1 :       OrrRC = &AArch64::GPR64spRegClass;
    3992           1 :       BitSize = 64;
    3993           1 :       ZeroReg = AArch64::XZR;
    3994           1 :       Opc = AArch64::MADDXrrr;
    3995           1 :       RC = &AArch64::GPR64RegClass;
    3996             :     }
    3997           1 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    3998           1 :     uint64_t Imm = Root.getOperand(2).getImm();
    3999           2 :     if (Root.getOperand(3).isImm()) {
    4000           1 :       unsigned Val = Root.getOperand(3).getImm();
    4001           1 :       Imm = Imm << Val;
    4002             :     }
    4003           2 :     uint64_t UImm = SignExtend64(-Imm, BitSize);
    4004             :     uint64_t Encoding;
    4005           1 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    4006             :       MachineInstrBuilder MIB1 =
    4007           4 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    4008           1 :               .addReg(ZeroReg)
    4009           2 :               .addImm(Encoding);
    4010           1 :       InsInstrs.push_back(MIB1);
    4011           4 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4012           1 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4013             :     }
    4014             :     break;
    4015             :   }
    4016             :   // Floating Point Support
    4017           2 :   case MachineCombinerPattern::FMULADDS_OP1:
    4018             :   case MachineCombinerPattern::FMULADDD_OP1:
    4019             :     // MUL I=A,B,0
    4020             :     // ADD R,I,C
    4021             :     // ==> MADD R,A,B,C
    4022             :     // --- Create(MADD);
    4023           2 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
    4024             :       Opc = AArch64::FMADDSrrr;
    4025             :       RC = &AArch64::FPR32RegClass;
    4026             :     } else {
    4027           1 :       Opc = AArch64::FMADDDrrr;
    4028           1 :       RC = &AArch64::FPR64RegClass;
    4029             :     }
    4030           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4031           2 :     break;
    4032           0 :   case MachineCombinerPattern::FMULADDS_OP2:
    4033             :   case MachineCombinerPattern::FMULADDD_OP2:
    4034             :     // FMUL I=A,B,0
    4035             :     // FADD R,C,I
    4036             :     // ==> FMADD R,A,B,C
    4037             :     // --- Create(FMADD);
    4038           0 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
    4039             :       Opc = AArch64::FMADDSrrr;
    4040             :       RC = &AArch64::FPR32RegClass;
    4041             :     } else {
    4042           0 :       Opc = AArch64::FMADDDrrr;
    4043           0 :       RC = &AArch64::FPR64RegClass;
    4044             :     }
    4045           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4046           0 :     break;
    4047             : 
    4048           1 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    4049           1 :     Opc = AArch64::FMLAv1i32_indexed;
    4050           1 :     RC = &AArch64::FPR32RegClass;
    4051           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4052             :                            FMAInstKind::Indexed);
    4053           1 :     break;
    4054           0 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    4055           0 :     Opc = AArch64::FMLAv1i32_indexed;
    4056           0 :     RC = &AArch64::FPR32RegClass;
    4057           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4058             :                            FMAInstKind::Indexed);
    4059           0 :     break;
    4060             : 
    4061           1 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    4062           1 :     Opc = AArch64::FMLAv1i64_indexed;
    4063           1 :     RC = &AArch64::FPR64RegClass;
    4064           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4065             :                            FMAInstKind::Indexed);
    4066           1 :     break;
    4067           0 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    4068           0 :     Opc = AArch64::FMLAv1i64_indexed;
    4069           0 :     RC = &AArch64::FPR64RegClass;
    4070           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4071             :                            FMAInstKind::Indexed);
    4072           0 :     break;
    4073             : 
    4074           2 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    4075             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    4076           2 :     RC = &AArch64::FPR64RegClass;
    4077           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
    4078           1 :       Opc = AArch64::FMLAv2i32_indexed;
    4079           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4080             :                              FMAInstKind::Indexed);
    4081             :     } else {
    4082           1 :       Opc = AArch64::FMLAv2f32;
    4083           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4084             :                              FMAInstKind::Accumulator);
    4085             :     }
    4086             :     break;
    4087           0 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    4088             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    4089           0 :     RC = &AArch64::FPR64RegClass;
    4090           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
    4091           0 :       Opc = AArch64::FMLAv2i32_indexed;
    4092           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4093             :                              FMAInstKind::Indexed);
    4094             :     } else {
    4095           0 :       Opc = AArch64::FMLAv2f32;
    4096           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4097             :                              FMAInstKind::Accumulator);
    4098             :     }
    4099             :     break;
    4100             : 
    4101           2 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    4102             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    4103           2 :     RC = &AArch64::FPR128RegClass;
    4104           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
    4105           1 :       Opc = AArch64::FMLAv2i64_indexed;
    4106           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4107             :                              FMAInstKind::Indexed);
    4108             :     } else {
    4109           1 :       Opc = AArch64::FMLAv2f64;
    4110           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4111             :                              FMAInstKind::Accumulator);
    4112             :     }
    4113             :     break;
    4114           0 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    4115             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    4116           0 :     RC = &AArch64::FPR128RegClass;
    4117           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
    4118           0 :       Opc = AArch64::FMLAv2i64_indexed;
    4119           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4120             :                              FMAInstKind::Indexed);
    4121             :     } else {
    4122           0 :       Opc = AArch64::FMLAv2f64;
    4123           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4124             :                              FMAInstKind::Accumulator);
    4125             :     }
    4126             :     break;
    4127             : 
    4128           2 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    4129             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    4130           2 :     RC = &AArch64::FPR128RegClass;
    4131           2 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
    4132           1 :       Opc = AArch64::FMLAv4i32_indexed;
    4133           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4134             :                              FMAInstKind::Indexed);
    4135             :     } else {
    4136           1 :       Opc = AArch64::FMLAv4f32;
    4137           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4138             :                              FMAInstKind::Accumulator);
    4139             :     }
    4140             :     break;
    4141             : 
    4142           0 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    4143             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    4144           0 :     RC = &AArch64::FPR128RegClass;
    4145           0 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
    4146           0 :       Opc = AArch64::FMLAv4i32_indexed;
    4147           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4148             :                              FMAInstKind::Indexed);
    4149             :     } else {
    4150           0 :       Opc = AArch64::FMLAv4f32;
    4151           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4152             :                              FMAInstKind::Accumulator);
    4153             :     }
    4154             :     break;
    4155             : 
    4156           0 :   case MachineCombinerPattern::FMULSUBS_OP1:
    4157             :   case MachineCombinerPattern::FMULSUBD_OP1: {
    4158             :     // FMUL I=A,B,0
    4159             :     // FSUB R,I,C
    4160             :     // ==> FNMSUB R,A,B,C // = -C + A*B
    4161             :     // --- Create(FNMSUB);
    4162           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
    4163             :       Opc = AArch64::FNMSUBSrrr;
    4164             :       RC = &AArch64::FPR32RegClass;
    4165             :     } else {
    4166           0 :       Opc = AArch64::FNMSUBDrrr;
    4167           0 :       RC = &AArch64::FPR64RegClass;
    4168             :     }
    4169           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4170           0 :     break;
    4171             :   }
    4172             : 
    4173           4 :   case MachineCombinerPattern::FNMULSUBS_OP1:
    4174             :   case MachineCombinerPattern::FNMULSUBD_OP1: {
    4175             :     // FNMUL I=A,B,0
    4176             :     // FSUB R,I,C
    4177             :     // ==> FNMADD R,A,B,C // = -A*B - C
    4178             :     // --- Create(FNMADD);
    4179           4 :     if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
    4180             :       Opc = AArch64::FNMADDSrrr;
    4181             :       RC = &AArch64::FPR32RegClass;
    4182             :     } else {
    4183           2 :       Opc = AArch64::FNMADDDrrr;
    4184           2 :       RC = &AArch64::FPR64RegClass;
    4185             :     }
    4186           4 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4187           4 :     break;
    4188             :   }
    4189             : 
    4190           0 :   case MachineCombinerPattern::FMULSUBS_OP2:
    4191             :   case MachineCombinerPattern::FMULSUBD_OP2: {
    4192             :     // FMUL I=A,B,0
    4193             :     // FSUB R,C,I
    4194             :     // ==> FMSUB R,A,B,C (computes C - A*B)
    4195             :     // --- Create(FMSUB);
    4196           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
    4197             :       Opc = AArch64::FMSUBSrrr;
    4198             :       RC = &AArch64::FPR32RegClass;
    4199             :     } else {
    4200           0 :       Opc = AArch64::FMSUBDrrr;
    4201           0 :       RC = &AArch64::FPR64RegClass;
    4202             :     }
    4203           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4204           0 :     break;
    4205             :   }
    4206             : 
    4207           2 :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    4208           2 :     Opc = AArch64::FMLSv1i32_indexed;
    4209           2 :     RC = &AArch64::FPR32RegClass;
    4210           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4211             :                            FMAInstKind::Indexed);
    4212           2 :     break;
    4213             : 
    4214           2 :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    4215           2 :     Opc = AArch64::FMLSv1i64_indexed;
    4216           2 :     RC = &AArch64::FPR64RegClass;
    4217           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4218             :                            FMAInstKind::Indexed);
    4219           2 :     break;
    4220             : 
    4221           4 :   case MachineCombinerPattern::FMLSv2f32_OP2:
    4222             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    4223           4 :     RC = &AArch64::FPR64RegClass;
    4224           4 :     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
    4225           2 :       Opc = AArch64::FMLSv2i32_indexed;
    4226           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4227             :                              FMAInstKind::Indexed);
    4228             :     } else {
    4229           2 :       Opc = AArch64::FMLSv2f32;
    4230           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4231             :                              FMAInstKind::Accumulator);
    4232             :     }
    4233             :     break;
    4234             : 
    4235           4 :   case MachineCombinerPattern::FMLSv2f64_OP2:
    4236             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    4237           4 :     RC = &AArch64::FPR128RegClass;
    4238           4 :     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
    4239           2 :       Opc = AArch64::FMLSv2i64_indexed;
    4240           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4241             :                              FMAInstKind::Indexed);
    4242             :     } else {
    4243           2 :       Opc = AArch64::FMLSv2f64;
    4244           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4245             :                              FMAInstKind::Accumulator);
    4246             :     }
    4247             :     break;
    4248             : 
    4249           4 :   case MachineCombinerPattern::FMLSv4f32_OP2:
    4250             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    4251           4 :     RC = &AArch64::FPR128RegClass;
    4252           4 :     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
    4253           2 :       Opc = AArch64::FMLSv4i32_indexed;
    4254           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4255             :                              FMAInstKind::Indexed);
    4256             :     } else {
    4257           2 :       Opc = AArch64::FMLSv4f32;
    4258           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4259             :                              FMAInstKind::Accumulator);
    4260             :     }
    4261             :     break;
    4262             :   } // end switch (Pattern)
    4263             :   // Record MUL and ADD/SUB for deletion
    4264         222 :   DelInstrs.push_back(MUL);
    4265         222 :   DelInstrs.push_back(&Root);
    4266             : }
    4267             : 
    4268             : /// \brief Replace csincr-branch sequence by simple conditional branch
    4269             : ///
    4270             : /// Examples:
    4271             : /// 1. \code
    4272             : ///   csinc  w9, wzr, wzr, <condition code>
    4273             : ///   tbnz   w9, #0, 0x44
    4274             : ///    \endcode
    4275             : /// to
    4276             : ///    \code
    4277             : ///   b.<inverted condition code>
    4278             : ///    \endcode
    4279             : ///
    4280             : /// 2. \code
    4281             : ///   csinc w9, wzr, wzr, <condition code>
    4282             : ///   tbz   w9, #0, 0x44
    4283             : ///    \endcode
    4284             : /// to
    4285             : ///    \code
    4286             : ///   b.<condition code>
    4287             : ///    \endcode
    4288             : ///
    4289             : /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
    4290             : /// compare's constant operand is power of 2.
    4291             : ///
    4292             : /// Examples:
    4293             : ///    \code
    4294             : ///   and  w8, w8, #0x400
    4295             : ///   cbnz w8, L1
    4296             : ///    \endcode
    4297             : /// to
    4298             : ///    \code
    4299             : ///   tbnz w8, #10, L1
    4300             : ///    \endcode
    4301             : ///
    4302             : /// \param  MI Conditional Branch
    4303             : /// \return True when the simple conditional branch is generated
    4304             : ///
    4305         914 : bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
    4306         914 :   bool IsNegativeBranch = false;
    4307         914 :   bool IsTestAndBranch = false;
    4308         914 :   unsigned TargetBBInMI = 0;
    4309        1828 :   switch (MI.getOpcode()) {
    4310           0 :   default:
    4311           0 :     llvm_unreachable("Unknown branch instruction?");
    4312             :   case AArch64::Bcc:
    4313             :     return false;
    4314             :   case AArch64::CBZW:
    4315             :   case AArch64::CBZX:
    4316             :     TargetBBInMI = 1;
    4317             :     break;
    4318         180 :   case AArch64::CBNZW:
    4319             :   case AArch64::CBNZX:
    4320         180 :     TargetBBInMI = 1;
    4321         180 :     IsNegativeBranch = true;
    4322         180 :     break;
    4323          80 :   case AArch64::TBZW:
    4324             :   case AArch64::TBZX:
    4325          80 :     TargetBBInMI = 2;
    4326          80 :     IsTestAndBranch = true;
    4327          80 :     break;
    4328          29 :   case AArch64::TBNZW:
    4329             :   case AArch64::TBNZX:
    4330          29 :     TargetBBInMI = 2;
    4331          29 :     IsNegativeBranch = true;
    4332          29 :     IsTestAndBranch = true;
    4333          29 :     break;
    4334             :   }
    4335             :   // So we increment a zero register and test for bits other
    4336             :   // than bit 0? Conservatively bail out in case the verifier
    4337             :   // missed this case.
    4338         378 :   if (IsTestAndBranch && MI.getOperand(1).getImm())
    4339             :     return false;
    4340             : 
    4341             :   // Find Definition.
    4342             :   assert(MI.getParent() && "Incomplete machine instruciton\n");
    4343         340 :   MachineBasicBlock *MBB = MI.getParent();
    4344         340 :   MachineFunction *MF = MBB->getParent();
    4345         340 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    4346         340 :   unsigned VReg = MI.getOperand(0).getReg();
    4347         340 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
    4348             :     return false;
    4349             : 
    4350         340 :   MachineInstr *DefMI = MRI->getVRegDef(VReg);
    4351             : 
    4352             :   // Look through COPY instructions to find definition.
    4353         430 :   while (DefMI->isCopy()) {
    4354         197 :     unsigned CopyVReg = DefMI->getOperand(1).getReg();
    4355         197 :     if (!MRI->hasOneNonDBGUse(CopyVReg))
    4356             :       return false;
    4357         140 :     if (!MRI->hasOneDef(CopyVReg))
    4358             :       return false;
    4359          45 :     DefMI = MRI->getVRegDef(CopyVReg);
    4360             :   }
    4361             : 
    4362         376 :   switch (DefMI->getOpcode()) {
    4363             :   default:
    4364             :     return false;
    4365             :   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
    4366           7 :   case AArch64::ANDWri:
    4367             :   case AArch64::ANDXri: {
    4368           7 :     if (IsTestAndBranch)
    4369             :       return false;
    4370           7 :     if (DefMI->getParent() != MBB)
    4371             :       return false;
    4372           6 :     if (!MRI->hasOneNonDBGUse(VReg))
    4373             :       return false;
    4374             : 
    4375          10 :     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    4376          10 :     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
    4377          10 :         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    4378           5 :     if (!isPowerOf2_64(Mask))
    4379             :       return false;
    4380             : 
    4381           5 :     MachineOperand &MO = DefMI->getOperand(1);
    4382           5 :     unsigned NewReg = MO.getReg();
    4383           5 :     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
    4384             :       return false;
    4385             : 
    4386             :     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
    4387             : 
    4388           5 :     MachineBasicBlock &RefToMBB = *MBB;
    4389           5 :     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    4390          10 :     DebugLoc DL = MI.getDebugLoc();
    4391           5 :     unsigned Imm = Log2_64(Mask);
    4392           5 :     unsigned Opc = (Imm < 32)
    4393           5 :                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
    4394             :                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    4395          15 :     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
    4396           5 :                               .addReg(NewReg)
    4397          10 :                               .addImm(Imm)
    4398           5 :                               .addMBB(TBB);
    4399             :     // Register lives on to the CBZ now.
    4400           5 :     MO.setIsKill(false);
    4401             : 
    4402             :     // For immediate smaller than 32, we need to use the 32-bit
    4403             :     // variant (W) in all cases. Indeed the 64-bit variant does not
    4404             :     // allow to encode them.
    4405             :     // Therefore, if the input register is 64-bit, we need to take the
    4406             :     // 32-bit sub-part.
    4407           5 :     if (!Is32Bit && Imm < 32)
    4408           3 :       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    4409           5 :     MI.eraseFromParent();
    4410           5 :     return true;
    4411             :   }
    4412             :   // Look for CSINC
    4413           1 :   case AArch64::CSINCWr:
    4414             :   case AArch64::CSINCXr: {
    4415           2 :     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
    4416           1 :           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
    4417           0 :         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
    4418           0 :           DefMI->getOperand(2).getReg() == AArch64::XZR))
    4419             :       return false;
    4420             : 
    4421           1 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    4422             :       return false;
    4423             : 
    4424           1 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    4425             :     // Convert only when the condition code is not modified between
    4426             :     // the CSINC and the branch. The CC may be used by other
    4427             :     // instructions in between.
    4428           3 :     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
    4429             :       return false;
    4430           1 :     MachineBasicBlock &RefToMBB = *MBB;
    4431           2 :     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    4432           2 :     DebugLoc DL = MI.getDebugLoc();
    4433           1 :     if (IsNegativeBranch)
    4434           1 :       CC = AArch64CC::getInvertedCondCode(CC);
    4435           4 :     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    4436           1 :     MI.eraseFromParent();
    4437           1 :     return true;
    4438             :   }
    4439             :   }
    4440             : }
    4441             : 
    4442             : std::pair<unsigned, unsigned>
    4443          20 : AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
    4444          20 :   const unsigned Mask = AArch64II::MO_FRAGMENT;
    4445          40 :   return std::make_pair(TF & Mask, TF & ~Mask);
    4446             : }
    4447             : 
    4448             : ArrayRef<std::pair<unsigned, const char *>>
    4449          22 : AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
    4450             :   using namespace AArch64II;
    4451             : 
    4452             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4453             :       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
    4454             :       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
    4455             :       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
    4456             :       {MO_HI12, "aarch64-hi12"}};
    4457          22 :   return makeArrayRef(TargetFlags);
    4458             : }
    4459             : 
    4460             : ArrayRef<std::pair<unsigned, const char *>>
    4461          18 : AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
    4462             :   using namespace AArch64II;
    4463             : 
    4464             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4465             :       {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
    4466          18 :   return makeArrayRef(TargetFlags);
    4467             : }
    4468             : 
    4469             : ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
    4470          10 : AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
    4471             :   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
    4472             :       {{MOSuppressPair, "aarch64-suppress-pair"},
    4473             :        {MOStridedAccess, "aarch64-strided-access"}};
    4474          10 :   return makeArrayRef(TargetFlags);
    4475             : }
    4476             : 
    4477             : // Constants defining how certain sequences should be outlined.
    4478             : const unsigned MachineOutlinerDefaultFn = 0;
    4479             : const unsigned MachineOutlinerTailCallFn = 1;
    4480             : 
    4481          35 : std::pair<size_t, unsigned> AArch64InstrInfo::getOutliningCallOverhead(
    4482             :     MachineBasicBlock::iterator &StartIt,
    4483             :     MachineBasicBlock::iterator &EndIt) const {
    4484             :   // Is this a tail-call?
    4485          70 :   if (EndIt->isTerminator()) {
    4486             :     // Yes, so we only have to emit a call. Return a cost of 1 + signify that
    4487             :     // this candidate should be tail-called.
    4488          32 :     return std::make_pair(1, MachineOutlinerTailCallFn);
    4489             :   }
    4490             : 
    4491             :   // No, so save + restore LR.
    4492          38 :   return std::make_pair(3, MachineOutlinerDefaultFn);
    4493             : }
    4494             : 
    4495          13 : std::pair<size_t, unsigned> AArch64InstrInfo::getOutliningFrameOverhead(
    4496             :     std::vector<std::pair<MachineBasicBlock::iterator,
    4497             :                           MachineBasicBlock::iterator>> &CandidateClass) const {
    4498             : 
    4499             :   // Is the last instruction in this class a terminator?
    4500          39 :   if (CandidateClass[0].second->isTerminator())
    4501          12 :     return std::make_pair(0, MachineOutlinerTailCallFn);
    4502             : 
    4503             :   // No, so we have to add a return to the end.
    4504          14 :   return std::make_pair(1, MachineOutlinerDefaultFn);
    4505             : }
    4506             : 
    4507           7 : bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
    4508          14 :   return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
    4509             : }
    4510             : 
    4511             : AArch64GenInstrInfo::MachineOutlinerInstrType
    4512          76 : AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
    4513             : 
    4514          76 :   MachineFunction *MF = MI.getParent()->getParent();
    4515          76 :   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
    4516             : 
    4517             :   // Don't outline LOHs.
    4518          76 :   if (FuncInfo->getLOHRelated().count(&MI))
    4519             :     return MachineOutlinerInstrType::Illegal;
    4520             : 
    4521             :   // Don't allow debug values to impact outlining type.
    4522         152 :   if (MI.isDebugValue() || MI.isIndirectDebugValue())
    4523             :     return MachineOutlinerInstrType::Invisible;
    4524             : 
    4525             :   // Is this a terminator for a basic block?
    4526          76 :   if (MI.isTerminator()) {
    4527             : 
    4528             :     // Is this the end of a function?
    4529          14 :     if (MI.getParent()->succ_empty())
    4530             :       return MachineOutlinerInstrType::Legal;
    4531             : 
    4532             :     // It's not, so don't outline it.
    4533           0 :     return MachineOutlinerInstrType::Illegal;
    4534             :   }
    4535             : 
    4536             :   // Don't outline positions.
    4537          63 :   if (MI.isPosition())
    4538             :     return MachineOutlinerInstrType::Illegal;
    4539             : 
    4540             :   // Don't touch the link register or W30.
    4541         186 :   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
    4542         120 :       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    4543             :     return MachineOutlinerInstrType::Illegal;
    4544             : 
    4545             :   // Make sure none of the operands are un-outlinable.
    4546         240 :   for (const MachineOperand &MOP : MI.operands()) {
    4547         370 :     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
    4548         185 :         MOP.isTargetIndex())
    4549             :       return MachineOutlinerInstrType::Illegal;
    4550             :   }
    4551             : 
    4552             :   // Does this use the stack?
    4553         151 :   if (MI.modifiesRegister(AArch64::SP, &RI) ||
    4554          82 :       MI.readsRegister(AArch64::SP, &RI)) {
    4555             : 
    4556             :     // Is it a memory operation?
    4557          22 :     if (MI.mayLoadOrStore()) {
    4558             :       unsigned Base;  // Filled with the base regiser of MI.
    4559             :       int64_t Offset; // Filled with the offset of MI.
    4560             :       unsigned DummyWidth;
    4561             : 
    4562             :       // Does it allow us to offset the base register and is the base SP?
    4563          20 :       if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
    4564          10 :           Base != AArch64::SP)
    4565             :         return MachineOutlinerInstrType::Illegal;
    4566             : 
    4567             :       // Find the minimum/maximum offset for this instruction and check if
    4568             :       // fixing it up would be in range.
    4569             :       int64_t MinOffset, MaxOffset;
    4570             :       unsigned DummyScale;
    4571          20 :       getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
    4572             :                    MaxOffset);
    4573             : 
    4574             :       // TODO: We should really test what happens if an instruction overflows.
    4575             :       // This is tricky to test with IR tests, but when the outliner is moved
    4576             :       // to a MIR test, it really ought to be checked.
    4577          10 :       if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
    4578             :         return MachineOutlinerInstrType::Illegal;
    4579             : 
    4580             :       // It's in range, so we can outline it.
    4581          10 :       return MachineOutlinerInstrType::Legal;
    4582             :     }
    4583             : 
    4584             :     // We can't fix it up, so don't outline it.
    4585             :     return MachineOutlinerInstrType::Illegal;
    4586             :   }
    4587             : 
    4588             :   return MachineOutlinerInstrType::Legal;
    4589             : }
    4590             : 
    4591           1 : void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
    4592          18 :   for (MachineInstr &MI : MBB) {
    4593             :     unsigned Base, Width;
    4594             :     int64_t Offset;
    4595             : 
    4596             :     // Is this a load or store with an immediate offset with SP as the base?
    4597          14 :     if (!MI.mayLoadOrStore() ||
    4598           7 :         !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
    4599           0 :         Base != AArch64::SP)
    4600           7 :       continue;
    4601             : 
    4602             :     // It is, so we have to fix it up.
    4603             :     unsigned Scale;
    4604             :     int64_t Dummy1, Dummy2;
    4605             : 
    4606           0 :     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    4607             :     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    4608           0 :     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    4609             :     assert(Scale != 0 && "Unexpected opcode!");
    4610             : 
    4611             :     // We've pushed the return address to the stack, so add 16 to the offset.
    4612             :     // This is safe, since we already checked if it would overflow when we
    4613             :     // checked if this instruction was legal to outline.
    4614           0 :     int64_t NewImm = (Offset + 16) / Scale;
    4615           0 :     StackOffsetOperand.setImm(NewImm);
    4616             :   }
    4617           1 : }
    4618             : 
    4619           2 : void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
    4620             :                                               MachineFunction &MF,
    4621             :                                               unsigned FrameClass) const {
    4622             : 
    4623             :   // If this is a tail call outlined function, then there's already a return.
    4624           2 :   if (FrameClass == MachineOutlinerTailCallFn)
    4625             :     return;
    4626             : 
    4627             :   // It's not a tail call, so we have to insert the return ourselves.
    4628           6 :   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
    4629           1 :                           .addReg(AArch64::LR, RegState::Undef);
    4630           2 :   MBB.insert(MBB.end(), ret);
    4631             : 
    4632             :   // Walk over the basic block and fix up all the stack accesses.
    4633           1 :   fixupPostOutline(MBB);
    4634             : }
    4635             : 
    4636           2 : void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
    4637             :                                               MachineFunction &MF,
    4638           2 :                                               unsigned FrameClass) const {}
    4639             : 
    4640           5 : MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    4641             :     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    4642             :     MachineFunction &MF, unsigned CallClass) const {
    4643             : 
    4644             :   // Are we tail calling?
    4645           5 :   if (CallClass == MachineOutlinerTailCallFn) {
    4646             :     // If yes, then we can just branch to the label.
    4647          12 :     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B))
    4648           4 :                             .addGlobalAddress(M.getNamedValue(MF.getName())));
    4649           2 :     return It;
    4650             :   }
    4651             : 
    4652             :   // We're not tail calling, so we have to save LR before the call and restore
    4653             :   // it after.
    4654          18 :   MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
    4655           3 :                               .addReg(AArch64::SP, RegState::Define)
    4656           3 :                               .addReg(AArch64::LR)
    4657           3 :                               .addReg(AArch64::SP)
    4658           3 :                               .addImm(-16);
    4659           3 :   It = MBB.insert(It, STRXpre);
    4660           6 :   It++;
    4661             : 
    4662             :   // Insert the call.
    4663          18 :   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
    4664           6 :                           .addGlobalAddress(M.getNamedValue(MF.getName())));
    4665             : 
    4666           6 :   It++;
    4667             : 
    4668             :   // Restore the link register.
    4669          18 :   MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
    4670           3 :                                .addReg(AArch64::SP, RegState::Define)
    4671           3 :                                .addReg(AArch64::LR, RegState::Define)
    4672           3 :                                .addReg(AArch64::SP)
    4673           3 :                                .addImm(16);
    4674           3 :   It = MBB.insert(It, LDRXpost);
    4675             : 
    4676           3 :   return It;
    4677      216918 : }

Generated by: LCOV version 1.13