LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64InstrInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1581 1887 83.8 %
Date: 2018-09-23 13:06:45 Functions: 86 88 97.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains the AArch64 implementation of the TargetInstrInfo class.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64InstrInfo.h"
      15             : #include "AArch64MachineFunctionInfo.h"
      16             : #include "AArch64Subtarget.h"
      17             : #include "MCTargetDesc/AArch64AddressingModes.h"
      18             : #include "Utils/AArch64BaseInfo.h"
      19             : #include "llvm/ADT/ArrayRef.h"
      20             : #include "llvm/ADT/STLExtras.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/CodeGen/MachineBasicBlock.h"
      23             : #include "llvm/CodeGen/MachineFrameInfo.h"
      24             : #include "llvm/CodeGen/MachineFunction.h"
      25             : #include "llvm/CodeGen/MachineInstr.h"
      26             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      27             : #include "llvm/CodeGen/MachineMemOperand.h"
      28             : #include "llvm/CodeGen/MachineOperand.h"
      29             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      30             : #include "llvm/CodeGen/MachineModuleInfo.h"
      31             : #include "llvm/CodeGen/StackMaps.h"
      32             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      33             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      34             : #include "llvm/IR/DebugLoc.h"
      35             : #include "llvm/IR/GlobalValue.h"
      36             : #include "llvm/MC/MCInst.h"
      37             : #include "llvm/MC/MCInstrDesc.h"
      38             : #include "llvm/Support/Casting.h"
      39             : #include "llvm/Support/CodeGen.h"
      40             : #include "llvm/Support/CommandLine.h"
      41             : #include "llvm/Support/Compiler.h"
      42             : #include "llvm/Support/ErrorHandling.h"
      43             : #include "llvm/Support/MathExtras.h"
      44             : #include "llvm/Target/TargetMachine.h"
      45             : #include "llvm/Target/TargetOptions.h"
      46             : #include <cassert>
      47             : #include <cstdint>
      48             : #include <iterator>
      49             : #include <utility>
      50             : 
      51             : using namespace llvm;
      52             : 
      53             : #define GET_INSTRINFO_CTOR_DTOR
      54             : #include "AArch64GenInstrInfo.inc"
      55             : 
      56             : static cl::opt<unsigned> TBZDisplacementBits(
      57             :     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
      58             :     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
      59             : 
      60             : static cl::opt<unsigned> CBZDisplacementBits(
      61             :     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
      62             :     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
      63             : 
      64             : static cl::opt<unsigned>
      65             :     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
      66             :                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
      67             : 
      68        1539 : AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
      69             :     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      70        1539 :       RI(STI.getTargetTriple()), Subtarget(STI) {}
      71             : 
      72             : /// GetInstSize - Return the number of bytes of code the specified
      73             : /// instruction may be.  This returns the maximum number of bytes.
      74       87514 : unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
      75       87514 :   const MachineBasicBlock &MBB = *MI.getParent();
      76       87514 :   const MachineFunction *MF = MBB.getParent();
      77       87514 :   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
      78             : 
      79      175028 :   if (MI.getOpcode() == AArch64::INLINEASM)
      80         216 :     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
      81             : 
      82             :   // FIXME: We currently only handle pseudoinstructions that don't get expanded
      83             :   //        before the assembly printer.
      84             :   unsigned NumBytes = 0;
      85             :   const MCInstrDesc &Desc = MI.getDesc();
      86       87298 :   switch (Desc.getOpcode()) {
      87             :   default:
      88             :     // Anything not explicitly designated otherwise is a normal 4-byte insn.
      89             :     NumBytes = 4;
      90             :     break;
      91        3300 :   case TargetOpcode::DBG_VALUE:
      92             :   case TargetOpcode::EH_LABEL:
      93             :   case TargetOpcode::IMPLICIT_DEF:
      94             :   case TargetOpcode::KILL:
      95             :     NumBytes = 0;
      96        3300 :     break;
      97          17 :   case TargetOpcode::STACKMAP:
      98             :     // The upper bound for a stackmap intrinsic is the full length of its shadow
      99          17 :     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
     100             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     101          17 :     break;
     102          48 :   case TargetOpcode::PATCHPOINT:
     103             :     // The size of the patchpoint intrinsic is the number of bytes requested
     104          48 :     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
     105             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     106          48 :     break;
     107          30 :   case AArch64::TLSDESC_CALLSEQ:
     108             :     // This gets lowered to an instruction sequence which takes 16 bytes
     109             :     NumBytes = 16;
     110          30 :     break;
     111             :   }
     112             : 
     113             :   return NumBytes;
     114             : }
     115             : 
     116       38387 : static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
     117             :                             SmallVectorImpl<MachineOperand> &Cond) {
     118             :   // Block ends with fall-through condbranch.
     119       76774 :   switch (LastInst->getOpcode()) {
     120           0 :   default:
     121           0 :     llvm_unreachable("Unknown branch instruction?");
     122       18040 :   case AArch64::Bcc:
     123       18040 :     Target = LastInst->getOperand(1).getMBB();
     124       18040 :     Cond.push_back(LastInst->getOperand(0));
     125       18040 :     break;
     126       12563 :   case AArch64::CBZW:
     127             :   case AArch64::CBZX:
     128             :   case AArch64::CBNZW:
     129             :   case AArch64::CBNZX:
     130       12563 :     Target = LastInst->getOperand(1).getMBB();
     131       25126 :     Cond.push_back(MachineOperand::CreateImm(-1));
     132       37689 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     133       12563 :     Cond.push_back(LastInst->getOperand(0));
     134       12563 :     break;
     135        7784 :   case AArch64::TBZW:
     136             :   case AArch64::TBZX:
     137             :   case AArch64::TBNZW:
     138             :   case AArch64::TBNZX:
     139        7784 :     Target = LastInst->getOperand(2).getMBB();
     140       15568 :     Cond.push_back(MachineOperand::CreateImm(-1));
     141       23352 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     142        7784 :     Cond.push_back(LastInst->getOperand(0));
     143       15568 :     Cond.push_back(LastInst->getOperand(1));
     144             :   }
     145       38387 : }
     146             : 
     147             : static unsigned getBranchDisplacementBits(unsigned Opc) {
     148        1351 :   switch (Opc) {
     149           0 :   default:
     150           0 :     llvm_unreachable("unexpected opcode!");
     151             :   case AArch64::B:
     152             :     return 64;
     153             :   case AArch64::TBNZW:
     154             :   case AArch64::TBZW:
     155             :   case AArch64::TBNZX:
     156             :   case AArch64::TBZX:
     157             :     return TBZDisplacementBits;
     158             :   case AArch64::CBNZW:
     159             :   case AArch64::CBZW:
     160             :   case AArch64::CBNZX:
     161             :   case AArch64::CBZX:
     162             :     return CBZDisplacementBits;
     163             :   case AArch64::Bcc:
     164             :     return BCCDisplacementBits;
     165             :   }
     166             : }
     167             : 
     168        1351 : bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
     169             :                                              int64_t BrOffset) const {
     170             :   unsigned Bits = getBranchDisplacementBits(BranchOp);
     171             :   assert(Bits >= 3 && "max branch displacement must be enough to jump"
     172             :                       "over conditional branch expansion");
     173        1107 :   return isIntN(Bits, BrOffset / 4);
     174             : }
     175             : 
     176             : MachineBasicBlock *
     177        1429 : AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
     178        2858 :   switch (MI.getOpcode()) {
     179           0 :   default:
     180           0 :     llvm_unreachable("unexpected opcode!");
     181         244 :   case AArch64::B:
     182         244 :     return MI.getOperand(0).getMBB();
     183         211 :   case AArch64::TBZW:
     184             :   case AArch64::TBNZW:
     185             :   case AArch64::TBZX:
     186             :   case AArch64::TBNZX:
     187         211 :     return MI.getOperand(2).getMBB();
     188         974 :   case AArch64::CBZW:
     189             :   case AArch64::CBNZW:
     190             :   case AArch64::CBZX:
     191             :   case AArch64::CBNZX:
     192             :   case AArch64::Bcc:
     193         974 :     return MI.getOperand(1).getMBB();
     194             :   }
     195             : }
     196             : 
     197             : // Branch analysis.
     198      394458 : bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     199             :                                      MachineBasicBlock *&TBB,
     200             :                                      MachineBasicBlock *&FBB,
     201             :                                      SmallVectorImpl<MachineOperand> &Cond,
     202             :                                      bool AllowModify) const {
     203             :   // If the block has no terminators, it just falls into the block after it.
     204      394458 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     205      394458 :   if (I == MBB.end())
     206             :     return false;
     207             : 
     208      393036 :   if (!isUnpredicatedTerminator(*I))
     209             :     return false;
     210             : 
     211             :   // Get the last instruction in the block.
     212             :   MachineInstr *LastInst = &*I;
     213             : 
     214             :   // If there is only one terminator instruction, process it.
     215      369158 :   unsigned LastOpc = LastInst->getOpcode();
     216      723269 :   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     217      351874 :     if (isUncondBranchOpcode(LastOpc)) {
     218        7174 :       TBB = LastInst->getOperand(0).getMBB();
     219        7174 :       return false;
     220             :     }
     221             :     if (isCondBranchOpcode(LastOpc)) {
     222             :       // Block ends with fall-through condbranch.
     223       21360 :       parseCondBranch(LastInst, TBB, Cond);
     224       21360 :       return false;
     225             :     }
     226             :     return true; // Can't handle indirect branch.
     227             :   }
     228             : 
     229             :   // Get the instruction before it if it is a terminator.
     230             :   MachineInstr *SecondLastInst = &*I;
     231       17284 :   unsigned SecondLastOpc = SecondLastInst->getOpcode();
     232             : 
     233             :   // If AllowModify is true and the block ends with two or more unconditional
     234             :   // branches, delete all but the first unconditional branch.
     235       17284 :   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
     236        2355 :     while (isUncondBranchOpcode(SecondLastOpc)) {
     237           2 :       LastInst->eraseFromParent();
     238             :       LastInst = SecondLastInst;
     239           2 :       LastOpc = LastInst->getOpcode();
     240           4 :       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     241             :         // Return now the only terminator is an unconditional branch.
     242           2 :         TBB = LastInst->getOperand(0).getMBB();
     243           2 :         return false;
     244             :       } else {
     245             :         SecondLastInst = &*I;
     246           0 :         SecondLastOpc = SecondLastInst->getOpcode();
     247             :       }
     248             :     }
     249             :   }
     250             : 
     251             :   // If there are three terminators, we don't know what sort of block this is.
     252       33194 :   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
     253             :     return true;
     254             : 
     255             :   // If the block ends with a B and a Bcc, handle it.
     256       17047 :   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     257       17027 :     parseCondBranch(SecondLastInst, TBB, Cond);
     258       17027 :     FBB = LastInst->getOperand(0).getMBB();
     259       17027 :     return false;
     260             :   }
     261             : 
     262             :   // If the block ends with two unconditional branches, handle it.  The second
     263             :   // one is not executed, so remove it.
     264          71 :   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     265           0 :     TBB = SecondLastInst->getOperand(0).getMBB();
     266           0 :     I = LastInst;
     267           0 :     if (AllowModify)
     268           0 :       I->eraseFromParent();
     269           0 :     return false;
     270             :   }
     271             : 
     272             :   // ...likewise if it ends with an indirect branch followed by an unconditional
     273             :   // branch.
     274          71 :   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     275           0 :     I = LastInst;
     276           0 :     if (AllowModify)
     277           0 :       I->eraseFromParent();
     278           0 :     return true;
     279             :   }
     280             : 
     281             :   // Otherwise, can't handle this.
     282             :   return true;
     283             : }
     284             : 
     285        2018 : bool AArch64InstrInfo::reverseBranchCondition(
     286             :     SmallVectorImpl<MachineOperand> &Cond) const {
     287        2018 :   if (Cond[0].getImm() != -1) {
     288             :     // Regular Bcc
     289        1295 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
     290        1295 :     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
     291             :   } else {
     292             :     // Folded compare-and-branch
     293         723 :     switch (Cond[1].getImm()) {
     294           0 :     default:
     295           0 :       llvm_unreachable("Unknown conditional branch!");
     296         181 :     case AArch64::CBZW:
     297             :       Cond[1].setImm(AArch64::CBNZW);
     298             :       break;
     299         174 :     case AArch64::CBNZW:
     300             :       Cond[1].setImm(AArch64::CBZW);
     301             :       break;
     302          52 :     case AArch64::CBZX:
     303             :       Cond[1].setImm(AArch64::CBNZX);
     304             :       break;
     305          51 :     case AArch64::CBNZX:
     306             :       Cond[1].setImm(AArch64::CBZX);
     307             :       break;
     308         112 :     case AArch64::TBZW:
     309             :       Cond[1].setImm(AArch64::TBNZW);
     310             :       break;
     311         111 :     case AArch64::TBNZW:
     312             :       Cond[1].setImm(AArch64::TBZW);
     313             :       break;
     314          24 :     case AArch64::TBZX:
     315             :       Cond[1].setImm(AArch64::TBNZX);
     316             :       break;
     317          18 :     case AArch64::TBNZX:
     318             :       Cond[1].setImm(AArch64::TBZX);
     319             :       break;
     320             :     }
     321             :   }
     322             : 
     323        2018 :   return false;
     324             : }
     325             : 
     326        3777 : unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
     327             :                                         int *BytesRemoved) const {
     328        3777 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     329        3777 :   if (I == MBB.end())
     330             :     return 0;
     331             : 
     332        7554 :   if (!isUncondBranchOpcode(I->getOpcode()) &&
     333             :       !isCondBranchOpcode(I->getOpcode()))
     334             :     return 0;
     335             : 
     336             :   // Remove the branch.
     337        3570 :   I->eraseFromParent();
     338             : 
     339        3570 :   I = MBB.end();
     340             : 
     341        3570 :   if (I == MBB.begin()) {
     342         395 :     if (BytesRemoved)
     343           2 :       *BytesRemoved = 4;
     344         395 :     return 1;
     345             :   }
     346             :   --I;
     347        3175 :   if (!isCondBranchOpcode(I->getOpcode())) {
     348        2084 :     if (BytesRemoved)
     349           6 :       *BytesRemoved = 4;
     350        2084 :     return 1;
     351             :   }
     352             : 
     353             :   // Remove the branch.
     354        1091 :   I->eraseFromParent();
     355        1091 :   if (BytesRemoved)
     356           2 :     *BytesRemoved = 8;
     357             : 
     358             :   return 2;
     359             : }
     360             : 
     361        2639 : void AArch64InstrInfo::instantiateCondBranch(
     362             :     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
     363             :     ArrayRef<MachineOperand> Cond) const {
     364        2639 :   if (Cond[0].getImm() != -1) {
     365             :     // Regular Bcc
     366        1599 :     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
     367             :   } else {
     368             :     // Folded compare-and-branch
     369             :     // Note that we use addOperand instead of addReg to keep the flags.
     370             :     const MachineInstrBuilder MIB =
     371        1040 :         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
     372        1040 :     if (Cond.size() > 3)
     373         357 :       MIB.addImm(Cond[3].getImm());
     374             :     MIB.addMBB(TBB);
     375             :   }
     376        2639 : }
     377             : 
     378        3582 : unsigned AArch64InstrInfo::insertBranch(
     379             :     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
     380             :     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
     381             :   // Shouldn't be a fall through.
     382             :   assert(TBB && "insertBranch must not be told to insert a fallthrough");
     383             : 
     384        3582 :   if (!FBB) {
     385        3513 :     if (Cond.empty()) // Unconditional branch?
     386         943 :       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
     387             :     else
     388        2570 :       instantiateCondBranch(MBB, DL, TBB, Cond);
     389             : 
     390        3513 :     if (BytesAdded)
     391           2 :       *BytesAdded = 4;
     392             : 
     393        3513 :     return 1;
     394             :   }
     395             : 
     396             :   // Two-way conditional branch.
     397          69 :   instantiateCondBranch(MBB, DL, TBB, Cond);
     398          69 :   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
     399             : 
     400          69 :   if (BytesAdded)
     401          10 :     *BytesAdded = 8;
     402             : 
     403             :   return 2;
     404             : }
     405             : 
     406             : // Find the original register that VReg is copied from.
     407         240 : static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
     408         486 :   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
     409         397 :     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     410             :     if (!DefMI->isFullCopy())
     411         151 :       return VReg;
     412         246 :     VReg = DefMI->getOperand(1).getReg();
     413             :   }
     414             :   return VReg;
     415             : }
     416             : 
     417             : // Determine if VReg is defined by an instruction that can be folded into a
     418             : // csel instruction. If so, return the folded opcode, and the replacement
     419             : // register.
     420         224 : static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     421             :                                 unsigned *NewVReg = nullptr) {
     422         224 :   VReg = removeCopies(MRI, VReg);
     423         224 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
     424             :     return 0;
     425             : 
     426         151 :   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
     427         151 :   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     428             :   unsigned Opc = 0;
     429             :   unsigned SrcOpNum = 0;
     430         302 :   switch (DefMI->getOpcode()) {
     431           0 :   case AArch64::ADDSXri:
     432             :   case AArch64::ADDSWri:
     433             :     // if NZCV is used, do not fold.
     434           0 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     435             :       return 0;
     436             :     // fall-through to ADDXri and ADDWri.
     437             :     LLVM_FALLTHROUGH;
     438             :   case AArch64::ADDXri:
     439             :   case AArch64::ADDWri:
     440             :     // add x, 1 -> csinc.
     441         186 :     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
     442          10 :         DefMI->getOperand(3).getImm() != 0)
     443             :       return 0;
     444             :     SrcOpNum = 1;
     445          10 :     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     446             :     break;
     447             : 
     448           8 :   case AArch64::ORNXrr:
     449             :   case AArch64::ORNWrr: {
     450             :     // not x -> csinv, represented as orn dst, xzr, src.
     451           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     452           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     453             :       return 0;
     454             :     SrcOpNum = 2;
     455           8 :     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     456             :     break;
     457             :   }
     458             : 
     459           8 :   case AArch64::SUBSXrr:
     460             :   case AArch64::SUBSWrr:
     461             :     // if NZCV is used, do not fold.
     462           8 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     463             :       return 0;
     464             :     // fall-through to SUBXrr and SUBWrr.
     465             :     LLVM_FALLTHROUGH;
     466             :   case AArch64::SUBXrr:
     467             :   case AArch64::SUBWrr: {
     468             :     // neg x -> csneg, represented as sub dst, xzr, src.
     469           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     470           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     471             :       return 0;
     472             :     SrcOpNum = 2;
     473           8 :     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     474             :     break;
     475             :   }
     476             :   default:
     477             :     return 0;
     478             :   }
     479             :   assert(Opc && SrcOpNum && "Missing parameters");
     480             : 
     481          26 :   if (NewVReg)
     482          26 :     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
     483             :   return Opc;
     484             : }
     485             : 
     486          97 : bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
     487             :                                        ArrayRef<MachineOperand> Cond,
     488             :                                        unsigned TrueReg, unsigned FalseReg,
     489             :                                        int &CondCycles, int &TrueCycles,
     490             :                                        int &FalseCycles) const {
     491             :   // Check register classes.
     492          97 :   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     493             :   const TargetRegisterClass *RC =
     494         194 :       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
     495          97 :   if (!RC)
     496             :     return false;
     497             : 
     498             :   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
     499          97 :   unsigned ExtraCondLat = Cond.size() != 1;
     500             : 
     501             :   // GPRs are handled by csel.
     502             :   // FIXME: Fold in x+1, -x, and ~x when applicable.
     503         194 :   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
     504          41 :       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
     505             :     // Single-cycle csel, csinc, csinv, and csneg.
     506          94 :     CondCycles = 1 + ExtraCondLat;
     507          94 :     TrueCycles = FalseCycles = 1;
     508          94 :     if (canFoldIntoCSel(MRI, TrueReg))
     509           6 :       TrueCycles = 0;
     510          88 :     else if (canFoldIntoCSel(MRI, FalseReg))
     511           7 :       FalseCycles = 0;
     512          94 :     return true;
     513             :   }
     514             : 
     515             :   // Scalar floating point is handled by fcsel.
     516             :   // FIXME: Form fabs, fmin, and fmax when applicable.
     517           6 :   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
     518           3 :       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
     519           0 :     CondCycles = 5 + ExtraCondLat;
     520           0 :     TrueCycles = FalseCycles = 2;
     521           0 :     return true;
     522             :   }
     523             : 
     524             :   // Can't do vectors.
     525             :   return false;
     526             : }
     527             : 
     528          24 : void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
     529             :                                     MachineBasicBlock::iterator I,
     530             :                                     const DebugLoc &DL, unsigned DstReg,
     531             :                                     ArrayRef<MachineOperand> Cond,
     532             :                                     unsigned TrueReg, unsigned FalseReg) const {
     533          24 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     534             : 
     535             :   // Parse the condition code, see parseCondBranch() above.
     536             :   AArch64CC::CondCode CC;
     537          24 :   switch (Cond.size()) {
     538           0 :   default:
     539           0 :     llvm_unreachable("Unknown condition opcode in Cond");
     540          15 :   case 1: // b.cc
     541          15 :     CC = AArch64CC::CondCode(Cond[0].getImm());
     542          15 :     break;
     543           5 :   case 3: { // cbz/cbnz
     544             :     // We must insert a compare against 0.
     545             :     bool Is64Bit;
     546           5 :     switch (Cond[1].getImm()) {
     547           0 :     default:
     548           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     549             :     case AArch64::CBZW:
     550             :       Is64Bit = false;
     551             :       CC = AArch64CC::EQ;
     552             :       break;
     553             :     case AArch64::CBZX:
     554             :       Is64Bit = true;
     555             :       CC = AArch64CC::EQ;
     556             :       break;
     557             :     case AArch64::CBNZW:
     558             :       Is64Bit = false;
     559             :       CC = AArch64CC::NE;
     560             :       break;
     561             :     case AArch64::CBNZX:
     562             :       Is64Bit = true;
     563             :       CC = AArch64CC::NE;
     564             :       break;
     565             :     }
     566           5 :     unsigned SrcReg = Cond[2].getReg();
     567           5 :     if (Is64Bit) {
     568             :       // cmp reg, #0 is actually subs xzr, reg, #0.
     569           2 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
     570           6 :       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
     571           2 :           .addReg(SrcReg)
     572             :           .addImm(0)
     573             :           .addImm(0);
     574             :     } else {
     575           3 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
     576           9 :       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
     577           3 :           .addReg(SrcReg)
     578             :           .addImm(0)
     579             :           .addImm(0);
     580             :     }
     581             :     break;
     582             :   }
     583           4 :   case 4: { // tbz/tbnz
     584             :     // We must insert a tst instruction.
     585           4 :     switch (Cond[1].getImm()) {
     586           0 :     default:
     587           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     588             :     case AArch64::TBZW:
     589             :     case AArch64::TBZX:
     590             :       CC = AArch64CC::EQ;
     591             :       break;
     592           2 :     case AArch64::TBNZW:
     593             :     case AArch64::TBNZX:
     594             :       CC = AArch64CC::NE;
     595           2 :       break;
     596             :     }
     597             :     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
     598           4 :     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
     599           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
     600           2 :           .addReg(Cond[2].getReg())
     601             :           .addImm(
     602           2 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
     603             :     else
     604           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
     605           2 :           .addReg(Cond[2].getReg())
     606             :           .addImm(
     607           2 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
     608             :     break;
     609             :   }
     610             :   }
     611             : 
     612             :   unsigned Opc = 0;
     613             :   const TargetRegisterClass *RC = nullptr;
     614             :   bool TryFold = false;
     615          24 :   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
     616             :     RC = &AArch64::GPR64RegClass;
     617             :     Opc = AArch64::CSELXr;
     618             :     TryFold = true;
     619          14 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
     620             :     RC = &AArch64::GPR32RegClass;
     621             :     Opc = AArch64::CSELWr;
     622             :     TryFold = true;
     623           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
     624             :     RC = &AArch64::FPR64RegClass;
     625             :     Opc = AArch64::FCSELDrrr;
     626           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
     627             :     RC = &AArch64::FPR32RegClass;
     628             :     Opc = AArch64::FCSELSrrr;
     629             :   }
     630             :   assert(RC && "Unsupported regclass");
     631             : 
     632             :   // Try folding simple instructions into the csel.
     633          24 :   if (TryFold) {
     634          24 :     unsigned NewVReg = 0;
     635          24 :     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
     636          24 :     if (FoldedOpc) {
     637             :       // The folded opcodes csinc, csinc and csneg apply the operation to
     638             :       // FalseReg, so we need to invert the condition.
     639             :       CC = AArch64CC::getInvertedCondCode(CC);
     640             :       TrueReg = FalseReg;
     641             :     } else
     642          18 :       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
     643             : 
     644             :     // Fold the operation. Leave any dead instructions for DCE to clean up.
     645          24 :     if (FoldedOpc) {
     646          13 :       FalseReg = NewVReg;
     647             :       Opc = FoldedOpc;
     648             :       // The extends the live range of NewVReg.
     649          13 :       MRI.clearKillFlags(NewVReg);
     650             :     }
     651             :   }
     652             : 
     653             :   // Pull all virtual register into the appropriate class.
     654          24 :   MRI.constrainRegClass(TrueReg, RC);
     655          24 :   MRI.constrainRegClass(FalseReg, RC);
     656             : 
     657             :   // Insert the csel.
     658          48 :   BuildMI(MBB, I, DL, get(Opc), DstReg)
     659          24 :       .addReg(TrueReg)
     660          24 :       .addReg(FalseReg)
     661          24 :       .addImm(CC);
     662          24 : }
     663             : 
     664             : /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
     665             : static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
     666           5 :   uint64_t Imm = MI.getOperand(1).getImm();
     667           5 :   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
     668             :   uint64_t Encoding;
     669           5 :   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
     670             : }
     671             : 
     672             : // FIXME: this implementation should be micro-architecture dependent, so a
     673             : // micro-architecture target hook should be introduced here in future.
     674       13227 : bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
     675       13227 :   if (!Subtarget.hasCustomCheapAsMoveHandling())
     676       12061 :     return MI.isAsCheapAsAMove();
     677             : 
     678        1166 :   if (Subtarget.hasExynosCheapAsMoveHandling()) {
     679         645 :     if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
     680          17 :       return true;
     681             :     else
     682         628 :       return MI.isAsCheapAsAMove();
     683             :   }
     684             : 
     685        1042 :   switch (MI.getOpcode()) {
     686             :   default:
     687             :     return false;
     688             : 
     689             :   // add/sub on register without shift
     690          18 :   case AArch64::ADDWri:
     691             :   case AArch64::ADDXri:
     692             :   case AArch64::SUBWri:
     693             :   case AArch64::SUBXri:
     694          18 :     return (MI.getOperand(3).getImm() == 0);
     695             : 
     696             :   // logical ops on immediate
     697           0 :   case AArch64::ANDWri:
     698             :   case AArch64::ANDXri:
     699             :   case AArch64::EORWri:
     700             :   case AArch64::EORXri:
     701             :   case AArch64::ORRWri:
     702             :   case AArch64::ORRXri:
     703           0 :     return true;
     704             : 
     705             :   // logical ops on register without shift
     706           0 :   case AArch64::ANDWrr:
     707             :   case AArch64::ANDXrr:
     708             :   case AArch64::BICWrr:
     709             :   case AArch64::BICXrr:
     710             :   case AArch64::EONWrr:
     711             :   case AArch64::EONXrr:
     712             :   case AArch64::EORWrr:
     713             :   case AArch64::EORXrr:
     714             :   case AArch64::ORNWrr:
     715             :   case AArch64::ORNXrr:
     716             :   case AArch64::ORRWrr:
     717             :   case AArch64::ORRXrr:
     718           0 :     return true;
     719             : 
     720             :   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
     721             :   // ORRXri, it is as cheap as MOV
     722             :   case AArch64::MOVi32imm:
     723           5 :     return canBeExpandedToORR(MI, 32);
     724             :   case AArch64::MOVi64imm:
     725           0 :     return canBeExpandedToORR(MI, 64);
     726             : 
     727             :   // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
     728             :   // feature.
     729          12 :   case AArch64::FMOVH0:
     730             :   case AArch64::FMOVS0:
     731             :   case AArch64::FMOVD0:
     732          12 :     return Subtarget.hasZeroCycleZeroing();
     733           0 :   case TargetOpcode::COPY:
     734           0 :     return (Subtarget.hasZeroCycleZeroing() &&
     735           0 :             (MI.getOperand(1).getReg() == AArch64::WZR ||
     736             :              MI.getOperand(1).getReg() == AArch64::XZR));
     737             :   }
     738             : 
     739             :   llvm_unreachable("Unknown opcode to check as cheap as a move!");
     740             : }
     741             : 
     742        1197 : bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
     743             :   unsigned Reg, Imm, Shift;
     744             : 
     745        2394 :   switch (MI.getOpcode()) {
     746             :   default:
     747             :     return false;
     748             : 
     749             :   // MOV Rd, SP
     750         322 :   case AArch64::ADDWri:
     751             :   case AArch64::ADDXri:
     752         644 :     if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
     753             :       return false;
     754             : 
     755         252 :     Reg = MI.getOperand(1).getReg();
     756         252 :     Imm = MI.getOperand(2).getImm();
     757         252 :     return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
     758             : 
     759             :   // Literal
     760           3 :   case AArch64::ADR:
     761             :   case AArch64::ADRP:
     762           3 :     return true;
     763             : 
     764             :   // MOVI Vd, #0
     765          27 :   case AArch64::MOVID:
     766             :   case AArch64::MOVIv8b_ns:
     767             :   case AArch64::MOVIv2d_ns:
     768             :   case AArch64::MOVIv16b_ns:
     769          27 :     Imm = MI.getOperand(1).getImm();
     770          27 :     return (Imm == 0);
     771             : 
     772             :   // MOVI Vd, #0
     773           0 :   case AArch64::MOVIv2i32:
     774             :   case AArch64::MOVIv4i16:
     775             :   case AArch64::MOVIv4i32:
     776             :   case AArch64::MOVIv8i16:
     777           0 :     Imm = MI.getOperand(1).getImm();
     778           0 :     Shift = MI.getOperand(2).getImm();
     779           0 :     return (Imm == 0 && Shift == 0);
     780             : 
     781             :   // MOV Rd, Imm
     782           0 :   case AArch64::MOVNWi:
     783             :   case AArch64::MOVNXi:
     784             : 
     785             :   // MOV Rd, Imm
     786             :   case AArch64::MOVZWi:
     787             :   case AArch64::MOVZXi:
     788           0 :     return true;
     789             : 
     790             :   // MOV Rd, Imm
     791           7 :   case AArch64::ORRWri:
     792             :   case AArch64::ORRXri:
     793          14 :     if (!MI.getOperand(1).isReg())
     794             :       return false;
     795             : 
     796           7 :     Reg = MI.getOperand(1).getReg();
     797           7 :     Imm = MI.getOperand(2).getImm();
     798           7 :     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
     799             : 
     800             :   // MOV Rd, Rm
     801          22 :   case AArch64::ORRWrs:
     802             :   case AArch64::ORRXrs:
     803          44 :     if (!MI.getOperand(1).isReg())
     804             :       return false;
     805             : 
     806          22 :     Reg = MI.getOperand(1).getReg();
     807          22 :     Imm = MI.getOperand(3).getImm();
     808             :     Shift = AArch64_AM::getShiftValue(Imm);
     809          22 :     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
     810             :   }
     811             : }
     812             : 
     813        1598 : bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
     814             :   unsigned Imm, Shift;
     815             :   AArch64_AM::ShiftExtendType Ext;
     816             : 
     817        3196 :   switch (MI.getOpcode()) {
     818             :   default:
     819             :     return false;
     820             : 
     821             :   // WriteI
     822         358 :   case AArch64::ADDSWri:
     823             :   case AArch64::ADDSXri:
     824             :   case AArch64::ADDWri:
     825             :   case AArch64::ADDXri:
     826             :   case AArch64::SUBSWri:
     827             :   case AArch64::SUBSXri:
     828             :   case AArch64::SUBWri:
     829             :   case AArch64::SUBXri:
     830         358 :     return true;
     831             : 
     832             :   // WriteISReg
     833         370 :   case AArch64::ADDSWrs:
     834             :   case AArch64::ADDSXrs:
     835             :   case AArch64::ADDWrs:
     836             :   case AArch64::ADDXrs:
     837             :   case AArch64::ANDSWrs:
     838             :   case AArch64::ANDSXrs:
     839             :   case AArch64::ANDWrs:
     840             :   case AArch64::ANDXrs:
     841             :   case AArch64::BICSWrs:
     842             :   case AArch64::BICSXrs:
     843             :   case AArch64::BICWrs:
     844             :   case AArch64::BICXrs:
     845             :   case AArch64::EONWrs:
     846             :   case AArch64::EONXrs:
     847             :   case AArch64::EORWrs:
     848             :   case AArch64::EORXrs:
     849             :   case AArch64::ORNWrs:
     850             :   case AArch64::ORNXrs:
     851             :   case AArch64::ORRWrs:
     852             :   case AArch64::ORRXrs:
     853             :   case AArch64::SUBSWrs:
     854             :   case AArch64::SUBSXrs:
     855             :   case AArch64::SUBWrs:
     856             :   case AArch64::SUBXrs:
     857         370 :     Imm = MI.getOperand(3).getImm();
     858             :     Shift = AArch64_AM::getShiftValue(Imm);
     859             :     Ext = AArch64_AM::getShiftType(Imm);
     860         370 :     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
     861             : 
     862             :   // WriteIEReg
     863          50 :   case AArch64::ADDSWrx:
     864             :   case AArch64::ADDSXrx:
     865             :   case AArch64::ADDSXrx64:
     866             :   case AArch64::ADDWrx:
     867             :   case AArch64::ADDXrx:
     868             :   case AArch64::ADDXrx64:
     869             :   case AArch64::SUBSWrx:
     870             :   case AArch64::SUBSXrx:
     871             :   case AArch64::SUBSXrx64:
     872             :   case AArch64::SUBWrx:
     873             :   case AArch64::SUBXrx:
     874             :   case AArch64::SUBXrx64:
     875          50 :     Imm = MI.getOperand(3).getImm();
     876             :     Shift = AArch64_AM::getArithShiftValue(Imm);
     877             :     Ext = AArch64_AM::getArithExtendType(Imm);
     878          50 :     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
     879             : 
     880         185 :   case AArch64::PRFMroW:
     881             :   case AArch64::PRFMroX:
     882             : 
     883             :   // WriteLDIdx
     884             :   case AArch64::LDRBBroW:
     885             :   case AArch64::LDRBBroX:
     886             :   case AArch64::LDRHHroW:
     887             :   case AArch64::LDRHHroX:
     888             :   case AArch64::LDRSBWroW:
     889             :   case AArch64::LDRSBWroX:
     890             :   case AArch64::LDRSBXroW:
     891             :   case AArch64::LDRSBXroX:
     892             :   case AArch64::LDRSHWroW:
     893             :   case AArch64::LDRSHWroX:
     894             :   case AArch64::LDRSHXroW:
     895             :   case AArch64::LDRSHXroX:
     896             :   case AArch64::LDRSWroW:
     897             :   case AArch64::LDRSWroX:
     898             :   case AArch64::LDRWroW:
     899             :   case AArch64::LDRWroX:
     900             :   case AArch64::LDRXroW:
     901             :   case AArch64::LDRXroX:
     902             : 
     903             :   case AArch64::LDRBroW:
     904             :   case AArch64::LDRBroX:
     905             :   case AArch64::LDRDroW:
     906             :   case AArch64::LDRDroX:
     907             :   case AArch64::LDRHroW:
     908             :   case AArch64::LDRHroX:
     909             :   case AArch64::LDRSroW:
     910             :   case AArch64::LDRSroX:
     911             : 
     912             :   // WriteSTIdx
     913             :   case AArch64::STRBBroW:
     914             :   case AArch64::STRBBroX:
     915             :   case AArch64::STRHHroW:
     916             :   case AArch64::STRHHroX:
     917             :   case AArch64::STRWroW:
     918             :   case AArch64::STRWroX:
     919             :   case AArch64::STRXroW:
     920             :   case AArch64::STRXroX:
     921             : 
     922             :   case AArch64::STRBroW:
     923             :   case AArch64::STRBroX:
     924             :   case AArch64::STRDroW:
     925             :   case AArch64::STRDroX:
     926             :   case AArch64::STRHroW:
     927             :   case AArch64::STRHroX:
     928             :   case AArch64::STRSroW:
     929             :   case AArch64::STRSroX:
     930         185 :     Imm = MI.getOperand(3).getImm();
     931             :     Ext = AArch64_AM::getMemExtendType(Imm);
     932         185 :     return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
     933             :   }
     934             : }
     935             : 
     936         112 : bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
     937         224 :   switch (MI.getOpcode()) {
     938             :   default:
     939             :     return false;
     940             : 
     941           6 :   case AArch64::ADDWrs:
     942             :   case AArch64::ADDXrs:
     943             :   case AArch64::ADDSWrs:
     944             :   case AArch64::ADDSXrs: {
     945           6 :     unsigned Imm = MI.getOperand(3).getImm();
     946             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     947           6 :     if (ShiftVal == 0)
     948             :       return true;
     949           0 :     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
     950             :   }
     951             : 
     952          27 :   case AArch64::ADDWrx:
     953             :   case AArch64::ADDXrx:
     954             :   case AArch64::ADDXrx64:
     955             :   case AArch64::ADDSWrx:
     956             :   case AArch64::ADDSXrx:
     957             :   case AArch64::ADDSXrx64: {
     958          27 :     unsigned Imm = MI.getOperand(3).getImm();
     959          27 :     switch (AArch64_AM::getArithExtendType(Imm)) {
     960             :     default:
     961             :       return false;
     962             :     case AArch64_AM::UXTB:
     963             :     case AArch64_AM::UXTH:
     964             :     case AArch64_AM::UXTW:
     965             :     case AArch64_AM::UXTX:
     966          27 :       return AArch64_AM::getArithShiftValue(Imm) <= 4;
     967             :     }
     968             :   }
     969             : 
     970          10 :   case AArch64::SUBWrs:
     971             :   case AArch64::SUBSWrs: {
     972          10 :     unsigned Imm = MI.getOperand(3).getImm();
     973             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     974          10 :     return ShiftVal == 0 ||
     975           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
     976             :   }
     977             : 
     978           0 :   case AArch64::SUBXrs:
     979             :   case AArch64::SUBSXrs: {
     980           0 :     unsigned Imm = MI.getOperand(3).getImm();
     981             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     982           0 :     return ShiftVal == 0 ||
     983           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
     984             :   }
     985             : 
     986          20 :   case AArch64::SUBWrx:
     987             :   case AArch64::SUBXrx:
     988             :   case AArch64::SUBXrx64:
     989             :   case AArch64::SUBSWrx:
     990             :   case AArch64::SUBSXrx:
     991             :   case AArch64::SUBSXrx64: {
     992          20 :     unsigned Imm = MI.getOperand(3).getImm();
     993          20 :     switch (AArch64_AM::getArithExtendType(Imm)) {
     994             :     default:
     995             :       return false;
     996             :     case AArch64_AM::UXTB:
     997             :     case AArch64_AM::UXTH:
     998             :     case AArch64_AM::UXTW:
     999             :     case AArch64_AM::UXTX:
    1000          20 :       return AArch64_AM::getArithShiftValue(Imm) == 0;
    1001             :     }
    1002             :   }
    1003             : 
    1004          49 :   case AArch64::LDRBBroW:
    1005             :   case AArch64::LDRBBroX:
    1006             :   case AArch64::LDRBroW:
    1007             :   case AArch64::LDRBroX:
    1008             :   case AArch64::LDRDroW:
    1009             :   case AArch64::LDRDroX:
    1010             :   case AArch64::LDRHHroW:
    1011             :   case AArch64::LDRHHroX:
    1012             :   case AArch64::LDRHroW:
    1013             :   case AArch64::LDRHroX:
    1014             :   case AArch64::LDRQroW:
    1015             :   case AArch64::LDRQroX:
    1016             :   case AArch64::LDRSBWroW:
    1017             :   case AArch64::LDRSBWroX:
    1018             :   case AArch64::LDRSBXroW:
    1019             :   case AArch64::LDRSBXroX:
    1020             :   case AArch64::LDRSHWroW:
    1021             :   case AArch64::LDRSHWroX:
    1022             :   case AArch64::LDRSHXroW:
    1023             :   case AArch64::LDRSHXroX:
    1024             :   case AArch64::LDRSWroW:
    1025             :   case AArch64::LDRSWroX:
    1026             :   case AArch64::LDRSroW:
    1027             :   case AArch64::LDRSroX:
    1028             :   case AArch64::LDRWroW:
    1029             :   case AArch64::LDRWroX:
    1030             :   case AArch64::LDRXroW:
    1031             :   case AArch64::LDRXroX:
    1032             :   case AArch64::PRFMroW:
    1033             :   case AArch64::PRFMroX:
    1034             :   case AArch64::STRBBroW:
    1035             :   case AArch64::STRBBroX:
    1036             :   case AArch64::STRBroW:
    1037             :   case AArch64::STRBroX:
    1038             :   case AArch64::STRDroW:
    1039             :   case AArch64::STRDroX:
    1040             :   case AArch64::STRHHroW:
    1041             :   case AArch64::STRHHroX:
    1042             :   case AArch64::STRHroW:
    1043             :   case AArch64::STRHroX:
    1044             :   case AArch64::STRQroW:
    1045             :   case AArch64::STRQroX:
    1046             :   case AArch64::STRSroW:
    1047             :   case AArch64::STRSroX:
    1048             :   case AArch64::STRWroW:
    1049             :   case AArch64::STRWroX:
    1050             :   case AArch64::STRXroW:
    1051             :   case AArch64::STRXroX: {
    1052          49 :     unsigned IsSigned = MI.getOperand(3).getImm();
    1053          49 :     return !IsSigned;
    1054             :   }
    1055             :   }
    1056             : }
    1057             : 
    1058      112792 : bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
    1059             :                                              unsigned &SrcReg, unsigned &DstReg,
    1060             :                                              unsigned &SubIdx) const {
    1061      225584 :   switch (MI.getOpcode()) {
    1062             :   default:
    1063             :     return false;
    1064         648 :   case AArch64::SBFMXri: // aka sxtw
    1065             :   case AArch64::UBFMXri: // aka uxtw
    1066             :     // Check for the 32 -> 64 bit extension case, these instructions can do
    1067             :     // much more.
    1068         648 :     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
    1069             :       return false;
    1070             :     // This is a signed or unsigned 32 -> 64 bit extension.
    1071          43 :     SrcReg = MI.getOperand(1).getReg();
    1072          43 :     DstReg = MI.getOperand(0).getReg();
    1073          43 :     SubIdx = AArch64::sub_32;
    1074          43 :     return true;
    1075             :   }
    1076             : }
    1077             : 
    1078        7117 : bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    1079             :     MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
    1080             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1081        7117 :   unsigned BaseRegA = 0, BaseRegB = 0;
    1082        7117 :   int64_t OffsetA = 0, OffsetB = 0;
    1083        7117 :   unsigned WidthA = 0, WidthB = 0;
    1084             : 
    1085             :   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
    1086             :   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
    1087             : 
    1088       21345 :   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
    1089       21339 :       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    1090         588 :     return false;
    1091             : 
    1092             :   // Retrieve the base register, offset from the base register and width. Width
    1093             :   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
    1094             :   // base registers are identical, and the offset of a lower memory access +
    1095             :   // the width doesn't overlap the offset of a higher memory access,
    1096             :   // then the memory accesses are different.
    1097       11142 :   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
    1098        4613 :       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    1099        4194 :     if (BaseRegA == BaseRegB) {
    1100        2436 :       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
    1101        2436 :       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
    1102        2436 :       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
    1103        2436 :       if (LowOffset + LowWidth <= HighOffset)
    1104        2158 :         return true;
    1105             :     }
    1106             :   }
    1107             :   return false;
    1108             : }
    1109             : 
    1110             : /// analyzeCompare - For a comparison instruction, return the source registers
    1111             : /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
    1112             : /// Return true if the comparison instruction can be analyzed.
    1113        1532 : bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
    1114             :                                       unsigned &SrcReg2, int &CmpMask,
    1115             :                                       int &CmpValue) const {
    1116             :   // The first operand can be a frame index where we'd normally expect a
    1117             :   // register.
    1118             :   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
    1119        3064 :   if (!MI.getOperand(1).isReg())
    1120             :     return false;
    1121             : 
    1122        3062 :   switch (MI.getOpcode()) {
    1123             :   default:
    1124             :     break;
    1125         841 :   case AArch64::SUBSWrr:
    1126             :   case AArch64::SUBSWrs:
    1127             :   case AArch64::SUBSWrx:
    1128             :   case AArch64::SUBSXrr:
    1129             :   case AArch64::SUBSXrs:
    1130             :   case AArch64::SUBSXrx:
    1131             :   case AArch64::ADDSWrr:
    1132             :   case AArch64::ADDSWrs:
    1133             :   case AArch64::ADDSWrx:
    1134             :   case AArch64::ADDSXrr:
    1135             :   case AArch64::ADDSXrs:
    1136             :   case AArch64::ADDSXrx:
    1137             :     // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1138         841 :     SrcReg = MI.getOperand(1).getReg();
    1139         841 :     SrcReg2 = MI.getOperand(2).getReg();
    1140         841 :     CmpMask = ~0;
    1141         841 :     CmpValue = 0;
    1142         841 :     return true;
    1143         650 :   case AArch64::SUBSWri:
    1144             :   case AArch64::ADDSWri:
    1145             :   case AArch64::SUBSXri:
    1146             :   case AArch64::ADDSXri:
    1147         650 :     SrcReg = MI.getOperand(1).getReg();
    1148         650 :     SrcReg2 = 0;
    1149         650 :     CmpMask = ~0;
    1150             :     // FIXME: In order to convert CmpValue to 0 or 1
    1151         650 :     CmpValue = MI.getOperand(2).getImm() != 0;
    1152         650 :     return true;
    1153          40 :   case AArch64::ANDSWri:
    1154             :   case AArch64::ANDSXri:
    1155             :     // ANDS does not use the same encoding scheme as the others xxxS
    1156             :     // instructions.
    1157          40 :     SrcReg = MI.getOperand(1).getReg();
    1158          40 :     SrcReg2 = 0;
    1159          40 :     CmpMask = ~0;
    1160             :     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
    1161             :     // while the type of CmpValue is int. When converting uint64_t to int,
    1162             :     // the high 32 bits of uint64_t will be lost.
    1163             :     // In fact it causes a bug in spec2006-483.xalancbmk
    1164             :     // CmpValue is only used to compare with zero in OptimizeCompareInstr
    1165          40 :     CmpValue = AArch64_AM::decodeLogicalImmediate(
    1166          40 :                    MI.getOperand(2).getImm(),
    1167          40 :                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    1168          40 :     return true;
    1169             :   }
    1170             : 
    1171             :   return false;
    1172             : }
    1173             : 
    1174         502 : static bool UpdateOperandRegClass(MachineInstr &Instr) {
    1175         502 :   MachineBasicBlock *MBB = Instr.getParent();
    1176             :   assert(MBB && "Can't get MachineBasicBlock here");
    1177         502 :   MachineFunction *MF = MBB->getParent();
    1178             :   assert(MF && "Can't get MachineFunction here");
    1179         502 :   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    1180         502 :   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
    1181         502 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    1182             : 
    1183        2224 :   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
    1184             :        ++OpIdx) {
    1185        1722 :     MachineOperand &MO = Instr.getOperand(OpIdx);
    1186             :     const TargetRegisterClass *OpRegCstraints =
    1187        1722 :         Instr.getRegClassConstraint(OpIdx, TII, TRI);
    1188             : 
    1189             :     // If there's no constraint, there's nothing to do.
    1190        1722 :     if (!OpRegCstraints)
    1191             :       continue;
    1192             :     // If the operand is a frame index, there's nothing to do here.
    1193             :     // A frame index operand will resolve correctly during PEI.
    1194        1345 :     if (MO.isFI())
    1195             :       continue;
    1196             : 
    1197             :     assert(MO.isReg() &&
    1198             :            "Operand has register constraints without being a register!");
    1199             : 
    1200        1343 :     unsigned Reg = MO.getReg();
    1201        1343 :     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    1202           0 :       if (!OpRegCstraints->contains(Reg))
    1203             :         return false;
    1204        2803 :     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
    1205         117 :                !MRI->constrainRegClass(Reg, OpRegCstraints))
    1206             :       return false;
    1207             :   }
    1208             : 
    1209             :   return true;
    1210             : }
    1211             : 
    1212             : /// Return the opcode that does not set flags when possible - otherwise
    1213             : /// return the original opcode. The caller is responsible to do the actual
    1214             : /// substitution and legality checking.
    1215        1139 : static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
    1216             :   // Don't convert all compare instructions, because for some the zero register
    1217             :   // encoding becomes the sp register.
    1218             :   bool MIDefinesZeroReg = false;
    1219        2278 :   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    1220             :     MIDefinesZeroReg = true;
    1221             : 
    1222        2278 :   switch (MI.getOpcode()) {
    1223           0 :   default:
    1224           0 :     return MI.getOpcode();
    1225             :   case AArch64::ADDSWrr:
    1226             :     return AArch64::ADDWrr;
    1227           0 :   case AArch64::ADDSWri:
    1228           0 :     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
    1229           0 :   case AArch64::ADDSWrs:
    1230           0 :     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
    1231           0 :   case AArch64::ADDSWrx:
    1232           0 :     return AArch64::ADDWrx;
    1233           0 :   case AArch64::ADDSXrr:
    1234           0 :     return AArch64::ADDXrr;
    1235           0 :   case AArch64::ADDSXri:
    1236           0 :     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
    1237           0 :   case AArch64::ADDSXrs:
    1238           0 :     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
    1239           0 :   case AArch64::ADDSXrx:
    1240           0 :     return AArch64::ADDXrx;
    1241         464 :   case AArch64::SUBSWrr:
    1242         464 :     return AArch64::SUBWrr;
    1243         137 :   case AArch64::SUBSWri:
    1244         137 :     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
    1245          23 :   case AArch64::SUBSWrs:
    1246          23 :     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
    1247           8 :   case AArch64::SUBSWrx:
    1248           8 :     return AArch64::SUBWrx;
    1249         255 :   case AArch64::SUBSXrr:
    1250         255 :     return AArch64::SUBXrr;
    1251         228 :   case AArch64::SUBSXri:
    1252         228 :     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
    1253          12 :   case AArch64::SUBSXrs:
    1254          12 :     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
    1255          12 :   case AArch64::SUBSXrx:
    1256          12 :     return AArch64::SUBXrx;
    1257             :   }
    1258             : }
    1259             : 
    1260             : enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
    1261             : 
    1262             : /// True when condition flags are accessed (either by writing or reading)
    1263             : /// on the instruction trace starting at From and ending at To.
    1264             : ///
    1265             : /// Note: If From and To are from different blocks it's assumed CC are accessed
    1266             : ///       on the path.
    1267          35 : static bool areCFlagsAccessedBetweenInstrs(
    1268             :     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    1269             :     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
    1270             :   // Early exit if To is at the beginning of the BB.
    1271          70 :   if (To == To->getParent()->begin())
    1272             :     return true;
    1273             : 
    1274             :   // Check whether the instructions are in the same basic block
    1275             :   // If not, assume the condition flags might get modified somewhere.
    1276          35 :   if (To->getParent() != From->getParent())
    1277             :     return true;
    1278             : 
    1279             :   // From must be above To.
    1280             :   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
    1281             :                       [From](MachineInstr &MI) {
    1282             :                         return MI.getIterator() == From;
    1283             :                       }) != To->getParent()->rend());
    1284             : 
    1285             :   // We iterate backward starting \p To until we hit \p From.
    1286         129 :   for (--To; To != From; --To) {
    1287             :     const MachineInstr &Instr = *To;
    1288             : 
    1289          94 :     if (((AccessToCheck & AK_Write) &&
    1290          94 :          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
    1291         188 :         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
    1292           0 :       return true;
    1293             :   }
    1294             :   return false;
    1295             : }
    1296             : 
    1297             : /// Try to optimize a compare instruction. A compare instruction is an
    1298             : /// instruction which produces AArch64::NZCV. It can be truly compare
    1299             : /// instruction
    1300             : /// when there are no uses of its destination register.
    1301             : ///
    1302             : /// The following steps are tried in order:
    1303             : /// 1. Convert CmpInstr into an unconditional version.
    1304             : /// 2. Remove CmpInstr if above there is an instruction producing a needed
    1305             : ///    condition code or an instruction which can be converted into such an
    1306             : ///    instruction.
    1307             : ///    Only comparison with zero is supported.
    1308        1525 : bool AArch64InstrInfo::optimizeCompareInstr(
    1309             :     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    1310             :     int CmpValue, const MachineRegisterInfo *MRI) const {
    1311             :   assert(CmpInstr.getParent());
    1312             :   assert(MRI);
    1313             : 
    1314             :   // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1315        1525 :   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
    1316        1525 :   if (DeadNZCVIdx != -1) {
    1317         988 :     if (CmpInstr.definesRegister(AArch64::WZR) ||
    1318             :         CmpInstr.definesRegister(AArch64::XZR)) {
    1319           0 :       CmpInstr.eraseFromParent();
    1320           0 :       return true;
    1321             :     }
    1322         494 :     unsigned Opc = CmpInstr.getOpcode();
    1323         494 :     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    1324         494 :     if (NewOpc == Opc)
    1325             :       return false;
    1326         494 :     const MCInstrDesc &MCID = get(NewOpc);
    1327             :     CmpInstr.setDesc(MCID);
    1328         494 :     CmpInstr.RemoveOperand(DeadNZCVIdx);
    1329         494 :     bool succeeded = UpdateOperandRegClass(CmpInstr);
    1330             :     (void)succeeded;
    1331             :     assert(succeeded && "Some operands reg class are incompatible!");
    1332         494 :     return true;
    1333             :   }
    1334             : 
    1335             :   // Continue only if we have a "ri" where immediate is zero.
    1336             :   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
    1337             :   // function.
    1338             :   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
    1339        1031 :   if (CmpValue != 0 || SrcReg2 != 0)
    1340             :     return false;
    1341             : 
    1342             :   // CmpInstr is a Compare instruction if destination register is not used.
    1343         146 :   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    1344             :     return false;
    1345             : 
    1346         137 :   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
    1347             : }
    1348             : 
    1349             : /// Get opcode of S version of Instr.
    1350             : /// If Instr is S version its opcode is returned.
    1351             : /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
    1352             : /// or we are not interested in it.
    1353         171 : static unsigned sForm(MachineInstr &Instr) {
    1354         342 :   switch (Instr.getOpcode()) {
    1355             :   default:
    1356             :     return AArch64::INSTRUCTION_LIST_END;
    1357             : 
    1358           0 :   case AArch64::ADDSWrr:
    1359             :   case AArch64::ADDSWri:
    1360             :   case AArch64::ADDSXrr:
    1361             :   case AArch64::ADDSXri:
    1362             :   case AArch64::SUBSWrr:
    1363             :   case AArch64::SUBSWri:
    1364             :   case AArch64::SUBSXrr:
    1365             :   case AArch64::SUBSXri:
    1366           0 :     return Instr.getOpcode();
    1367             : 
    1368           0 :   case AArch64::ADDWrr:
    1369           0 :     return AArch64::ADDSWrr;
    1370           2 :   case AArch64::ADDWri:
    1371           2 :     return AArch64::ADDSWri;
    1372           0 :   case AArch64::ADDXrr:
    1373           0 :     return AArch64::ADDSXrr;
    1374           4 :   case AArch64::ADDXri:
    1375           4 :     return AArch64::ADDSXri;
    1376           0 :   case AArch64::ADCWr:
    1377           0 :     return AArch64::ADCSWr;
    1378           0 :   case AArch64::ADCXr:
    1379           0 :     return AArch64::ADCSXr;
    1380          12 :   case AArch64::SUBWrr:
    1381          12 :     return AArch64::SUBSWrr;
    1382           0 :   case AArch64::SUBWri:
    1383           0 :     return AArch64::SUBSWri;
    1384           4 :   case AArch64::SUBXrr:
    1385           4 :     return AArch64::SUBSXrr;
    1386          44 :   case AArch64::SUBXri:
    1387          44 :     return AArch64::SUBSXri;
    1388           0 :   case AArch64::SBCWr:
    1389           0 :     return AArch64::SBCSWr;
    1390           0 :   case AArch64::SBCXr:
    1391           0 :     return AArch64::SBCSXr;
    1392           2 :   case AArch64::ANDWri:
    1393           2 :     return AArch64::ANDSWri;
    1394           0 :   case AArch64::ANDXri:
    1395           0 :     return AArch64::ANDSXri;
    1396             :   }
    1397             : }
    1398             : 
    1399             : /// Check if AArch64::NZCV should be alive in successors of MBB.
    1400          34 : static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
    1401          76 :   for (auto *BB : MBB->successors())
    1402          42 :     if (BB->isLiveIn(AArch64::NZCV))
    1403             :       return true;
    1404             :   return false;
    1405             : }
    1406             : 
    1407             : namespace {
    1408             : 
    1409             : struct UsedNZCV {
    1410             :   bool N = false;
    1411             :   bool Z = false;
    1412             :   bool C = false;
    1413             :   bool V = false;
    1414             : 
    1415             :   UsedNZCV() = default;
    1416             : 
    1417             :   UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    1418             :     this->N |= UsedFlags.N;
    1419             :     this->Z |= UsedFlags.Z;
    1420          38 :     this->C |= UsedFlags.C;
    1421          38 :     this->V |= UsedFlags.V;
    1422             :     return *this;
    1423             :   }
    1424             : };
    1425             : 
    1426             : } // end anonymous namespace
    1427             : 
    1428             : /// Find a condition code used by the instruction.
    1429             : /// Returns AArch64CC::Invalid if either the instruction does not use condition
    1430             : /// codes or we don't optimize CmpInstr in the presence of such instructions.
    1431          38 : static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
    1432          76 :   switch (Instr.getOpcode()) {
    1433             :   default:
    1434             :     return AArch64CC::Invalid;
    1435             : 
    1436          20 :   case AArch64::Bcc: {
    1437          20 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1438             :     assert(Idx >= 2);
    1439          40 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
    1440             :   }
    1441             : 
    1442          18 :   case AArch64::CSINVWr:
    1443             :   case AArch64::CSINVXr:
    1444             :   case AArch64::CSINCWr:
    1445             :   case AArch64::CSINCXr:
    1446             :   case AArch64::CSELWr:
    1447             :   case AArch64::CSELXr:
    1448             :   case AArch64::CSNEGWr:
    1449             :   case AArch64::CSNEGXr:
    1450             :   case AArch64::FCSELSrrr:
    1451             :   case AArch64::FCSELDrrr: {
    1452          18 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1453             :     assert(Idx >= 1);
    1454          36 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
    1455             :   }
    1456             :   }
    1457             : }
    1458             : 
    1459             : static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
    1460             :   assert(CC != AArch64CC::Invalid);
    1461             :   UsedNZCV UsedFlags;
    1462             :   switch (CC) {
    1463             :   default:
    1464             :     break;
    1465             : 
    1466             :   case AArch64CC::EQ: // Z set
    1467             :   case AArch64CC::NE: // Z clear
    1468             :     UsedFlags.Z = true;
    1469             :     break;
    1470             : 
    1471             :   case AArch64CC::HI: // Z clear and C set
    1472             :   case AArch64CC::LS: // Z set   or  C clear
    1473             :     UsedFlags.Z = true;
    1474             :     LLVM_FALLTHROUGH;
    1475             :   case AArch64CC::HS: // C set
    1476             :   case AArch64CC::LO: // C clear
    1477             :     UsedFlags.C = true;
    1478             :     break;
    1479             : 
    1480             :   case AArch64CC::MI: // N set
    1481             :   case AArch64CC::PL: // N clear
    1482             :     UsedFlags.N = true;
    1483             :     break;
    1484             : 
    1485             :   case AArch64CC::VS: // V set
    1486             :   case AArch64CC::VC: // V clear
    1487             :     UsedFlags.V = true;
    1488             :     break;
    1489             : 
    1490             :   case AArch64CC::GT: // Z clear, N and V the same
    1491             :   case AArch64CC::LE: // Z set,   N and V differ
    1492             :     UsedFlags.Z = true;
    1493             :     LLVM_FALLTHROUGH;
    1494             :   case AArch64CC::GE: // N and V the same
    1495             :   case AArch64CC::LT: // N and V differ
    1496             :     UsedFlags.N = true;
    1497             :     UsedFlags.V = true;
    1498             :     break;
    1499             :   }
    1500             :   return UsedFlags;
    1501             : }
    1502             : 
    1503             : static bool isADDSRegImm(unsigned Opcode) {
    1504          34 :   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
    1505             : }
    1506             : 
    1507             : static bool isSUBSRegImm(unsigned Opcode) {
    1508          34 :   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
    1509             : }
    1510             : 
    1511             : /// Check if CmpInstr can be substituted by MI.
    1512             : ///
    1513             : /// CmpInstr can be substituted:
    1514             : /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
    1515             : /// - and, MI and CmpInstr are from the same MachineBB
    1516             : /// - and, condition flags are not alive in successors of the CmpInstr parent
    1517             : /// - and, if MI opcode is the S form there must be no defs of flags between
    1518             : ///        MI and CmpInstr
    1519             : ///        or if MI opcode is not the S form there must be neither defs of flags
    1520             : ///        nor uses of flags between MI and CmpInstr.
    1521             : /// - and  C/V flags are not used after CmpInstr
    1522          34 : static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
    1523             :                                        const TargetRegisterInfo *TRI) {
    1524             :   assert(MI);
    1525             :   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
    1526             :   assert(CmpInstr);
    1527             : 
    1528          34 :   const unsigned CmpOpcode = CmpInstr->getOpcode();
    1529          34 :   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    1530             :     return false;
    1531             : 
    1532          34 :   if (MI->getParent() != CmpInstr->getParent())
    1533             :     return false;
    1534             : 
    1535          34 :   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    1536             :     return false;
    1537             : 
    1538             :   AccessKind AccessToCheck = AK_Write;
    1539          68 :   if (sForm(*MI) != MI->getOpcode())
    1540             :     AccessToCheck = AK_All;
    1541          34 :   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    1542             :     return false;
    1543             : 
    1544             :   UsedNZCV NZCVUsedAfterCmp;
    1545          34 :   for (auto I = std::next(CmpInstr->getIterator()),
    1546          34 :             E = CmpInstr->getParent()->instr_end();
    1547         128 :        I != E; ++I) {
    1548             :     const MachineInstr &Instr = *I;
    1549          95 :     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
    1550          38 :       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
    1551          38 :       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
    1552             :         return false;
    1553             :       NZCVUsedAfterCmp |= getUsedNZCV(CC);
    1554             :     }
    1555             : 
    1556          95 :     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
    1557             :       break;
    1558             :   }
    1559             : 
    1560          34 :   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
    1561             : }
    1562             : 
    1563             : /// Substitute an instruction comparing to zero with another instruction
    1564             : /// which produces needed condition flags.
    1565             : ///
    1566             : /// Return true on success.
    1567         137 : bool AArch64InstrInfo::substituteCmpToZero(
    1568             :     MachineInstr &CmpInstr, unsigned SrcReg,
    1569             :     const MachineRegisterInfo *MRI) const {
    1570             :   assert(MRI);
    1571             :   // Get the unique definition of SrcReg.
    1572         137 :   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    1573         137 :   if (!MI)
    1574             :     return false;
    1575             : 
    1576             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1577             : 
    1578         137 :   unsigned NewOpc = sForm(*MI);
    1579         137 :   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    1580             :     return false;
    1581             : 
    1582          34 :   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    1583             :     return false;
    1584             : 
    1585             :   // Update the instruction to set NZCV.
    1586           8 :   MI->setDesc(get(NewOpc));
    1587           8 :   CmpInstr.eraseFromParent();
    1588           8 :   bool succeeded = UpdateOperandRegClass(*MI);
    1589             :   (void)succeeded;
    1590             :   assert(succeeded && "Some operands reg class are incompatible!");
    1591           8 :   MI->addRegisterDefined(AArch64::NZCV, TRI);
    1592           8 :   return true;
    1593             : }
    1594             : 
    1595       10606 : bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    1596       21212 :   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    1597             :     return false;
    1598             : 
    1599          21 :   MachineBasicBlock &MBB = *MI.getParent();
    1600             :   DebugLoc DL = MI.getDebugLoc();
    1601          21 :   unsigned Reg = MI.getOperand(0).getReg();
    1602             :   const GlobalValue *GV =
    1603          21 :       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
    1604          21 :   const TargetMachine &TM = MBB.getParent()->getTarget();
    1605          21 :   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
    1606             :   const unsigned char MO_NC = AArch64II::MO_NC;
    1607             : 
    1608          21 :   if ((OpFlags & AArch64II::MO_GOT) != 0) {
    1609          30 :     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
    1610             :         .addGlobalAddress(GV, 0, OpFlags);
    1611          45 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1612          15 :         .addReg(Reg, RegState::Kill)
    1613             :         .addImm(0)
    1614          15 :         .addMemOperand(*MI.memoperands_begin());
    1615           6 :   } else if (TM.getCodeModel() == CodeModel::Large) {
    1616           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
    1617             :         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
    1618             :         .addImm(0);
    1619           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1620           2 :         .addReg(Reg, RegState::Kill)
    1621             :         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
    1622             :         .addImm(16);
    1623           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1624           2 :         .addReg(Reg, RegState::Kill)
    1625             :         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
    1626             :         .addImm(32);
    1627           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1628           2 :         .addReg(Reg, RegState::Kill)
    1629             :         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
    1630             :         .addImm(48);
    1631           6 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1632           2 :         .addReg(Reg, RegState::Kill)
    1633             :         .addImm(0)
    1634           2 :         .addMemOperand(*MI.memoperands_begin());
    1635           4 :   } else if (TM.getCodeModel() == CodeModel::Tiny) {
    1636           0 :     BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
    1637             :         .addGlobalAddress(GV, 0, OpFlags);
    1638             :   } else {
    1639           8 :     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
    1640           4 :         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    1641           4 :     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    1642          12 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1643           4 :         .addReg(Reg, RegState::Kill)
    1644             :         .addGlobalAddress(GV, 0, LoFlags)
    1645           4 :         .addMemOperand(*MI.memoperands_begin());
    1646             :   }
    1647             : 
    1648          21 :   MBB.erase(MI);
    1649             : 
    1650             :   return true;
    1651             : }
    1652             : 
    1653             : /// Return true if this is this instruction has a non-zero immediate
    1654         401 : bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
    1655         802 :   switch (MI.getOpcode()) {
    1656             :   default:
    1657             :     break;
    1658         337 :   case AArch64::ADDSWrs:
    1659             :   case AArch64::ADDSXrs:
    1660             :   case AArch64::ADDWrs:
    1661             :   case AArch64::ADDXrs:
    1662             :   case AArch64::ANDSWrs:
    1663             :   case AArch64::ANDSXrs:
    1664             :   case AArch64::ANDWrs:
    1665             :   case AArch64::ANDXrs:
    1666             :   case AArch64::BICSWrs:
    1667             :   case AArch64::BICSXrs:
    1668             :   case AArch64::BICWrs:
    1669             :   case AArch64::BICXrs:
    1670             :   case AArch64::EONWrs:
    1671             :   case AArch64::EONXrs:
    1672             :   case AArch64::EORWrs:
    1673             :   case AArch64::EORXrs:
    1674             :   case AArch64::ORNWrs:
    1675             :   case AArch64::ORNXrs:
    1676             :   case AArch64::ORRWrs:
    1677             :   case AArch64::ORRXrs:
    1678             :   case AArch64::SUBSWrs:
    1679             :   case AArch64::SUBSXrs:
    1680             :   case AArch64::SUBWrs:
    1681             :   case AArch64::SUBXrs:
    1682         674 :     if (MI.getOperand(3).isImm()) {
    1683         337 :       unsigned val = MI.getOperand(3).getImm();
    1684         337 :       return (val != 0);
    1685           0 :     }
    1686             :     break;
    1687             :   }
    1688             :   return false;
    1689             : }
    1690             : 
    1691             : /// Return true if this is this instruction has a non-zero immediate
    1692          15 : bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
    1693          30 :   switch (MI.getOpcode()) {
    1694             :   default:
    1695             :     break;
    1696          15 :   case AArch64::ADDSWrx:
    1697             :   case AArch64::ADDSXrx:
    1698             :   case AArch64::ADDSXrx64:
    1699             :   case AArch64::ADDWrx:
    1700             :   case AArch64::ADDXrx:
    1701             :   case AArch64::ADDXrx64:
    1702             :   case AArch64::SUBSWrx:
    1703             :   case AArch64::SUBSXrx:
    1704             :   case AArch64::SUBSXrx64:
    1705             :   case AArch64::SUBWrx:
    1706             :   case AArch64::SUBXrx:
    1707             :   case AArch64::SUBXrx64:
    1708          30 :     if (MI.getOperand(3).isImm()) {
    1709          15 :       unsigned val = MI.getOperand(3).getImm();
    1710          15 :       return (val != 0);
    1711           0 :     }
    1712             :     break;
    1713             :   }
    1714             : 
    1715             :   return false;
    1716             : }
    1717             : 
    1718             : // Return true if this instruction simply sets its single destination register
    1719             : // to zero. This is equivalent to a register rename of the zero-register.
    1720          90 : bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
    1721         180 :   switch (MI.getOpcode()) {
    1722             :   default:
    1723             :     break;
    1724          46 :   case AArch64::MOVZWi:
    1725             :   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    1726          92 :     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
    1727             :       assert(MI.getDesc().getNumOperands() == 3 &&
    1728             :              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
    1729           0 :       return true;
    1730             :     }
    1731             :     break;
    1732          16 :   case AArch64::ANDWri: // and Rd, Rzr, #imm
    1733          16 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1734          28 :   case AArch64::ANDXri:
    1735          28 :     return MI.getOperand(1).getReg() == AArch64::XZR;
    1736           0 :   case TargetOpcode::COPY:
    1737           0 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1738             :   }
    1739             :   return false;
    1740             : }
    1741             : 
    1742             : // Return true if this instruction simply renames a general register without
    1743             : // modifying bits.
    1744       10335 : bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
    1745       20670 :   switch (MI.getOpcode()) {
    1746             :   default:
    1747             :     break;
    1748       10129 :   case TargetOpcode::COPY: {
    1749             :     // GPR32 copies will by lowered to ORRXrs
    1750       10129 :     unsigned DstReg = MI.getOperand(0).getReg();
    1751       12010 :     return (AArch64::GPR32RegClass.contains(DstReg) ||
    1752        9465 :             AArch64::GPR64RegClass.contains(DstReg));
    1753             :   }
    1754           0 :   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    1755           0 :     if (MI.getOperand(1).getReg() == AArch64::XZR) {
    1756             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1757             :              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
    1758           0 :       return true;
    1759             :     }
    1760             :     break;
    1761           0 :   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    1762           0 :     if (MI.getOperand(2).getImm() == 0) {
    1763             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1764             :              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
    1765           0 :       return true;
    1766             :     }
    1767             :     break;
    1768             :   }
    1769             :   return false;
    1770             : }
    1771             : 
    1772             : // Return true if this instruction simply renames a general register without
    1773             : // modifying bits.
    1774        8910 : bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
    1775       17820 :   switch (MI.getOpcode()) {
    1776             :   default:
    1777             :     break;
    1778        8704 :   case TargetOpcode::COPY: {
    1779             :     // FPR64 copies will by lowered to ORR.16b
    1780        8704 :     unsigned DstReg = MI.getOperand(0).getReg();
    1781        9516 :     return (AArch64::FPR64RegClass.contains(DstReg) ||
    1782        8396 :             AArch64::FPR128RegClass.contains(DstReg));
    1783             :   }
    1784           0 :   case AArch64::ORRv16i8:
    1785           0 :     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
    1786             :       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
    1787             :              "invalid ORRv16i8 operands");
    1788           0 :       return true;
    1789             :     }
    1790             :     break;
    1791             :   }
    1792             :   return false;
    1793             : }
    1794             : 
    1795       10233 : unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
    1796             :                                                int &FrameIndex) const {
    1797       20466 :   switch (MI.getOpcode()) {
    1798             :   default:
    1799             :     break;
    1800         595 :   case AArch64::LDRWui:
    1801             :   case AArch64::LDRXui:
    1802             :   case AArch64::LDRBui:
    1803             :   case AArch64::LDRHui:
    1804             :   case AArch64::LDRSui:
    1805             :   case AArch64::LDRDui:
    1806             :   case AArch64::LDRQui:
    1807         595 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1808         986 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1809         391 :       FrameIndex = MI.getOperand(1).getIndex();
    1810         391 :       return MI.getOperand(0).getReg();
    1811             :     }
    1812             :     break;
    1813             :   }
    1814             : 
    1815             :   return 0;
    1816             : }
    1817             : 
    1818        4717 : unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
    1819             :                                               int &FrameIndex) const {
    1820        4717 :   switch (MI.getOpcode()) {
    1821             :   default:
    1822             :     break;
    1823         172 :   case AArch64::STRWui:
    1824             :   case AArch64::STRXui:
    1825             :   case AArch64::STRBui:
    1826             :   case AArch64::STRHui:
    1827             :   case AArch64::STRSui:
    1828             :   case AArch64::STRDui:
    1829             :   case AArch64::STRQui:
    1830         172 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1831         238 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1832          57 :       FrameIndex = MI.getOperand(1).getIndex();
    1833          57 :       return MI.getOperand(0).getReg();
    1834             :     }
    1835             :     break;
    1836             :   }
    1837             :   return 0;
    1838             : }
    1839             : 
    1840             : /// Return true if this is load/store scales or extends its register offset.
    1841             : /// This refers to scaling a dynamic index as opposed to scaled immediates.
    1842             : /// MI should be a memory op that allows scaled addressing.
    1843         685 : bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
    1844        1370 :   switch (MI.getOpcode()) {
    1845             :   default:
    1846             :     break;
    1847         685 :   case AArch64::LDRBBroW:
    1848             :   case AArch64::LDRBroW:
    1849             :   case AArch64::LDRDroW:
    1850             :   case AArch64::LDRHHroW:
    1851             :   case AArch64::LDRHroW:
    1852             :   case AArch64::LDRQroW:
    1853             :   case AArch64::LDRSBWroW:
    1854             :   case AArch64::LDRSBXroW:
    1855             :   case AArch64::LDRSHWroW:
    1856             :   case AArch64::LDRSHXroW:
    1857             :   case AArch64::LDRSWroW:
    1858             :   case AArch64::LDRSroW:
    1859             :   case AArch64::LDRWroW:
    1860             :   case AArch64::LDRXroW:
    1861             :   case AArch64::STRBBroW:
    1862             :   case AArch64::STRBroW:
    1863             :   case AArch64::STRDroW:
    1864             :   case AArch64::STRHHroW:
    1865             :   case AArch64::STRHroW:
    1866             :   case AArch64::STRQroW:
    1867             :   case AArch64::STRSroW:
    1868             :   case AArch64::STRWroW:
    1869             :   case AArch64::STRXroW:
    1870             :   case AArch64::LDRBBroX:
    1871             :   case AArch64::LDRBroX:
    1872             :   case AArch64::LDRDroX:
    1873             :   case AArch64::LDRHHroX:
    1874             :   case AArch64::LDRHroX:
    1875             :   case AArch64::LDRQroX:
    1876             :   case AArch64::LDRSBWroX:
    1877             :   case AArch64::LDRSBXroX:
    1878             :   case AArch64::LDRSHWroX:
    1879             :   case AArch64::LDRSHXroX:
    1880             :   case AArch64::LDRSWroX:
    1881             :   case AArch64::LDRSroX:
    1882             :   case AArch64::LDRWroX:
    1883             :   case AArch64::LDRXroX:
    1884             :   case AArch64::STRBBroX:
    1885             :   case AArch64::STRBroX:
    1886             :   case AArch64::STRDroX:
    1887             :   case AArch64::STRHHroX:
    1888             :   case AArch64::STRHroX:
    1889             :   case AArch64::STRQroX:
    1890             :   case AArch64::STRSroX:
    1891             :   case AArch64::STRWroX:
    1892             :   case AArch64::STRXroX:
    1893             : 
    1894         685 :     unsigned Val = MI.getOperand(3).getImm();
    1895             :     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    1896         685 :     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
    1897             :   }
    1898             :   return false;
    1899             : }
    1900             : 
    1901             : /// Check all MachineMemOperands for a hint to suppress pairing.
    1902       21471 : bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
    1903       21471 :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1904           0 :     return MMO->getFlags() & MOSuppressPair;
    1905       21471 :   });
    1906             : }
    1907             : 
    1908             : /// Set a flag on the first MachineMemOperand to suppress pairing.
    1909           9 : void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
    1910           9 :   if (MI.memoperands_empty())
    1911             :     return;
    1912           9 :   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
    1913             : }
    1914             : 
    1915             : /// Check all MachineMemOperands for a hint that the load/store is strided.
    1916         133 : bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
    1917         133 :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1918           0 :     return MMO->getFlags() & MOStridedAccess;
    1919         133 :   });
    1920             : }
    1921             : 
    1922       34422 : bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
    1923       34422 :   switch (Opc) {
    1924             :   default:
    1925             :     return false;
    1926        1185 :   case AArch64::STURSi:
    1927             :   case AArch64::STURDi:
    1928             :   case AArch64::STURQi:
    1929             :   case AArch64::STURBBi:
    1930             :   case AArch64::STURHHi:
    1931             :   case AArch64::STURWi:
    1932             :   case AArch64::STURXi:
    1933             :   case AArch64::LDURSi:
    1934             :   case AArch64::LDURDi:
    1935             :   case AArch64::LDURQi:
    1936             :   case AArch64::LDURWi:
    1937             :   case AArch64::LDURXi:
    1938             :   case AArch64::LDURSWi:
    1939             :   case AArch64::LDURHHi:
    1940             :   case AArch64::LDURBBi:
    1941             :   case AArch64::LDURSBWi:
    1942             :   case AArch64::LDURSHWi:
    1943        1185 :     return true;
    1944             :   }
    1945             : }
    1946             : 
    1947       72593 : bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
    1948      145186 :   switch (MI.getOpcode()) {
    1949             :   default:
    1950             :     return false;
    1951             :   // Scaled instructions.
    1952       10529 :   case AArch64::STRSui:
    1953             :   case AArch64::STRDui:
    1954             :   case AArch64::STRQui:
    1955             :   case AArch64::STRXui:
    1956             :   case AArch64::STRWui:
    1957             :   case AArch64::LDRSui:
    1958             :   case AArch64::LDRDui:
    1959             :   case AArch64::LDRQui:
    1960             :   case AArch64::LDRXui:
    1961             :   case AArch64::LDRWui:
    1962             :   case AArch64::LDRSWui:
    1963             :   // Unscaled instructions.
    1964             :   case AArch64::STURSi:
    1965             :   case AArch64::STURDi:
    1966             :   case AArch64::STURQi:
    1967             :   case AArch64::STURWi:
    1968             :   case AArch64::STURXi:
    1969             :   case AArch64::LDURSi:
    1970             :   case AArch64::LDURDi:
    1971             :   case AArch64::LDURQi:
    1972             :   case AArch64::LDURWi:
    1973             :   case AArch64::LDURXi:
    1974             :   case AArch64::LDURSWi:
    1975       10529 :     return true;
    1976             :   }
    1977             : }
    1978             : 
    1979          23 : unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
    1980             :                                                    bool &Is64Bit) {
    1981          23 :   switch (Opc) {
    1982           0 :   default:
    1983           0 :     llvm_unreachable("Opcode has no flag setting equivalent!");
    1984             :   // 32-bit cases:
    1985           3 :   case AArch64::ADDWri:
    1986           3 :     Is64Bit = false;
    1987           3 :     return AArch64::ADDSWri;
    1988           4 :   case AArch64::ADDWrr:
    1989           4 :     Is64Bit = false;
    1990           4 :     return AArch64::ADDSWrr;
    1991           0 :   case AArch64::ADDWrs:
    1992           0 :     Is64Bit = false;
    1993           0 :     return AArch64::ADDSWrs;
    1994           0 :   case AArch64::ADDWrx:
    1995           0 :     Is64Bit = false;
    1996           0 :     return AArch64::ADDSWrx;
    1997           3 :   case AArch64::ANDWri:
    1998           3 :     Is64Bit = false;
    1999           3 :     return AArch64::ANDSWri;
    2000           0 :   case AArch64::ANDWrr:
    2001           0 :     Is64Bit = false;
    2002           0 :     return AArch64::ANDSWrr;
    2003           0 :   case AArch64::ANDWrs:
    2004           0 :     Is64Bit = false;
    2005           0 :     return AArch64::ANDSWrs;
    2006           1 :   case AArch64::BICWrr:
    2007           1 :     Is64Bit = false;
    2008           1 :     return AArch64::BICSWrr;
    2009           0 :   case AArch64::BICWrs:
    2010           0 :     Is64Bit = false;
    2011           0 :     return AArch64::BICSWrs;
    2012           0 :   case AArch64::SUBWri:
    2013           0 :     Is64Bit = false;
    2014           0 :     return AArch64::SUBSWri;
    2015           0 :   case AArch64::SUBWrr:
    2016           0 :     Is64Bit = false;
    2017           0 :     return AArch64::SUBSWrr;
    2018           0 :   case AArch64::SUBWrs:
    2019           0 :     Is64Bit = false;
    2020           0 :     return AArch64::SUBSWrs;
    2021           0 :   case AArch64::SUBWrx:
    2022           0 :     Is64Bit = false;
    2023           0 :     return AArch64::SUBSWrx;
    2024             :   // 64-bit cases:
    2025          10 :   case AArch64::ADDXri:
    2026          10 :     Is64Bit = true;
    2027          10 :     return AArch64::ADDSXri;
    2028           1 :   case AArch64::ADDXrr:
    2029           1 :     Is64Bit = true;
    2030           1 :     return AArch64::ADDSXrr;
    2031           0 :   case AArch64::ADDXrs:
    2032           0 :     Is64Bit = true;
    2033           0 :     return AArch64::ADDSXrs;
    2034           0 :   case AArch64::ADDXrx:
    2035           0 :     Is64Bit = true;
    2036           0 :     return AArch64::ADDSXrx;
    2037           1 :   case AArch64::ANDXri:
    2038           1 :     Is64Bit = true;
    2039           1 :     return AArch64::ANDSXri;
    2040           0 :   case AArch64::ANDXrr:
    2041           0 :     Is64Bit = true;
    2042           0 :     return AArch64::ANDSXrr;
    2043           0 :   case AArch64::ANDXrs:
    2044           0 :     Is64Bit = true;
    2045           0 :     return AArch64::ANDSXrs;
    2046           0 :   case AArch64::BICXrr:
    2047           0 :     Is64Bit = true;
    2048           0 :     return AArch64::BICSXrr;
    2049           0 :   case AArch64::BICXrs:
    2050           0 :     Is64Bit = true;
    2051           0 :     return AArch64::BICSXrs;
    2052           0 :   case AArch64::SUBXri:
    2053           0 :     Is64Bit = true;
    2054           0 :     return AArch64::SUBSXri;
    2055           0 :   case AArch64::SUBXrr:
    2056           0 :     Is64Bit = true;
    2057           0 :     return AArch64::SUBSXrr;
    2058           0 :   case AArch64::SUBXrs:
    2059           0 :     Is64Bit = true;
    2060           0 :     return AArch64::SUBSXrs;
    2061           0 :   case AArch64::SUBXrx:
    2062           0 :     Is64Bit = true;
    2063           0 :     return AArch64::SUBSXrx;
    2064             :   }
    2065             : }
    2066             : 
    2067             : // Is this a candidate for ld/st merging or pairing?  For example, we don't
    2068             : // touch volatiles or load/stores that have a hint to avoid pair formation.
    2069       10452 : bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
    2070             :   // If this is a volatile load/store, don't mess with it.
    2071       10452 :   if (MI.hasOrderedMemoryRef())
    2072             :     return false;
    2073             : 
    2074             :   // Make sure this is a reg+imm (as opposed to an address reloc).
    2075             :   assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
    2076       16302 :   if (!MI.getOperand(2).isImm())
    2077             :     return false;
    2078             : 
    2079             :   // Can't merge/pair if the instruction modifies the base register.
    2080             :   // e.g., ldr x0, [x0]
    2081        7435 :   unsigned BaseReg = MI.getOperand(1).getReg();
    2082             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2083        7435 :   if (MI.modifiesRegister(BaseReg, TRI))
    2084             :     return false;
    2085             : 
    2086             :   // Check if this load/store has a hint to avoid pair formation.
    2087             :   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
    2088        7316 :   if (isLdStPairSuppressed(MI))
    2089             :     return false;
    2090             : 
    2091             :   // On some CPUs quad load/store pairs are slower than two single load/stores.
    2092        7305 :   if (Subtarget.isPaired128Slow()) {
    2093         244 :     switch (MI.getOpcode()) {
    2094             :     default:
    2095             :       break;
    2096          98 :     case AArch64::LDURQi:
    2097             :     case AArch64::STURQi:
    2098             :     case AArch64::LDRQui:
    2099             :     case AArch64::STRQui:
    2100          98 :       return false;
    2101             :     }
    2102             :   }
    2103             : 
    2104             :   return true;
    2105             : }
    2106             : 
    2107       14347 : bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    2108             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    2109             :     const TargetRegisterInfo *TRI) const {
    2110             :   unsigned Width;
    2111       14347 :   return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
    2112             : }
    2113             : 
    2114       25586 : bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    2115             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    2116             :     const TargetRegisterInfo *TRI) const {
    2117             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    2118             :   // Handle only loads/stores with base register followed by immediate offset.
    2119       25586 :   if (LdSt.getNumExplicitOperands() == 3) {
    2120             :     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    2121       41804 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
    2122             :       return false;
    2123        4684 :   } else if (LdSt.getNumExplicitOperands() == 4) {
    2124             :     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    2125        5256 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
    2126             :         !LdSt.getOperand(3).isImm())
    2127             :       return false;
    2128             :   } else
    2129             :     return false;
    2130             : 
    2131             :   // Get the scaling factor for the instruction and set the width for the
    2132             :   // instruction.
    2133       15341 :   unsigned Scale = 0;
    2134             :   int64_t Dummy1, Dummy2;
    2135             : 
    2136             :   // If this returns false, then it's an instruction we don't want to handle.
    2137       30682 :   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    2138             :     return false;
    2139             : 
    2140             :   // Compute the offset. Offset is calculated as the immediate operand
    2141             :   // multiplied by the scaling factor. Unscaled instructions have scaling factor
    2142             :   // set to 1.
    2143       15020 :   if (LdSt.getNumExplicitOperands() == 3) {
    2144       13370 :     BaseReg = LdSt.getOperand(1).getReg();
    2145       13370 :     Offset = LdSt.getOperand(2).getImm() * Scale;
    2146             :   } else {
    2147             :     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    2148        1650 :     BaseReg = LdSt.getOperand(2).getReg();
    2149        1650 :     Offset = LdSt.getOperand(3).getImm() * Scale;
    2150             :   }
    2151             :   return true;
    2152             : }
    2153             : 
    2154             : MachineOperand &
    2155           0 : AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
    2156             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    2157           0 :   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
    2158             :   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
    2159           0 :   return OfsOp;
    2160             : }
    2161             : 
    2162       15438 : bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
    2163             :                                     unsigned &Width, int64_t &MinOffset,
    2164             :                                     int64_t &MaxOffset) const {
    2165       15438 :   switch (Opcode) {
    2166             :   // Not a memory operation or something we want to handle.
    2167         321 :   default:
    2168         321 :     Scale = Width = 0;
    2169         321 :     MinOffset = MaxOffset = 0;
    2170         321 :     return false;
    2171          19 :   case AArch64::STRWpost:
    2172             :   case AArch64::LDRWpost:
    2173          19 :     Width = 32;
    2174          19 :     Scale = 4;
    2175          19 :     MinOffset = -256;
    2176          19 :     MaxOffset = 255;
    2177          19 :     break;
    2178         208 :   case AArch64::LDURQi:
    2179             :   case AArch64::STURQi:
    2180         208 :     Width = 16;
    2181         208 :     Scale = 1;
    2182         208 :     MinOffset = -256;
    2183         208 :     MaxOffset = 255;
    2184         208 :     break;
    2185         197 :   case AArch64::LDURXi:
    2186             :   case AArch64::LDURDi:
    2187             :   case AArch64::STURXi:
    2188             :   case AArch64::STURDi:
    2189         197 :     Width = 8;
    2190         197 :     Scale = 1;
    2191         197 :     MinOffset = -256;
    2192         197 :     MaxOffset = 255;
    2193         197 :     break;
    2194         224 :   case AArch64::LDURWi:
    2195             :   case AArch64::LDURSi:
    2196             :   case AArch64::LDURSWi:
    2197             :   case AArch64::STURWi:
    2198             :   case AArch64::STURSi:
    2199         224 :     Width = 4;
    2200         224 :     Scale = 1;
    2201         224 :     MinOffset = -256;
    2202         224 :     MaxOffset = 255;
    2203         224 :     break;
    2204         114 :   case AArch64::LDURHi:
    2205             :   case AArch64::LDURHHi:
    2206             :   case AArch64::LDURSHXi:
    2207             :   case AArch64::LDURSHWi:
    2208             :   case AArch64::STURHi:
    2209             :   case AArch64::STURHHi:
    2210         114 :     Width = 2;
    2211         114 :     Scale = 1;
    2212         114 :     MinOffset = -256;
    2213         114 :     MaxOffset = 255;
    2214         114 :     break;
    2215          90 :   case AArch64::LDURBi:
    2216             :   case AArch64::LDURBBi:
    2217             :   case AArch64::LDURSBXi:
    2218             :   case AArch64::LDURSBWi:
    2219             :   case AArch64::STURBi:
    2220             :   case AArch64::STURBBi:
    2221          90 :     Width = 1;
    2222          90 :     Scale = 1;
    2223          90 :     MinOffset = -256;
    2224          90 :     MaxOffset = 255;
    2225          90 :     break;
    2226         917 :   case AArch64::LDPQi:
    2227             :   case AArch64::LDNPQi:
    2228             :   case AArch64::STPQi:
    2229             :   case AArch64::STNPQi:
    2230         917 :     Scale = 16;
    2231         917 :     Width = 32;
    2232         917 :     MinOffset = -64;
    2233         917 :     MaxOffset = 63;
    2234         917 :     break;
    2235        3869 :   case AArch64::LDRQui:
    2236             :   case AArch64::STRQui:
    2237        3869 :     Scale = Width = 16;
    2238        3869 :     MinOffset = 0;
    2239        3869 :     MaxOffset = 4095;
    2240        3869 :     break;
    2241         551 :   case AArch64::LDPXi:
    2242             :   case AArch64::LDPDi:
    2243             :   case AArch64::LDNPXi:
    2244             :   case AArch64::LDNPDi:
    2245             :   case AArch64::STPXi:
    2246             :   case AArch64::STPDi:
    2247             :   case AArch64::STNPXi:
    2248             :   case AArch64::STNPDi:
    2249         551 :     Scale = 8;
    2250         551 :     Width = 16;
    2251         551 :     MinOffset = -64;
    2252         551 :     MaxOffset = 63;
    2253         551 :     break;
    2254        5430 :   case AArch64::LDRXui:
    2255             :   case AArch64::LDRDui:
    2256             :   case AArch64::STRXui:
    2257             :   case AArch64::STRDui:
    2258        5430 :     Scale = Width = 8;
    2259        5430 :     MinOffset = 0;
    2260        5430 :     MaxOffset = 4095;
    2261        5430 :     break;
    2262         170 :   case AArch64::LDPWi:
    2263             :   case AArch64::LDPSi:
    2264             :   case AArch64::LDNPWi:
    2265             :   case AArch64::LDNPSi:
    2266             :   case AArch64::STPWi:
    2267             :   case AArch64::STPSi:
    2268             :   case AArch64::STNPWi:
    2269             :   case AArch64::STNPSi:
    2270         170 :     Scale = 4;
    2271         170 :     Width = 8;
    2272         170 :     MinOffset = -64;
    2273         170 :     MaxOffset = 63;
    2274         170 :     break;
    2275        2192 :   case AArch64::LDRWui:
    2276             :   case AArch64::LDRSui:
    2277             :   case AArch64::LDRSWui:
    2278             :   case AArch64::STRWui:
    2279             :   case AArch64::STRSui:
    2280        2192 :     Scale = Width = 4;
    2281        2192 :     MinOffset = 0;
    2282        2192 :     MaxOffset = 4095;
    2283        2192 :     break;
    2284         247 :   case AArch64::LDRHui:
    2285             :   case AArch64::LDRHHui:
    2286             :   case AArch64::STRHui:
    2287             :   case AArch64::STRHHui:
    2288         247 :     Scale = Width = 2;
    2289         247 :     MinOffset = 0;
    2290         247 :     MaxOffset = 4095;
    2291         247 :     break;
    2292         889 :   case AArch64::LDRBui:
    2293             :   case AArch64::LDRBBui:
    2294             :   case AArch64::STRBui:
    2295             :   case AArch64::STRBBui:
    2296         889 :     Scale = Width = 1;
    2297         889 :     MinOffset = 0;
    2298         889 :     MaxOffset = 4095;
    2299         889 :     break;
    2300             :   }
    2301             : 
    2302             :   return true;
    2303             : }
    2304             : 
    2305             : // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
    2306             : // scaled.
    2307          55 : static bool scaleOffset(unsigned Opc, int64_t &Offset) {
    2308             :   unsigned OffsetStride = 1;
    2309          55 :   switch (Opc) {
    2310             :   default:
    2311             :     return false;
    2312             :   case AArch64::LDURQi:
    2313             :   case AArch64::STURQi:
    2314             :     OffsetStride = 16;
    2315             :     break;
    2316          12 :   case AArch64::LDURXi:
    2317             :   case AArch64::LDURDi:
    2318             :   case AArch64::STURXi:
    2319             :   case AArch64::STURDi:
    2320             :     OffsetStride = 8;
    2321          12 :     break;
    2322          35 :   case AArch64::LDURWi:
    2323             :   case AArch64::LDURSi:
    2324             :   case AArch64::LDURSWi:
    2325             :   case AArch64::STURWi:
    2326             :   case AArch64::STURSi:
    2327             :     OffsetStride = 4;
    2328          35 :     break;
    2329             :   }
    2330             :   // If the byte-offset isn't a multiple of the stride, we can't scale this
    2331             :   // offset.
    2332          55 :   if (Offset % OffsetStride != 0)
    2333             :     return false;
    2334             : 
    2335             :   // Convert the byte-offset used by unscaled into an "element" offset used
    2336             :   // by the scaled pair load/store instructions.
    2337          48 :   Offset /= OffsetStride;
    2338          48 :   return true;
    2339             : }
    2340             : 
    2341             : static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
    2342         556 :   if (FirstOpc == SecondOpc)
    2343             :     return true;
    2344             :   // We can also pair sign-ext and zero-ext instructions.
    2345          74 :   switch (FirstOpc) {
    2346             :   default:
    2347             :     return false;
    2348           3 :   case AArch64::LDRWui:
    2349             :   case AArch64::LDURWi:
    2350           3 :     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
    2351           4 :   case AArch64::LDRSWui:
    2352             :   case AArch64::LDURSWi:
    2353           4 :     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
    2354             :   }
    2355             :   // These instructions can't be paired based on their opcodes.
    2356             :   return false;
    2357             : }
    2358             : 
    2359             : /// Detect opportunities for ldp/stp formation.
    2360             : ///
    2361             : /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
    2362        1539 : bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
    2363             :                                            unsigned BaseReg1,
    2364             :                                            MachineInstr &SecondLdSt,
    2365             :                                            unsigned BaseReg2,
    2366             :                                            unsigned NumLoads) const {
    2367        1539 :   if (BaseReg1 != BaseReg2)
    2368             :     return false;
    2369             : 
    2370             :   // Only cluster up to a single pair.
    2371         783 :   if (NumLoads > 1)
    2372             :     return false;
    2373             : 
    2374         627 :   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    2375          71 :     return false;
    2376             : 
    2377             :   // Can we pair these instructions based on their opcodes?
    2378         556 :   unsigned FirstOpc = FirstLdSt.getOpcode();
    2379         556 :   unsigned SecondOpc = SecondLdSt.getOpcode();
    2380           7 :   if (!canPairLdStOpc(FirstOpc, SecondOpc))
    2381             :     return false;
    2382             : 
    2383             :   // Can't merge volatiles or load/stores that have a hint to avoid pair
    2384             :   // formation, for example.
    2385         946 :   if (!isCandidateToMergeOrPair(FirstLdSt) ||
    2386         458 :       !isCandidateToMergeOrPair(SecondLdSt))
    2387          32 :     return false;
    2388             : 
    2389             :   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
    2390         456 :   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
    2391         456 :   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    2392             :     return false;
    2393             : 
    2394         449 :   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
    2395         449 :   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    2396             :     return false;
    2397             : 
    2398             :   // Pairwise instructions have a 7-bit signed offset field.
    2399         449 :   if (Offset1 > 63 || Offset1 < -64)
    2400             :     return false;
    2401             : 
    2402             :   // The caller should already have ordered First/SecondLdSt by offset.
    2403             :   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
    2404         435 :   return Offset1 + 1 == Offset2;
    2405             : }
    2406             : 
    2407          87 : static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
    2408             :                                             unsigned Reg, unsigned SubIdx,
    2409             :                                             unsigned State,
    2410             :                                             const TargetRegisterInfo *TRI) {
    2411          87 :   if (!SubIdx)
    2412           0 :     return MIB.addReg(Reg, State);
    2413             : 
    2414          87 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
    2415          87 :     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
    2416           0 :   return MIB.addReg(Reg, State, SubIdx);
    2417             : }
    2418             : 
    2419             : static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
    2420             :                                         unsigned NumRegs) {
    2421             :   // We really want the positive remainder mod 32 here, that happens to be
    2422             :   // easily obtainable with a mask.
    2423          11 :   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
    2424             : }
    2425             : 
    2426          11 : void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
    2427             :                                         MachineBasicBlock::iterator I,
    2428             :                                         const DebugLoc &DL, unsigned DestReg,
    2429             :                                         unsigned SrcReg, bool KillSrc,
    2430             :                                         unsigned Opcode,
    2431             :                                         ArrayRef<unsigned> Indices) const {
    2432             :   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
    2433             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2434          11 :   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
    2435             :   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
    2436          11 :   unsigned NumRegs = Indices.size();
    2437             : 
    2438          11 :   int SubReg = 0, End = NumRegs, Incr = 1;
    2439          11 :   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    2440           4 :     SubReg = NumRegs - 1;
    2441             :     End = -1;
    2442             :     Incr = -1;
    2443             :   }
    2444             : 
    2445          40 :   for (; SubReg != End; SubReg += Incr) {
    2446          58 :     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    2447          58 :     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    2448          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    2449          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
    2450             :   }
    2451          11 : }
    2452             : 
    2453        3507 : void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    2454             :                                    MachineBasicBlock::iterator I,
    2455             :                                    const DebugLoc &DL, unsigned DestReg,
    2456             :                                    unsigned SrcReg, bool KillSrc) const {
    2457        4644 :   if (AArch64::GPR32spRegClass.contains(DestReg) &&
    2458         659 :       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    2459             :     const TargetRegisterInfo *TRI = &getRegisterInfo();
    2460             : 
    2461         853 :     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    2462             :       // If either operand is WSP, expand to ADD #0.
    2463           0 :       if (Subtarget.hasZeroCycleRegMove()) {
    2464             :         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
    2465           0 :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2466             :                                                      &AArch64::GPR64spRegClass);
    2467           0 :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2468             :                                                     &AArch64::GPR64spRegClass);
    2469             :         // This instruction is reading and writing X registers.  This may upset
    2470             :         // the register scavenger and machine verifier, so we need to indicate
    2471             :         // that we are reading an undefined value from SrcRegX, but a proper
    2472             :         // value from SrcReg.
    2473           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
    2474           0 :             .addReg(SrcRegX, RegState::Undef)
    2475             :             .addImm(0)
    2476             :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    2477           0 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2478             :       } else {
    2479           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
    2480           0 :             .addReg(SrcReg, getKillRegState(KillSrc))
    2481             :             .addImm(0)
    2482             :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2483             :       }
    2484         853 :     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
    2485          94 :       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
    2486             :           .addImm(0)
    2487             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2488             :     } else {
    2489         806 :       if (Subtarget.hasZeroCycleRegMove()) {
    2490             :         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
    2491          76 :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2492             :                                                      &AArch64::GPR64spRegClass);
    2493          76 :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2494             :                                                     &AArch64::GPR64spRegClass);
    2495             :         // This instruction is reading and writing X registers.  This may upset
    2496             :         // the register scavenger and machine verifier, so we need to indicate
    2497             :         // that we are reading an undefined value from SrcRegX, but a proper
    2498             :         // value from SrcReg.
    2499         228 :         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
    2500          76 :             .addReg(AArch64::XZR)
    2501          76 :             .addReg(SrcRegX, RegState::Undef)
    2502          76 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2503             :       } else {
    2504             :         // Otherwise, expand to ORR WZR.
    2505        2190 :         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
    2506         730 :             .addReg(AArch64::WZR)
    2507         730 :             .addReg(SrcReg, getKillRegState(KillSrc));
    2508             :       }
    2509             :     }
    2510         853 :     return;
    2511             :   }
    2512             : 
    2513        3665 :   if (AArch64::GPR64spRegClass.contains(DestReg) &&
    2514         241 :       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    2515         882 :     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
    2516             :       // If either operand is SP, expand to ADD #0.
    2517         345 :       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
    2518         115 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2519             :           .addImm(0)
    2520             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2521         767 :     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
    2522          48 :       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
    2523             :           .addImm(0)
    2524             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2525             :     } else {
    2526             :       // Otherwise, expand to ORR XZR.
    2527        2229 :       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
    2528         743 :           .addReg(AArch64::XZR)
    2529         743 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2530             :     }
    2531         882 :     return;
    2532             :   }
    2533             : 
    2534             :   // Copy a DDDD register quad by copying the individual sub-registers.
    2535        1772 :   if (AArch64::DDDDRegClass.contains(DestReg) &&
    2536             :       AArch64::DDDDRegClass.contains(SrcReg)) {
    2537             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2538             :                                        AArch64::dsub2, AArch64::dsub3};
    2539           0 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2540             :                      Indices);
    2541           0 :     return;
    2542             :   }
    2543             : 
    2544             :   // Copy a DDD register triple by copying the individual sub-registers.
    2545        1773 :   if (AArch64::DDDRegClass.contains(DestReg) &&
    2546             :       AArch64::DDDRegClass.contains(SrcReg)) {
    2547             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2548             :                                        AArch64::dsub2};
    2549           1 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2550             :                      Indices);
    2551           1 :     return;
    2552             :   }
    2553             : 
    2554             :   // Copy a DD register pair by copying the individual sub-registers.
    2555        1775 :   if (AArch64::DDRegClass.contains(DestReg) &&
    2556             :       AArch64::DDRegClass.contains(SrcReg)) {
    2557             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    2558           4 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2559             :                      Indices);
    2560           4 :     return;
    2561             :   }
    2562             : 
    2563             :   // Copy a QQQQ register quad by copying the individual sub-registers.
    2564        1769 :   if (AArch64::QQQQRegClass.contains(DestReg) &&
    2565             :       AArch64::QQQQRegClass.contains(SrcReg)) {
    2566             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2567             :                                        AArch64::qsub2, AArch64::qsub3};
    2568           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2569             :                      Indices);
    2570           2 :     return;
    2571             :   }
    2572             : 
    2573             :   // Copy a QQQ register triple by copying the individual sub-registers.
    2574        1767 :   if (AArch64::QQQRegClass.contains(DestReg) &&
    2575             :       AArch64::QQQRegClass.contains(SrcReg)) {
    2576             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2577             :                                        AArch64::qsub2};
    2578           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2579             :                      Indices);
    2580           2 :     return;
    2581             :   }
    2582             : 
    2583             :   // Copy a QQ register pair by copying the individual sub-registers.
    2584        1765 :   if (AArch64::QQRegClass.contains(DestReg) &&
    2585             :       AArch64::QQRegClass.contains(SrcReg)) {
    2586             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    2587           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2588             :                      Indices);
    2589           2 :     return;
    2590             :   }
    2591             : 
    2592        2092 :   if (AArch64::FPR128RegClass.contains(DestReg) &&
    2593             :       AArch64::FPR128RegClass.contains(SrcReg)) {
    2594         331 :     if (Subtarget.hasNEON()) {
    2595         990 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2596         330 :           .addReg(SrcReg)
    2597         330 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2598             :     } else {
    2599           2 :       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
    2600           1 :           .addReg(AArch64::SP, RegState::Define)
    2601           1 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2602           1 :           .addReg(AArch64::SP)
    2603             :           .addImm(-16);
    2604           3 :       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
    2605           1 :           .addReg(AArch64::SP, RegState::Define)
    2606           1 :           .addReg(DestReg, RegState::Define)
    2607           1 :           .addReg(AArch64::SP)
    2608             :           .addImm(16);
    2609             :     }
    2610         331 :     return;
    2611             :   }
    2612             : 
    2613        1772 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2614             :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2615         338 :     if (Subtarget.hasNEON()) {
    2616             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
    2617             :                                        &AArch64::FPR128RegClass);
    2618         335 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
    2619             :                                       &AArch64::FPR128RegClass);
    2620        1005 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2621         335 :           .addReg(SrcReg)
    2622         335 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2623             :     } else {
    2624           9 :       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
    2625           3 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2626             :     }
    2627         338 :     return;
    2628             :   }
    2629             : 
    2630        1465 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2631             :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2632          79 :     if (Subtarget.hasNEON()) {
    2633             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
    2634             :                                        &AArch64::FPR128RegClass);
    2635          78 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
    2636             :                                       &AArch64::FPR128RegClass);
    2637         234 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2638          78 :           .addReg(SrcReg)
    2639          78 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2640             :     } else {
    2641           3 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2642           1 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2643             :     }
    2644          79 :     return;
    2645             :   }
    2646             : 
    2647        1053 :   if (AArch64::FPR16RegClass.contains(DestReg) &&
    2648             :       AArch64::FPR16RegClass.contains(SrcReg)) {
    2649          40 :     if (Subtarget.hasNEON()) {
    2650             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2651             :                                        &AArch64::FPR128RegClass);
    2652          40 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2653             :                                       &AArch64::FPR128RegClass);
    2654         120 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2655          40 :           .addReg(SrcReg)
    2656          40 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2657             :     } else {
    2658             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2659             :                                        &AArch64::FPR32RegClass);
    2660           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2661             :                                       &AArch64::FPR32RegClass);
    2662           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2663           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2664             :     }
    2665          40 :     return;
    2666             :   }
    2667             : 
    2668         973 :   if (AArch64::FPR8RegClass.contains(DestReg) &&
    2669             :       AArch64::FPR8RegClass.contains(SrcReg)) {
    2670           0 :     if (Subtarget.hasNEON()) {
    2671             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2672             :                                        &AArch64::FPR128RegClass);
    2673           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2674             :                                       &AArch64::FPR128RegClass);
    2675           0 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2676           0 :           .addReg(SrcReg)
    2677           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2678             :     } else {
    2679             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2680             :                                        &AArch64::FPR32RegClass);
    2681           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2682             :                                       &AArch64::FPR32RegClass);
    2683           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2684           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2685             :     }
    2686           0 :     return;
    2687             :   }
    2688             : 
    2689             :   // Copies between GPR64 and FPR64.
    2690         301 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2691         150 :       AArch64::GPR64RegClass.contains(SrcReg)) {
    2692         300 :     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
    2693         150 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2694         150 :     return;
    2695             :   }
    2696         952 :   if (AArch64::GPR64RegClass.contains(DestReg) &&
    2697             :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2698         256 :     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
    2699         128 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2700         128 :     return;
    2701             :   }
    2702             :   // Copies between GPR32 and FPR32.
    2703        1035 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2704         409 :       AArch64::GPR32RegClass.contains(SrcReg)) {
    2705         818 :     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
    2706         409 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2707         409 :     return;
    2708             :   }
    2709         570 :   if (AArch64::GPR32RegClass.contains(DestReg) &&
    2710             :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2711         568 :     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
    2712         284 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2713         284 :     return;
    2714             :   }
    2715             : 
    2716           2 :   if (DestReg == AArch64::NZCV) {
    2717             :     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    2718           2 :     BuildMI(MBB, I, DL, get(AArch64::MSR))
    2719             :         .addImm(AArch64SysReg::NZCV)
    2720           1 :         .addReg(SrcReg, getKillRegState(KillSrc))
    2721           1 :         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    2722           1 :     return;
    2723             :   }
    2724             : 
    2725           1 :   if (SrcReg == AArch64::NZCV) {
    2726             :     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    2727           2 :     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
    2728             :         .addImm(AArch64SysReg::NZCV)
    2729           1 :         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    2730           1 :     return;
    2731             :   }
    2732             : 
    2733           0 :   llvm_unreachable("unimplemented reg-to-reg copy");
    2734             : }
    2735             : 
    2736        1020 : void AArch64InstrInfo::storeRegToStackSlot(
    2737             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    2738             :     bool isKill, int FI, const TargetRegisterClass *RC,
    2739             :     const TargetRegisterInfo *TRI) const {
    2740        1020 :   DebugLoc DL;
    2741        1020 :   if (MBBI != MBB.end())
    2742             :     DL = MBBI->getDebugLoc();
    2743        1020 :   MachineFunction &MF = *MBB.getParent();
    2744        1020 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2745             :   unsigned Align = MFI.getObjectAlignment(FI);
    2746             : 
    2747        1020 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2748        1020 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2749             :       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
    2750             :   unsigned Opc = 0;
    2751             :   bool Offset = true;
    2752        1020 :   switch (TRI->getSpillSize(*RC)) {
    2753           0 :   case 1:
    2754           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2755             :       Opc = AArch64::STRBui;
    2756             :     break;
    2757           0 :   case 2:
    2758           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2759             :       Opc = AArch64::STRHui;
    2760             :     break;
    2761         140 :   case 4:
    2762         280 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2763             :       Opc = AArch64::STRWui;
    2764         127 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2765          33 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    2766             :       else
    2767             :         assert(SrcReg != AArch64::WSP);
    2768          26 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2769             :       Opc = AArch64::STRSui;
    2770             :     break;
    2771         587 :   case 8:
    2772        1174 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2773             :       Opc = AArch64::STRXui;
    2774         433 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2775          98 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    2776             :       else
    2777             :         assert(SrcReg != AArch64::SP);
    2778         308 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
    2779             :       Opc = AArch64::STRDui;
    2780             :     break;
    2781         286 :   case 16:
    2782         572 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2783             :       Opc = AArch64::STRQui;
    2784           0 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2785             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2786             :       Opc = AArch64::ST1Twov1d;
    2787             :       Offset = false;
    2788           0 :     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2789           0 :       BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
    2790             :           .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
    2791           0 :                   getKillRegState(isKill))
    2792             :           .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
    2793           0 :                   getKillRegState(isKill))
    2794             :           .addFrameIndex(FI)
    2795             :           .addImm(0)
    2796             :           .addMemOperand(MMO);
    2797             :       return;
    2798             :     }
    2799             :     break;
    2800           0 :   case 24:
    2801           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2802             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2803             :       Opc = AArch64::ST1Threev1d;
    2804             :       Offset = false;
    2805             :     }
    2806             :     break;
    2807           3 :   case 32:
    2808           6 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2809             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2810             :       Opc = AArch64::ST1Fourv1d;
    2811             :       Offset = false;
    2812           6 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2813             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2814             :       Opc = AArch64::ST1Twov2d;
    2815             :       Offset = false;
    2816             :     }
    2817             :     break;
    2818           2 :   case 48:
    2819           4 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2820             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2821             :       Opc = AArch64::ST1Threev2d;
    2822             :       Offset = false;
    2823             :     }
    2824             :     break;
    2825           2 :   case 64:
    2826           4 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2827             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2828             :       Opc = AArch64::ST1Fourv2d;
    2829             :       Offset = false;
    2830             :     }
    2831             :     break;
    2832             :   }
    2833             :   assert(Opc && "Unknown register class");
    2834             : 
    2835        2040 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
    2836        1020 :                                      .addReg(SrcReg, getKillRegState(isKill))
    2837        1020 :                                      .addFrameIndex(FI);
    2838             : 
    2839        1020 :   if (Offset)
    2840             :     MI.addImm(0);
    2841             :   MI.addMemOperand(MMO);
    2842             : }
    2843             : 
    2844         963 : void AArch64InstrInfo::loadRegFromStackSlot(
    2845             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    2846             :     int FI, const TargetRegisterClass *RC,
    2847             :     const TargetRegisterInfo *TRI) const {
    2848         963 :   DebugLoc DL;
    2849         963 :   if (MBBI != MBB.end())
    2850             :     DL = MBBI->getDebugLoc();
    2851         963 :   MachineFunction &MF = *MBB.getParent();
    2852         963 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2853             :   unsigned Align = MFI.getObjectAlignment(FI);
    2854         963 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2855         963 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2856             :       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
    2857             : 
    2858             :   unsigned Opc = 0;
    2859             :   bool Offset = true;
    2860         963 :   switch (TRI->getSpillSize(*RC)) {
    2861           0 :   case 1:
    2862           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2863             :       Opc = AArch64::LDRBui;
    2864             :     break;
    2865           0 :   case 2:
    2866           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2867             :       Opc = AArch64::LDRHui;
    2868             :     break;
    2869          77 :   case 4:
    2870         154 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2871             :       Opc = AArch64::LDRWui;
    2872          68 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2873          34 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
    2874             :       else
    2875             :         assert(DestReg != AArch64::WSP);
    2876          18 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2877             :       Opc = AArch64::LDRSui;
    2878             :     break;
    2879         470 :   case 8:
    2880         940 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2881             :       Opc = AArch64::LDRXui;
    2882         367 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2883          88 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
    2884             :       else
    2885             :         assert(DestReg != AArch64::SP);
    2886         206 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
    2887             :       Opc = AArch64::LDRDui;
    2888             :     break;
    2889         409 :   case 16:
    2890         818 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2891             :       Opc = AArch64::LDRQui;
    2892           0 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2893             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2894             :       Opc = AArch64::LD1Twov1d;
    2895             :       Offset = false;
    2896           0 :     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2897           0 :       BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
    2898             :           .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
    2899           0 :                   getDefRegState(true))
    2900             :           .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
    2901           0 :                   getDefRegState(true))
    2902             :           .addFrameIndex(FI)
    2903             :           .addImm(0)
    2904             :           .addMemOperand(MMO);
    2905             :       return;
    2906             :     }
    2907             :     break;
    2908           0 :   case 24:
    2909           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2910             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2911             :       Opc = AArch64::LD1Threev1d;
    2912             :       Offset = false;
    2913             :     }
    2914             :     break;
    2915           3 :   case 32:
    2916           6 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2917             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2918             :       Opc = AArch64::LD1Fourv1d;
    2919             :       Offset = false;
    2920           6 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2921             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2922             :       Opc = AArch64::LD1Twov2d;
    2923             :       Offset = false;
    2924             :     }
    2925             :     break;
    2926           2 :   case 48:
    2927           4 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2928             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2929             :       Opc = AArch64::LD1Threev2d;
    2930             :       Offset = false;
    2931             :     }
    2932             :     break;
    2933           2 :   case 64:
    2934           4 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2935             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2936             :       Opc = AArch64::LD1Fourv2d;
    2937             :       Offset = false;
    2938             :     }
    2939             :     break;
    2940             :   }
    2941             :   assert(Opc && "Unknown register class");
    2942             : 
    2943        1926 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
    2944         963 :                                      .addReg(DestReg, getDefRegState(true))
    2945         963 :                                      .addFrameIndex(FI);
    2946         963 :   if (Offset)
    2947             :     MI.addImm(0);
    2948             :   MI.addMemOperand(MMO);
    2949             : }
    2950             : 
    2951       15965 : void llvm::emitFrameOffset(MachineBasicBlock &MBB,
    2952             :                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    2953             :                            unsigned DestReg, unsigned SrcReg, int Offset,
    2954             :                            const TargetInstrInfo *TII,
    2955             :                            MachineInstr::MIFlag Flag, bool SetNZCV) {
    2956       15965 :   if (DestReg == SrcReg && Offset == 0)
    2957             :     return;
    2958             : 
    2959             :   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
    2960             :          "SP increment/decrement not 16-byte aligned");
    2961             : 
    2962             :   bool isSub = Offset < 0;
    2963        2126 :   if (isSub)
    2964         838 :     Offset = -Offset;
    2965             : 
    2966             :   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
    2967             :   // scratch register.  If DestReg is a virtual register, use it as the
    2968             :   // scratch register; otherwise, create a new virtual register (to be
    2969             :   // replaced by the scavenger at the end of PEI).  That case can be optimized
    2970             :   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
    2971             :   // register can be loaded with offset%8 and the add/sub can use an extending
    2972             :   // instruction with LSL#3.
    2973             :   // Currently the function handles any offsets but generates a poor sequence
    2974             :   // of code.
    2975             :   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
    2976             : 
    2977             :   unsigned Opc;
    2978        2126 :   if (SetNZCV)
    2979           3 :     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
    2980             :   else
    2981        2123 :     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
    2982             :   const unsigned MaxEncoding = 0xfff;
    2983             :   const unsigned ShiftSize = 12;
    2984             :   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
    2985        2159 :   while (((unsigned)Offset) >= (1 << ShiftSize)) {
    2986             :     unsigned ThisVal;
    2987          41 :     if (((unsigned)Offset) > MaxEncodableValue) {
    2988             :       ThisVal = MaxEncodableValue;
    2989             :     } else {
    2990          31 :       ThisVal = Offset & MaxEncodableValue;
    2991             :     }
    2992             :     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
    2993             :            "Encoding cannot handle value that big");
    2994          82 :     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    2995          41 :         .addReg(SrcReg)
    2996          41 :         .addImm(ThisVal >> ShiftSize)
    2997             :         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
    2998             :         .setMIFlag(Flag);
    2999             : 
    3000             :     SrcReg = DestReg;
    3001          41 :     Offset -= ThisVal;
    3002          41 :     if (Offset == 0)
    3003             :       return;
    3004             :   }
    3005        6354 :   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    3006        2118 :       .addReg(SrcReg)
    3007        2118 :       .addImm(Offset)
    3008             :       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    3009             :       .setMIFlag(Flag);
    3010             : }
    3011             : 
    3012         841 : MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    3013             :     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    3014             :     MachineBasicBlock::iterator InsertPt, int FrameIndex,
    3015             :     LiveIntervals *LIS) const {
    3016             :   // This is a bit of a hack. Consider this instruction:
    3017             :   //
    3018             :   //   %0 = COPY %sp; GPR64all:%0
    3019             :   //
    3020             :   // We explicitly chose GPR64all for the virtual register so such a copy might
    3021             :   // be eliminated by RegisterCoalescer. However, that may not be possible, and
    3022             :   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
    3023             :   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
    3024             :   //
    3025             :   // To prevent that, we are going to constrain the %0 register class here.
    3026             :   //
    3027             :   // <rdar://problem/11522048>
    3028             :   //
    3029             :   if (MI.isFullCopy()) {
    3030         390 :     unsigned DstReg = MI.getOperand(0).getReg();
    3031         390 :     unsigned SrcReg = MI.getOperand(1).getReg();
    3032         390 :     if (SrcReg == AArch64::SP &&
    3033             :         TargetRegisterInfo::isVirtualRegister(DstReg)) {
    3034           1 :       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
    3035           1 :       return nullptr;
    3036             :     }
    3037         389 :     if (DstReg == AArch64::SP &&
    3038             :         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
    3039           1 :       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    3040           1 :       return nullptr;
    3041             :     }
    3042             :   }
    3043             : 
    3044             :   // Handle the case where a copy is being spilled or filled but the source
    3045             :   // and destination register class don't match.  For example:
    3046             :   //
    3047             :   //   %0 = COPY %xzr; GPR64common:%0
    3048             :   //
    3049             :   // In this case we can still safely fold away the COPY and generate the
    3050             :   // following spill code:
    3051             :   //
    3052             :   //   STRXui %xzr, %stack.0
    3053             :   //
    3054             :   // This also eliminates spilled cross register class COPYs (e.g. between x and
    3055             :   // d regs) of the same size.  For example:
    3056             :   //
    3057             :   //   %0 = COPY %1; GPR64:%0, FPR64:%1
    3058             :   //
    3059             :   // will be filled as
    3060             :   //
    3061             :   //   LDRDui %0, fi<#0>
    3062             :   //
    3063             :   // instead of
    3064             :   //
    3065             :   //   LDRXui %Temp, fi<#0>
    3066             :   //   %0 = FMOV %Temp
    3067             :   //
    3068         839 :   if (MI.isCopy() && Ops.size() == 1 &&
    3069             :       // Make sure we're only folding the explicit COPY defs/uses.
    3070         560 :       (Ops[0] == 0 || Ops[0] == 1)) {
    3071             :     bool IsSpill = Ops[0] == 0;
    3072             :     bool IsFill = !IsSpill;
    3073         560 :     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    3074         560 :     const MachineRegisterInfo &MRI = MF.getRegInfo();
    3075         560 :     MachineBasicBlock &MBB = *MI.getParent();
    3076         560 :     const MachineOperand &DstMO = MI.getOperand(0);
    3077             :     const MachineOperand &SrcMO = MI.getOperand(1);
    3078         560 :     unsigned DstReg = DstMO.getReg();
    3079         560 :     unsigned SrcReg = SrcMO.getReg();
    3080             :     // This is slightly expensive to compute for physical regs since
    3081             :     // getMinimalPhysRegClass is slow.
    3082             :     auto getRegClass = [&](unsigned Reg) {
    3083             :       return TargetRegisterInfo::isVirtualRegister(Reg)
    3084             :                  ? MRI.getRegClass(Reg)
    3085             :                  : TRI.getMinimalPhysRegClass(Reg);
    3086         560 :     };
    3087             : 
    3088         560 :     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
    3089             :       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
    3090             :                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
    3091             :              "Mismatched register size in non subreg COPY");
    3092         388 :       if (IsSpill)
    3093         169 :         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
    3094             :                             getRegClass(SrcReg), &TRI);
    3095             :       else
    3096         219 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
    3097             :                              getRegClass(DstReg), &TRI);
    3098         471 :       return &*--InsertPt;
    3099             :     }
    3100             : 
    3101             :     // Handle cases like spilling def of:
    3102             :     //
    3103             :     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    3104             :     //
    3105             :     // where the physical register source can be widened and stored to the full
    3106             :     // virtual reg destination stack slot, in this case producing:
    3107             :     //
    3108             :     //   STRXui %xzr, %stack.0
    3109             :     //
    3110         172 :     if (IsSpill && DstMO.isUndef() &&
    3111             :         TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    3112             :       assert(SrcMO.getSubReg() == 0 &&
    3113             :              "Unexpected subreg on physical register");
    3114             :       const TargetRegisterClass *SpillRC;
    3115             :       unsigned SpillSubreg;
    3116          81 :       switch (DstMO.getSubReg()) {
    3117             :       default:
    3118             :         SpillRC = nullptr;
    3119             :         break;
    3120          57 :       case AArch64::sub_32:
    3121             :       case AArch64::ssub:
    3122          57 :         if (AArch64::GPR32RegClass.contains(SrcReg)) {
    3123             :           SpillRC = &AArch64::GPR64RegClass;
    3124             :           SpillSubreg = AArch64::sub_32;
    3125          54 :         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
    3126             :           SpillRC = &AArch64::FPR64RegClass;
    3127             :           SpillSubreg = AArch64::ssub;
    3128             :         } else
    3129             :           SpillRC = nullptr;
    3130             :         break;
    3131          24 :       case AArch64::dsub:
    3132          24 :         if (AArch64::FPR64RegClass.contains(SrcReg)) {
    3133             :           SpillRC = &AArch64::FPR128RegClass;
    3134             :           SpillSubreg = AArch64::dsub;
    3135             :         } else
    3136             :           SpillRC = nullptr;
    3137             :         break;
    3138             :       }
    3139             : 
    3140             :       if (SpillRC)
    3141          81 :         if (unsigned WidenedSrcReg =
    3142          81 :                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
    3143          81 :           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
    3144             :                               FrameIndex, SpillRC, &TRI);
    3145          81 :           return &*--InsertPt;
    3146             :         }
    3147             :     }
    3148             : 
    3149             :     // Handle cases like filling use of:
    3150             :     //
    3151             :     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    3152             :     //
    3153             :     // where we can load the full virtual reg source stack slot, into the subreg
    3154             :     // destination, in this case producing:
    3155             :     //
    3156             :     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    3157             :     //
    3158          91 :     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
    3159             :       const TargetRegisterClass *FillRC;
    3160           2 :       switch (DstMO.getSubReg()) {
    3161             :       default:
    3162             :         FillRC = nullptr;
    3163             :         break;
    3164             :       case AArch64::sub_32:
    3165             :         FillRC = &AArch64::GPR32RegClass;
    3166             :         break;
    3167           1 :       case AArch64::ssub:
    3168             :         FillRC = &AArch64::FPR32RegClass;
    3169           1 :         break;
    3170           0 :       case AArch64::dsub:
    3171             :         FillRC = &AArch64::FPR64RegClass;
    3172           0 :         break;
    3173             :       }
    3174             : 
    3175             :       if (FillRC) {
    3176             :         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
    3177             :                    TRI.getRegSizeInBits(*FillRC) &&
    3178             :                "Mismatched regclass size on folded subreg COPY");
    3179           2 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
    3180             :         MachineInstr &LoadMI = *--InsertPt;
    3181           2 :         MachineOperand &LoadDst = LoadMI.getOperand(0);
    3182             :         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
    3183             :         LoadDst.setSubReg(DstMO.getSubReg());
    3184             :         LoadDst.setIsUndef();
    3185           2 :         return &LoadMI;
    3186             :       }
    3187             :     }
    3188             :   }
    3189             : 
    3190             :   // Cannot fold.
    3191             :   return nullptr;
    3192             : }
    3193             : 
    3194        7845 : int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
    3195             :                                     bool *OutUseUnscaledOp,
    3196             :                                     unsigned *OutUnscaledOp,
    3197             :                                     int *EmittableOffset) {
    3198             :   int Scale = 1;
    3199             :   bool IsSigned = false;
    3200             :   // The ImmIdx should be changed case by case if it is not 2.
    3201             :   unsigned ImmIdx = 2;
    3202             :   unsigned UnscaledOp = 0;
    3203             :   // Set output values in case of early exit.
    3204        7845 :   if (EmittableOffset)
    3205        3306 :     *EmittableOffset = 0;
    3206        7845 :   if (OutUseUnscaledOp)
    3207        3306 :     *OutUseUnscaledOp = false;
    3208        7845 :   if (OutUnscaledOp)
    3209        3306 :     *OutUnscaledOp = 0;
    3210       15690 :   switch (MI.getOpcode()) {
    3211           0 :   default:
    3212           0 :     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
    3213             :   // Vector spills/fills can't take an immediate offset.
    3214             :   case AArch64::LD1Twov2d:
    3215             :   case AArch64::LD1Threev2d:
    3216             :   case AArch64::LD1Fourv2d:
    3217             :   case AArch64::LD1Twov1d:
    3218             :   case AArch64::LD1Threev1d:
    3219             :   case AArch64::LD1Fourv1d:
    3220             :   case AArch64::ST1Twov2d:
    3221             :   case AArch64::ST1Threev2d:
    3222             :   case AArch64::ST1Fourv2d:
    3223             :   case AArch64::ST1Twov1d:
    3224             :   case AArch64::ST1Threev1d:
    3225             :   case AArch64::ST1Fourv1d:
    3226             :     return AArch64FrameOffsetCannotUpdate;
    3227             :   case AArch64::PRFMui:
    3228             :     Scale = 8;
    3229             :     UnscaledOp = AArch64::PRFUMi;
    3230             :     break;
    3231        1240 :   case AArch64::LDRXui:
    3232             :     Scale = 8;
    3233             :     UnscaledOp = AArch64::LDURXi;
    3234        1240 :     break;
    3235         469 :   case AArch64::LDRWui:
    3236             :     Scale = 4;
    3237             :     UnscaledOp = AArch64::LDURWi;
    3238         469 :     break;
    3239           0 :   case AArch64::LDRBui:
    3240             :     Scale = 1;
    3241             :     UnscaledOp = AArch64::LDURBi;
    3242           0 :     break;
    3243           4 :   case AArch64::LDRHui:
    3244             :     Scale = 2;
    3245             :     UnscaledOp = AArch64::LDURHi;
    3246           4 :     break;
    3247          87 :   case AArch64::LDRSui:
    3248             :     Scale = 4;
    3249             :     UnscaledOp = AArch64::LDURSi;
    3250          87 :     break;
    3251         366 :   case AArch64::LDRDui:
    3252             :     Scale = 8;
    3253             :     UnscaledOp = AArch64::LDURDi;
    3254         366 :     break;
    3255        1052 :   case AArch64::LDRQui:
    3256             :     Scale = 16;
    3257             :     UnscaledOp = AArch64::LDURQi;
    3258        1052 :     break;
    3259          51 :   case AArch64::LDRBBui:
    3260             :     Scale = 1;
    3261             :     UnscaledOp = AArch64::LDURBBi;
    3262          51 :     break;
    3263          33 :   case AArch64::LDRHHui:
    3264             :     Scale = 2;
    3265             :     UnscaledOp = AArch64::LDURHHi;
    3266          33 :     break;
    3267           0 :   case AArch64::LDRSBXui:
    3268             :     Scale = 1;
    3269             :     UnscaledOp = AArch64::LDURSBXi;
    3270           0 :     break;
    3271          15 :   case AArch64::LDRSBWui:
    3272             :     Scale = 1;
    3273             :     UnscaledOp = AArch64::LDURSBWi;
    3274          15 :     break;
    3275           0 :   case AArch64::LDRSHXui:
    3276             :     Scale = 2;
    3277             :     UnscaledOp = AArch64::LDURSHXi;
    3278           0 :     break;
    3279          12 :   case AArch64::LDRSHWui:
    3280             :     Scale = 2;
    3281             :     UnscaledOp = AArch64::LDURSHWi;
    3282          12 :     break;
    3283           6 :   case AArch64::LDRSWui:
    3284             :     Scale = 4;
    3285             :     UnscaledOp = AArch64::LDURSWi;
    3286           6 :     break;
    3287             : 
    3288        1601 :   case AArch64::STRXui:
    3289             :     Scale = 8;
    3290             :     UnscaledOp = AArch64::STURXi;
    3291        1601 :     break;
    3292        1004 :   case AArch64::STRWui:
    3293             :     Scale = 4;
    3294             :     UnscaledOp = AArch64::STURWi;
    3295        1004 :     break;
    3296           0 :   case AArch64::STRBui:
    3297             :     Scale = 1;
    3298             :     UnscaledOp = AArch64::STURBi;
    3299           0 :     break;
    3300           0 :   case AArch64::STRHui:
    3301             :     Scale = 2;
    3302             :     UnscaledOp = AArch64::STURHi;
    3303           0 :     break;
    3304          48 :   case AArch64::STRSui:
    3305             :     Scale = 4;
    3306             :     UnscaledOp = AArch64::STURSi;
    3307          48 :     break;
    3308         446 :   case AArch64::STRDui:
    3309             :     Scale = 8;
    3310             :     UnscaledOp = AArch64::STURDi;
    3311         446 :     break;
    3312        1271 :   case AArch64::STRQui:
    3313             :     Scale = 16;
    3314             :     UnscaledOp = AArch64::STURQi;
    3315        1271 :     break;
    3316          47 :   case AArch64::STRBBui:
    3317             :     Scale = 1;
    3318             :     UnscaledOp = AArch64::STURBBi;
    3319          47 :     break;
    3320          30 :   case AArch64::STRHHui:
    3321             :     Scale = 2;
    3322             :     UnscaledOp = AArch64::STURHHi;
    3323          30 :     break;
    3324             : 
    3325           6 :   case AArch64::LDPXi:
    3326             :   case AArch64::LDPDi:
    3327             :   case AArch64::STPXi:
    3328             :   case AArch64::STPDi:
    3329             :   case AArch64::LDNPXi:
    3330             :   case AArch64::LDNPDi:
    3331             :   case AArch64::STNPXi:
    3332             :   case AArch64::STNPDi:
    3333             :     ImmIdx = 3;
    3334             :     IsSigned = true;
    3335             :     Scale = 8;
    3336           6 :     break;
    3337           0 :   case AArch64::LDPQi:
    3338             :   case AArch64::STPQi:
    3339             :   case AArch64::LDNPQi:
    3340             :   case AArch64::STNPQi:
    3341             :     ImmIdx = 3;
    3342             :     IsSigned = true;
    3343             :     Scale = 16;
    3344           0 :     break;
    3345           0 :   case AArch64::LDPWi:
    3346             :   case AArch64::LDPSi:
    3347             :   case AArch64::STPWi:
    3348             :   case AArch64::STPSi:
    3349             :   case AArch64::LDNPWi:
    3350             :   case AArch64::LDNPSi:
    3351             :   case AArch64::STNPWi:
    3352             :   case AArch64::STNPSi:
    3353             :     ImmIdx = 3;
    3354             :     IsSigned = true;
    3355             :     Scale = 4;
    3356           0 :     break;
    3357             : 
    3358          10 :   case AArch64::LDURXi:
    3359             :   case AArch64::LDURWi:
    3360             :   case AArch64::LDURBi:
    3361             :   case AArch64::LDURHi:
    3362             :   case AArch64::LDURSi:
    3363             :   case AArch64::LDURDi:
    3364             :   case AArch64::LDURQi:
    3365             :   case AArch64::LDURHHi:
    3366             :   case AArch64::LDURBBi:
    3367             :   case AArch64::LDURSBXi:
    3368             :   case AArch64::LDURSBWi:
    3369             :   case AArch64::LDURSHXi:
    3370             :   case AArch64::LDURSHWi:
    3371             :   case AArch64::LDURSWi:
    3372             :   case AArch64::STURXi:
    3373             :   case AArch64::STURWi:
    3374             :   case AArch64::STURBi:
    3375             :   case AArch64::STURHi:
    3376             :   case AArch64::STURSi:
    3377             :   case AArch64::STURDi:
    3378             :   case AArch64::STURQi:
    3379             :   case AArch64::STURBBi:
    3380             :   case AArch64::STURHHi:
    3381             :     Scale = 1;
    3382          10 :     break;
    3383             :   }
    3384             : 
    3385        7822 :   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
    3386             : 
    3387             :   bool useUnscaledOp = false;
    3388             :   // If the offset doesn't match the scale, we rewrite the instruction to
    3389             :   // use the unscaled instruction instead. Likewise, if we have a negative
    3390             :   // offset (and have an unscaled op to use).
    3391        7822 :   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    3392             :     useUnscaledOp = true;
    3393             : 
    3394             :   // Use an unscaled addressing mode if the instruction has a negative offset
    3395             :   // (or if the instruction is already using an unscaled addressing mode).
    3396             :   unsigned MaskBits;
    3397        7822 :   if (IsSigned) {
    3398             :     // ldp/stp instructions.
    3399             :     MaskBits = 7;
    3400           6 :     Offset /= Scale;
    3401        7816 :   } else if (UnscaledOp == 0 || useUnscaledOp) {
    3402             :     MaskBits = 9;
    3403             :     IsSigned = true;
    3404             :     Scale = 1;
    3405             :   } else {
    3406             :     MaskBits = 12;
    3407             :     IsSigned = false;
    3408        7435 :     Offset /= Scale;
    3409             :   }
    3410             : 
    3411             :   // Attempt to fold address computation.
    3412        7822 :   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
    3413        7822 :   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
    3414        7822 :   if (Offset >= MinOff && Offset <= MaxOff) {
    3415        7639 :     if (EmittableOffset)
    3416        3290 :       *EmittableOffset = Offset;
    3417        7639 :     Offset = 0;
    3418             :   } else {
    3419         183 :     int NewOff = Offset < 0 ? MinOff : MaxOff;
    3420         183 :     if (EmittableOffset)
    3421           1 :       *EmittableOffset = NewOff;
    3422         183 :     Offset = (Offset - NewOff) * Scale;
    3423             :   }
    3424        7822 :   if (OutUseUnscaledOp)
    3425        3291 :     *OutUseUnscaledOp = useUnscaledOp;
    3426        7822 :   if (OutUnscaledOp)
    3427        3291 :     *OutUnscaledOp = UnscaledOp;
    3428        7822 :   return AArch64FrameOffsetCanUpdate |
    3429        7822 :          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
    3430             : }
    3431             : 
    3432        3520 : bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
    3433             :                                     unsigned FrameReg, int &Offset,
    3434             :                                     const AArch64InstrInfo *TII) {
    3435        3520 :   unsigned Opcode = MI.getOpcode();
    3436        3520 :   unsigned ImmIdx = FrameRegIdx + 1;
    3437             : 
    3438        3520 :   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    3439         214 :     Offset += MI.getOperand(ImmIdx).getImm();
    3440         428 :     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
    3441             :                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
    3442             :                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    3443         214 :     MI.eraseFromParent();
    3444         214 :     Offset = 0;
    3445         214 :     return true;
    3446             :   }
    3447             : 
    3448             :   int NewOffset;
    3449             :   unsigned UnscaledOp;
    3450             :   bool UseUnscaledOp;
    3451        3306 :   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
    3452             :                                          &UnscaledOp, &NewOffset);
    3453        3306 :   if (Status & AArch64FrameOffsetCanUpdate) {
    3454        3291 :     if (Status & AArch64FrameOffsetIsLegal)
    3455             :       // Replace the FrameIndex with FrameReg.
    3456        6580 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    3457        3291 :     if (UseUnscaledOp)
    3458         175 :       MI.setDesc(TII->get(UnscaledOp));
    3459             : 
    3460        6582 :     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    3461        3291 :     return Offset == 0;
    3462             :   }
    3463             : 
    3464             :   return false;
    3465             : }
    3466             : 
    3467           0 : void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
    3468             :   NopInst.setOpcode(AArch64::HINT);
    3469           0 :   NopInst.addOperand(MCOperand::createImm(0));
    3470           0 : }
    3471             : 
    3472             : // AArch64 supports MachineCombiner.
    3473       13784 : bool AArch64InstrInfo::useMachineCombiner() const { return true; }
    3474             : 
    3475             : // True when Opc sets flag
    3476             : static bool isCombineInstrSettingFlag(unsigned Opc) {
    3477        2890 :   switch (Opc) {
    3478             :   case AArch64::ADDSWrr:
    3479             :   case AArch64::ADDSWri:
    3480             :   case AArch64::ADDSXrr:
    3481             :   case AArch64::ADDSXri:
    3482             :   case AArch64::SUBSWrr:
    3483             :   case AArch64::SUBSXrr:
    3484             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3485             :   case AArch64::SUBSWri:
    3486             :   case AArch64::SUBSXri:
    3487             :     return true;
    3488             :   default:
    3489             :     break;
    3490             :   }
    3491             :   return false;
    3492             : }
    3493             : 
    3494             : // 32b Opcodes that can be combined with a MUL
    3495             : static bool isCombineInstrCandidate32(unsigned Opc) {
    3496      117106 :   switch (Opc) {
    3497             :   case AArch64::ADDWrr:
    3498             :   case AArch64::ADDWri:
    3499             :   case AArch64::SUBWrr:
    3500             :   case AArch64::ADDSWrr:
    3501             :   case AArch64::ADDSWri:
    3502             :   case AArch64::SUBSWrr:
    3503             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3504             :   case AArch64::SUBWri:
    3505             :   case AArch64::SUBSWri:
    3506             :     return true;
    3507             :   default:
    3508             :     break;
    3509             :   }
    3510             :   return false;
    3511             : }
    3512             : 
    3513             : // 64b Opcodes that can be combined with a MUL
    3514             : static bool isCombineInstrCandidate64(unsigned Opc) {
    3515      115631 :   switch (Opc) {
    3516             :   case AArch64::ADDXrr:
    3517             :   case AArch64::ADDXri:
    3518             :   case AArch64::SUBXrr:
    3519             :   case AArch64::ADDSXrr:
    3520             :   case AArch64::ADDSXri:
    3521             :   case AArch64::SUBSXrr:
    3522             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3523             :   case AArch64::SUBXri:
    3524             :   case AArch64::SUBSXri:
    3525             :     return true;
    3526             :   default:
    3527             :     break;
    3528             :   }
    3529             :   return false;
    3530             : }
    3531             : 
    3532             : // FP Opcodes that can be combined with a FMUL
    3533      116905 : static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
    3534      233810 :   switch (Inst.getOpcode()) {
    3535             :   default:
    3536      116313 :     break;
    3537         592 :   case AArch64::FADDSrr:
    3538             :   case AArch64::FADDDrr:
    3539             :   case AArch64::FADDv2f32:
    3540             :   case AArch64::FADDv2f64:
    3541             :   case AArch64::FADDv4f32:
    3542             :   case AArch64::FSUBSrr:
    3543             :   case AArch64::FSUBDrr:
    3544             :   case AArch64::FSUBv2f32:
    3545             :   case AArch64::FSUBv2f64:
    3546             :   case AArch64::FSUBv4f32:
    3547         592 :     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    3548        1033 :     return (Options.UnsafeFPMath ||
    3549         592 :             Options.AllowFPOpFusion == FPOpFusion::Fast);
    3550             :   }
    3551      116313 :   return false;
    3552             : }
    3553             : 
    3554             : // Opcodes that can be combined with a MUL
    3555      117106 : static bool isCombineInstrCandidate(unsigned Opc) {
    3556      117106 :   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
    3557             : }
    3558             : 
    3559             : //
    3560             : // Utility routine that checks if \param MO is defined by an
    3561             : // \param CombineOpc instruction in the basic block \param MBB
    3562        3795 : static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
    3563             :                        unsigned CombineOpc, unsigned ZeroReg = 0,
    3564             :                        bool CheckZeroReg = false) {
    3565        3795 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    3566             :   MachineInstr *MI = nullptr;
    3567             : 
    3568        3795 :   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3569        3635 :     MI = MRI.getUniqueVRegDef(MO.getReg());
    3570             :   // And it needs to be in the trace (otherwise, it won't have a depth).
    3571        3635 :   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    3572        3502 :     return false;
    3573             :   // Must only used by the user we combine with.
    3574         293 :   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    3575             :     return false;
    3576             : 
    3577         278 :   if (CheckZeroReg) {
    3578             :     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
    3579             :            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
    3580             :            MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
    3581             :     // The third input reg must be zero.
    3582         210 :     if (MI->getOperand(3).getReg() != ZeroReg)
    3583           8 :       return false;
    3584             :   }
    3585             : 
    3586             :   return true;
    3587             : }
    3588             : 
    3589             : //
    3590             : // Is \param MO defined by an integer multiply and can be combined?
    3591             : static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3592             :                               unsigned MulOpc, unsigned ZeroReg) {
    3593        3076 :   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
    3594             : }
    3595             : 
    3596             : //
    3597             : // Is \param MO defined by a floating-point multiply and can be combined?
    3598             : static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3599             :                                unsigned MulOpc) {
    3600         719 :   return canCombine(MBB, MO, MulOpc);
    3601             : }
    3602             : 
    3603             : // TODO: There are many more machine instruction opcodes to match:
    3604             : //       1. Other data types (integer, vectors)
    3605             : //       2. Other math / logic operations (xor, or)
    3606             : //       3. Other forms of the same operation (intrinsics and other variants)
    3607      116850 : bool AArch64InstrInfo::isAssociativeAndCommutative(
    3608             :     const MachineInstr &Inst) const {
    3609      233700 :   switch (Inst.getOpcode()) {
    3610         900 :   case AArch64::FADDDrr:
    3611             :   case AArch64::FADDSrr:
    3612             :   case AArch64::FADDv2f32:
    3613             :   case AArch64::FADDv2f64:
    3614             :   case AArch64::FADDv4f32:
    3615             :   case AArch64::FMULDrr:
    3616             :   case AArch64::FMULSrr:
    3617             :   case AArch64::FMULX32:
    3618             :   case AArch64::FMULX64:
    3619             :   case AArch64::FMULXv2f32:
    3620             :   case AArch64::FMULXv2f64:
    3621             :   case AArch64::FMULXv4f32:
    3622             :   case AArch64::FMULv2f32:
    3623             :   case AArch64::FMULv2f64:
    3624             :   case AArch64::FMULv4f32:
    3625         900 :     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
    3626             :   default:
    3627             :     return false;
    3628             :   }
    3629             : }
    3630             : 
    3631             : /// Find instructions that can be turned into madd.
    3632      117106 : static bool getMaddPatterns(MachineInstr &Root,
    3633             :                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3634      117106 :   unsigned Opc = Root.getOpcode();
    3635      117106 :   MachineBasicBlock &MBB = *Root.getParent();
    3636             :   bool Found = false;
    3637             : 
    3638      117106 :   if (!isCombineInstrCandidate(Opc))
    3639             :     return false;
    3640             :   if (isCombineInstrSettingFlag(Opc)) {
    3641        1478 :     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    3642             :     // When NZCV is live bail out.
    3643        1478 :     if (Cmp_NZCV == -1)
    3644             :       return false;
    3645         645 :     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    3646             :     // When opcode can't change bail out.
    3647             :     // CHECKME: do we miss any cases for opcode conversion?
    3648         645 :     if (NewOpc == Opc)
    3649             :       return false;
    3650             :     Opc = NewOpc;
    3651             :   }
    3652             : 
    3653        2057 :   switch (Opc) {
    3654             :   default:
    3655             :     break;
    3656         298 :   case AArch64::ADDWrr:
    3657             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3658             :            "ADDWrr does not have register operands");
    3659         298 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3660             :                           AArch64::WZR)) {
    3661           1 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
    3662             :       Found = true;
    3663             :     }
    3664         298 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3665             :                           AArch64::WZR)) {
    3666           2 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
    3667             :       Found = true;
    3668             :     }
    3669             :     break;
    3670         255 :   case AArch64::ADDXrr:
    3671         255 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3672             :                           AArch64::XZR)) {
    3673           8 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
    3674             :       Found = true;
    3675             :     }
    3676         255 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3677             :                           AArch64::XZR)) {
    3678          40 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
    3679             :       Found = true;
    3680             :     }
    3681             :     break;
    3682         312 :   case AArch64::SUBWrr:
    3683         312 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3684             :                           AArch64::WZR)) {
    3685           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
    3686             :       Found = true;
    3687             :     }
    3688         312 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3689             :                           AArch64::WZR)) {
    3690         130 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
    3691             :       Found = true;
    3692             :     }
    3693             :     break;
    3694         154 :   case AArch64::SUBXrr:
    3695         154 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3696             :                           AArch64::XZR)) {
    3697           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
    3698             :       Found = true;
    3699             :     }
    3700         154 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3701             :                           AArch64::XZR)) {
    3702          17 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
    3703             :       Found = true;
    3704             :     }
    3705             :     break;
    3706         225 :   case AArch64::ADDWri:
    3707         225 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3708             :                           AArch64::WZR)) {
    3709           2 :       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
    3710             :       Found = true;
    3711             :     }
    3712             :     break;
    3713         600 :   case AArch64::ADDXri:
    3714         600 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3715             :                           AArch64::XZR)) {
    3716           1 :       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
    3717             :       Found = true;
    3718             :     }
    3719             :     break;
    3720          79 :   case AArch64::SUBWri:
    3721          79 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3722             :                           AArch64::WZR)) {
    3723           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
    3724             :       Found = true;
    3725             :     }
    3726             :     break;
    3727         134 :   case AArch64::SUBXri:
    3728         134 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3729             :                           AArch64::XZR)) {
    3730           1 :       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
    3731             :       Found = true;
    3732             :     }
    3733             :     break;
    3734             :   }
    3735             :   return Found;
    3736             : }
    3737             : /// Floating-Point Support
    3738             : 
    3739             : /// Find instructions that can be turned into madd.
    3740      116905 : static bool getFMAPatterns(MachineInstr &Root,
    3741             :                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3742             : 
    3743      116905 :   if (!isCombineInstrCandidateFP(Root))
    3744             :     return false;
    3745             : 
    3746         183 :   MachineBasicBlock &MBB = *Root.getParent();
    3747             :   bool Found = false;
    3748             : 
    3749         366 :   switch (Root.getOpcode()) {
    3750             :   default:
    3751             :     assert(false && "Unsupported FP instruction in combiner\n");
    3752             :     break;
    3753          55 :   case AArch64::FADDSrr:
    3754             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3755             :            "FADDWrr does not have register operands");
    3756          55 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3757           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
    3758             :       Found = true;
    3759          54 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3760             :                                   AArch64::FMULv1i32_indexed)) {
    3761           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
    3762             :       Found = true;
    3763             :     }
    3764          55 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3765           0 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
    3766             :       Found = true;
    3767          55 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3768             :                                   AArch64::FMULv1i32_indexed)) {
    3769           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
    3770             :       Found = true;
    3771             :     }
    3772             :     break;
    3773          30 :   case AArch64::FADDDrr:
    3774          30 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3775           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
    3776             :       Found = true;
    3777          29 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3778             :                                   AArch64::FMULv1i64_indexed)) {
    3779           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
    3780             :       Found = true;
    3781             :     }
    3782          30 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3783           2 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
    3784             :       Found = true;
    3785          28 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3786             :                                   AArch64::FMULv1i64_indexed)) {
    3787           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
    3788             :       Found = true;
    3789             :     }
    3790             :     break;
    3791           5 :   case AArch64::FADDv2f32:
    3792           5 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3793             :                            AArch64::FMULv2i32_indexed)) {
    3794           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
    3795             :       Found = true;
    3796           4 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3797             :                                   AArch64::FMULv2f32)) {
    3798           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
    3799             :       Found = true;
    3800             :     }
    3801           5 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3802             :                            AArch64::FMULv2i32_indexed)) {
    3803           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
    3804             :       Found = true;
    3805           5 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3806             :                                   AArch64::FMULv2f32)) {
    3807           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
    3808             :       Found = true;
    3809             :     }
    3810             :     break;
    3811          10 :   case AArch64::FADDv2f64:
    3812          10 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3813             :                            AArch64::FMULv2i64_indexed)) {
    3814           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
    3815             :       Found = true;
    3816           9 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3817             :                                   AArch64::FMULv2f64)) {
    3818           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
    3819             :       Found = true;
    3820             :     }
    3821          10 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3822             :                            AArch64::FMULv2i64_indexed)) {
    3823           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
    3824             :       Found = true;
    3825          10 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3826             :                                   AArch64::FMULv2f64)) {
    3827           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
    3828             :       Found = true;
    3829             :     }
    3830             :     break;
    3831          31 :   case AArch64::FADDv4f32:
    3832          31 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3833             :                            AArch64::FMULv4i32_indexed)) {
    3834           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
    3835             :       Found = true;
    3836          30 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3837             :                                   AArch64::FMULv4f32)) {
    3838           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
    3839             :       Found = true;
    3840             :     }
    3841          31 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3842             :                            AArch64::FMULv4i32_indexed)) {
    3843           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
    3844             :       Found = true;
    3845          31 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3846             :                                   AArch64::FMULv4f32)) {
    3847           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
    3848             :       Found = true;
    3849             :     }
    3850             :     break;
    3851             : 
    3852           6 :   case AArch64::FSUBSrr:
    3853           6 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3854           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
    3855             :       Found = true;
    3856             :     }
    3857           6 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3858           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
    3859             :       Found = true;
    3860           6 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3861             :                                   AArch64::FMULv1i32_indexed)) {
    3862           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
    3863             :       Found = true;
    3864             :     }
    3865           6 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
    3866           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
    3867             :       Found = true;
    3868             :     }
    3869             :     break;
    3870           4 :   case AArch64::FSUBDrr:
    3871           4 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3872           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
    3873             :       Found = true;
    3874             :     }
    3875           4 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3876           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
    3877             :       Found = true;
    3878           4 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3879             :                                   AArch64::FMULv1i64_indexed)) {
    3880           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
    3881             :       Found = true;
    3882             :     }
    3883           4 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
    3884           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
    3885             :       Found = true;
    3886             :     }
    3887             :     break;
    3888          14 :   case AArch64::FSUBv2f32:
    3889          14 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3890             :                            AArch64::FMULv2i32_indexed)) {
    3891           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
    3892             :       Found = true;
    3893          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3894             :                                   AArch64::FMULv2f32)) {
    3895           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
    3896             :       Found = true;
    3897             :     }
    3898          14 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3899             :                            AArch64::FMULv2i32_indexed)) {
    3900           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
    3901             :       Found = true;
    3902          14 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3903             :                                   AArch64::FMULv2f32)) {
    3904           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
    3905             :       Found = true;
    3906             :     }
    3907             :     break;
    3908          16 :   case AArch64::FSUBv2f64:
    3909          16 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3910             :                            AArch64::FMULv2i64_indexed)) {
    3911           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
    3912             :       Found = true;
    3913          14 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3914             :                                   AArch64::FMULv2f64)) {
    3915           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
    3916             :       Found = true;
    3917             :     }
    3918          16 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3919             :                            AArch64::FMULv2i64_indexed)) {
    3920           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
    3921             :       Found = true;
    3922          16 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3923             :                                   AArch64::FMULv2f64)) {
    3924           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
    3925             :       Found = true;
    3926             :     }
    3927             :     break;
    3928          12 :   case AArch64::FSUBv4f32:
    3929          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3930             :                            AArch64::FMULv4i32_indexed)) {
    3931           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
    3932             :       Found = true;
    3933          10 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3934             :                                   AArch64::FMULv4f32)) {
    3935           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
    3936             :       Found = true;
    3937             :     }
    3938          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3939             :                            AArch64::FMULv4i32_indexed)) {
    3940           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
    3941             :       Found = true;
    3942          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3943             :                                   AArch64::FMULv4f32)) {
    3944           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
    3945             :       Found = true;
    3946             :     }
    3947             :     break;
    3948             :   }
    3949             :   return Found;
    3950             : }
    3951             : 
    3952             : /// Return true when a code sequence can improve throughput. It
    3953             : /// should be called only for instructions in loops.
    3954             : /// \param Pattern - combiner pattern
    3955          39 : bool AArch64InstrInfo::isThroughputPattern(
    3956             :     MachineCombinerPattern Pattern) const {
    3957             :   switch (Pattern) {
    3958             :   default:
    3959             :     break;
    3960             :   case MachineCombinerPattern::FMULADDS_OP1:
    3961             :   case MachineCombinerPattern::FMULADDS_OP2:
    3962             :   case MachineCombinerPattern::FMULSUBS_OP1:
    3963             :   case MachineCombinerPattern::FMULSUBS_OP2:
    3964             :   case MachineCombinerPattern::FMULADDD_OP1:
    3965             :   case MachineCombinerPattern::FMULADDD_OP2:
    3966             :   case MachineCombinerPattern::FMULSUBD_OP1:
    3967             :   case MachineCombinerPattern::FMULSUBD_OP2:
    3968             :   case MachineCombinerPattern::FNMULSUBS_OP1:
    3969             :   case MachineCombinerPattern::FNMULSUBD_OP1:
    3970             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    3971             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    3972             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    3973             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    3974             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    3975             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    3976             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    3977             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    3978             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    3979             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    3980             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    3981             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    3982             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    3983             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    3984             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    3985             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    3986             :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    3987             :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    3988             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    3989             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    3990             :   case MachineCombinerPattern::FMLSv2f32_OP2:
    3991             :   case MachineCombinerPattern::FMLSv2f64_OP2:
    3992             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    3993             :   case MachineCombinerPattern::FMLSv4f32_OP2:
    3994             :     return true;
    3995             :   } // end switch (Pattern)
    3996             :   return false;
    3997             : }
    3998             : /// Return true when there is potentially a faster code sequence for an
    3999             : /// instruction chain ending in \p Root. All potential patterns are listed in
    4000             : /// the \p Pattern vector. Pattern should be sorted in priority order since the
    4001             : /// pattern evaluator stops checking as soon as it finds a faster sequence.
    4002             : 
    4003      117106 : bool AArch64InstrInfo::getMachineCombinerPatterns(
    4004             :     MachineInstr &Root,
    4005             :     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
    4006             :   // Integer patterns
    4007      117106 :   if (getMaddPatterns(Root, Patterns))
    4008             :     return true;
    4009             :   // Floating point patterns
    4010      116905 :   if (getFMAPatterns(Root, Patterns))
    4011             :     return true;
    4012             : 
    4013      116850 :   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
    4014             : }
    4015             : 
    4016             : enum class FMAInstKind { Default, Indexed, Accumulator };
    4017             : /// genFusedMultiply - Generate fused multiply instructions.
    4018             : /// This function supports both integer and floating point instructions.
    4019             : /// A typical example:
    4020             : ///  F|MUL I=A,B,0
    4021             : ///  F|ADD R,I,C
    4022             : ///  ==> F|MADD R,A,B,C
    4023             : /// \param MF Containing MachineFunction
    4024             : /// \param MRI Register information
    4025             : /// \param TII Target information
    4026             : /// \param Root is the F|ADD instruction
    4027             : /// \param [out] InsInstrs is a vector of machine instructions and will
    4028             : /// contain the generated madd instruction
    4029             : /// \param IdxMulOpd is index of operand in Root that is the result of
    4030             : /// the F|MUL. In the example above IdxMulOpd is 1.
    4031             : /// \param MaddOpc the opcode fo the f|madd instruction
    4032             : /// \param RC Register class of operands
    4033             : /// \param kind of fma instruction (addressing mode) to be generated
    4034             : /// \param ReplacedAddend is the result register from the instruction
    4035             : /// replacing the non-combined operand, if any.
    4036             : static MachineInstr *
    4037         290 : genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
    4038             :                  const TargetInstrInfo *TII, MachineInstr &Root,
    4039             :                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
    4040             :                  unsigned MaddOpc, const TargetRegisterClass *RC,
    4041             :                  FMAInstKind kind = FMAInstKind::Default,
    4042             :                  const unsigned *ReplacedAddend = nullptr) {
    4043             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    4044             : 
    4045         290 :   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
    4046         580 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    4047         290 :   unsigned ResultReg = Root.getOperand(0).getReg();
    4048         290 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    4049             :   bool Src0IsKill = MUL->getOperand(1).isKill();
    4050         290 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    4051             :   bool Src1IsKill = MUL->getOperand(2).isKill();
    4052             : 
    4053             :   unsigned SrcReg2;
    4054             :   bool Src2IsKill;
    4055         290 :   if (ReplacedAddend) {
    4056             :     // If we just generated a new addend, we must be it's only use.
    4057          36 :     SrcReg2 = *ReplacedAddend;
    4058             :     Src2IsKill = true;
    4059             :   } else {
    4060         254 :     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    4061             :     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
    4062             :   }
    4063             : 
    4064         290 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    4065         290 :     MRI.constrainRegClass(ResultReg, RC);
    4066         290 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    4067         290 :     MRI.constrainRegClass(SrcReg0, RC);
    4068         290 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    4069         290 :     MRI.constrainRegClass(SrcReg1, RC);
    4070         290 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    4071         290 :     MRI.constrainRegClass(SrcReg2, RC);
    4072             : 
    4073             :   MachineInstrBuilder MIB;
    4074         290 :   if (kind == FMAInstKind::Default)
    4075         618 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4076         206 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4077         206 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4078         206 :               .addReg(SrcReg2, getKillRegState(Src2IsKill));
    4079          84 :   else if (kind == FMAInstKind::Indexed)
    4080          45 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4081          15 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    4082          15 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4083          15 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4084          15 :               .addImm(MUL->getOperand(3).getImm());
    4085          69 :   else if (kind == FMAInstKind::Accumulator)
    4086         207 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4087          69 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    4088          69 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4089          69 :               .addReg(SrcReg1, getKillRegState(Src1IsKill));
    4090             :   else
    4091             :     assert(false && "Invalid FMA instruction kind \n");
    4092             :   // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
    4093         290 :   InsInstrs.push_back(MIB);
    4094         290 :   return MUL;
    4095             : }
    4096             : 
    4097             : /// genMaddR - Generate madd instruction and combine mul and add using
    4098             : /// an extra virtual register
    4099             : /// Example - an ADD intermediate needs to be stored in a register:
    4100             : ///   MUL I=A,B,0
    4101             : ///   ADD R,I,Imm
    4102             : ///   ==> ORR  V, ZR, Imm
    4103             : ///   ==> MADD R,A,B,V
    4104             : /// \param MF Containing MachineFunction
    4105             : /// \param MRI Register information
    4106             : /// \param TII Target information
    4107             : /// \param Root is the ADD instruction
    4108             : /// \param [out] InsInstrs is a vector of machine instructions and will
    4109             : /// contain the generated madd instruction
    4110             : /// \param IdxMulOpd is index of operand in Root that is the result of
    4111             : /// the MUL. In the example above IdxMulOpd is 1.
    4112             : /// \param MaddOpc the opcode fo the madd instruction
    4113             : /// \param VR is a virtual register that holds the value of an ADD operand
    4114             : /// (V in the example above).
    4115             : /// \param RC Register class of operands
    4116           3 : static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
    4117             :                               const TargetInstrInfo *TII, MachineInstr &Root,
    4118             :                               SmallVectorImpl<MachineInstr *> &InsInstrs,
    4119             :                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
    4120             :                               const TargetRegisterClass *RC) {
    4121             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    4122             : 
    4123           6 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    4124           3 :   unsigned ResultReg = Root.getOperand(0).getReg();
    4125           3 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    4126             :   bool Src0IsKill = MUL->getOperand(1).isKill();
    4127           3 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    4128             :   bool Src1IsKill = MUL->getOperand(2).isKill();
    4129             : 
    4130           3 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    4131           3 :     MRI.constrainRegClass(ResultReg, RC);
    4132           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    4133           3 :     MRI.constrainRegClass(SrcReg0, RC);
    4134           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    4135           3 :     MRI.constrainRegClass(SrcReg1, RC);
    4136           3 :   if (TargetRegisterInfo::isVirtualRegister(VR))
    4137           3 :     MRI.constrainRegClass(VR, RC);
    4138             : 
    4139             :   MachineInstrBuilder MIB =
    4140           6 :       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4141           3 :           .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4142           3 :           .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4143           3 :           .addReg(VR);
    4144             :   // Insert the MADD
    4145           3 :   InsInstrs.push_back(MIB);
    4146           3 :   return MUL;
    4147             : }
    4148             : 
    4149             : /// When getMachineCombinerPatterns() finds potential patterns,
    4150             : /// this function generates the instructions that could replace the
    4151             : /// original code sequence
    4152         476 : void AArch64InstrInfo::genAlternativeCodeSequence(
    4153             :     MachineInstr &Root, MachineCombinerPattern Pattern,
    4154             :     SmallVectorImpl<MachineInstr *> &InsInstrs,
    4155             :     SmallVectorImpl<MachineInstr *> &DelInstrs,
    4156             :     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
    4157         476 :   MachineBasicBlock &MBB = *Root.getParent();
    4158         476 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    4159             :   MachineFunction &MF = *MBB.getParent();
    4160         476 :   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    4161             : 
    4162             :   MachineInstr *MUL;
    4163             :   const TargetRegisterClass *RC;
    4164             :   unsigned Opc;
    4165         476 :   switch (Pattern) {
    4166         182 :   default:
    4167             :     // Reassociate instructions.
    4168         182 :     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
    4169             :                                                 DelInstrs, InstrIdxForVirtReg);
    4170         182 :     return;
    4171           9 :   case MachineCombinerPattern::MULADDW_OP1:
    4172             :   case MachineCombinerPattern::MULADDX_OP1:
    4173             :     // MUL I=A,B,0
    4174             :     // ADD R,I,C
    4175             :     // ==> MADD R,A,B,C
    4176             :     // --- Create(MADD);
    4177           9 :     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
    4178             :       Opc = AArch64::MADDWrrr;
    4179             :       RC = &AArch64::GPR32RegClass;
    4180             :     } else {
    4181             :       Opc = AArch64::MADDXrrr;
    4182             :       RC = &AArch64::GPR64RegClass;
    4183             :     }
    4184           9 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4185           9 :     break;
    4186          42 :   case MachineCombinerPattern::MULADDW_OP2:
    4187             :   case MachineCombinerPattern::MULADDX_OP2:
    4188             :     // MUL I=A,B,0
    4189             :     // ADD R,C,I
    4190             :     // ==> MADD R,A,B,C
    4191             :     // --- Create(MADD);
    4192          42 :     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
    4193             :       Opc = AArch64::MADDWrrr;
    4194             :       RC = &AArch64::GPR32RegClass;
    4195             :     } else {
    4196             :       Opc = AArch64::MADDXrrr;
    4197             :       RC = &AArch64::GPR64RegClass;
    4198             :     }
    4199          42 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4200          42 :     break;
    4201           3 :   case MachineCombinerPattern::MULADDWI_OP1:
    4202             :   case MachineCombinerPattern::MULADDXI_OP1: {
    4203             :     // MUL I=A,B,0
    4204             :     // ADD R,I,Imm
    4205             :     // ==> ORR  V, ZR, Imm
    4206             :     // ==> MADD R,A,B,V
    4207             :     // --- Create(MADD);
    4208             :     const TargetRegisterClass *OrrRC;
    4209             :     unsigned BitSize, OrrOpc, ZeroReg;
    4210           3 :     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
    4211             :       OrrOpc = AArch64::ORRWri;
    4212             :       OrrRC = &AArch64::GPR32spRegClass;
    4213             :       BitSize = 32;
    4214             :       ZeroReg = AArch64::WZR;
    4215             :       Opc = AArch64::MADDWrrr;
    4216             :       RC = &AArch64::GPR32RegClass;
    4217             :     } else {
    4218             :       OrrOpc = AArch64::ORRXri;
    4219             :       OrrRC = &AArch64::GPR64spRegClass;
    4220             :       BitSize = 64;
    4221             :       ZeroReg = AArch64::XZR;
    4222             :       Opc = AArch64::MADDXrrr;
    4223             :       RC = &AArch64::GPR64RegClass;
    4224             :     }
    4225           3 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    4226           3 :     uint64_t Imm = Root.getOperand(2).getImm();
    4227             : 
    4228           3 :     if (Root.getOperand(3).isImm()) {
    4229           3 :       unsigned Val = Root.getOperand(3).getImm();
    4230           3 :       Imm = Imm << Val;
    4231             :     }
    4232           3 :     uint64_t UImm = SignExtend64(Imm, BitSize);
    4233             :     uint64_t Encoding;
    4234           3 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    4235             :       MachineInstrBuilder MIB1 =
    4236           4 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    4237           2 :               .addReg(ZeroReg)
    4238           2 :               .addImm(Encoding);
    4239           2 :       InsInstrs.push_back(MIB1);
    4240           2 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4241           2 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4242             :     }
    4243             :     break;
    4244             :   }
    4245           0 :   case MachineCombinerPattern::MULSUBW_OP1:
    4246             :   case MachineCombinerPattern::MULSUBX_OP1: {
    4247             :     // MUL I=A,B,0
    4248             :     // SUB R,I, C
    4249             :     // ==> SUB  V, 0, C
    4250             :     // ==> MADD R,A,B,V // = -C + A*B
    4251             :     // --- Create(MADD);
    4252             :     const TargetRegisterClass *SubRC;
    4253             :     unsigned SubOpc, ZeroReg;
    4254           0 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
    4255             :       SubOpc = AArch64::SUBWrr;
    4256             :       SubRC = &AArch64::GPR32spRegClass;
    4257             :       ZeroReg = AArch64::WZR;
    4258             :       Opc = AArch64::MADDWrrr;
    4259             :       RC = &AArch64::GPR32RegClass;
    4260             :     } else {
    4261             :       SubOpc = AArch64::SUBXrr;
    4262             :       SubRC = &AArch64::GPR64spRegClass;
    4263             :       ZeroReg = AArch64::XZR;
    4264             :       Opc = AArch64::MADDXrrr;
    4265             :       RC = &AArch64::GPR64RegClass;
    4266             :     }
    4267           0 :     unsigned NewVR = MRI.createVirtualRegister(SubRC);
    4268             :     // SUB NewVR, 0, C
    4269             :     MachineInstrBuilder MIB1 =
    4270           0 :         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
    4271           0 :             .addReg(ZeroReg)
    4272           0 :             .add(Root.getOperand(2));
    4273           0 :     InsInstrs.push_back(MIB1);
    4274           0 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4275           0 :     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4276             :     break;
    4277             :   }
    4278         147 :   case MachineCombinerPattern::MULSUBW_OP2:
    4279             :   case MachineCombinerPattern::MULSUBX_OP2:
    4280             :     // MUL I=A,B,0
    4281             :     // SUB R,C,I
    4282             :     // ==> MSUB R,A,B,C (computes C - A*B)
    4283             :     // --- Create(MSUB);
    4284         147 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
    4285             :       Opc = AArch64::MSUBWrrr;
    4286             :       RC = &AArch64::GPR32RegClass;
    4287             :     } else {
    4288             :       Opc = AArch64::MSUBXrrr;
    4289             :       RC = &AArch64::GPR64RegClass;
    4290             :     }
    4291         147 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4292         147 :     break;
    4293           1 :   case MachineCombinerPattern::MULSUBWI_OP1:
    4294             :   case MachineCombinerPattern::MULSUBXI_OP1: {
    4295             :     // MUL I=A,B,0
    4296             :     // SUB R,I, Imm
    4297             :     // ==> ORR  V, ZR, -Imm
    4298             :     // ==> MADD R,A,B,V // = -Imm + A*B
    4299             :     // --- Create(MADD);
    4300             :     const TargetRegisterClass *OrrRC;
    4301             :     unsigned BitSize, OrrOpc, ZeroReg;
    4302           1 :     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
    4303             :       OrrOpc = AArch64::ORRWri;
    4304             :       OrrRC = &AArch64::GPR32spRegClass;
    4305             :       BitSize = 32;
    4306             :       ZeroReg = AArch64::WZR;
    4307             :       Opc = AArch64::MADDWrrr;
    4308             :       RC = &AArch64::GPR32RegClass;
    4309             :     } else {
    4310             :       OrrOpc = AArch64::ORRXri;
    4311             :       OrrRC = &AArch64::GPR64spRegClass;
    4312             :       BitSize = 64;
    4313             :       ZeroReg = AArch64::XZR;
    4314             :       Opc = AArch64::MADDXrrr;
    4315             :       RC = &AArch64::GPR64RegClass;
    4316             :     }
    4317           1 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    4318           1 :     uint64_t Imm = Root.getOperand(2).getImm();
    4319           1 :     if (Root.getOperand(3).isImm()) {
    4320           1 :       unsigned Val = Root.getOperand(3).getImm();
    4321           1 :       Imm = Imm << Val;
    4322             :     }
    4323           1 :     uint64_t UImm = SignExtend64(-Imm, BitSize);
    4324             :     uint64_t Encoding;
    4325           1 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    4326             :       MachineInstrBuilder MIB1 =
    4327           2 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    4328           1 :               .addReg(ZeroReg)
    4329           1 :               .addImm(Encoding);
    4330           1 :       InsInstrs.push_back(MIB1);
    4331           1 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4332           1 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4333             :     }
    4334             :     break;
    4335             :   }
    4336             :   // Floating Point Support
    4337           2 :   case MachineCombinerPattern::FMULADDS_OP1:
    4338             :   case MachineCombinerPattern::FMULADDD_OP1:
    4339             :     // MUL I=A,B,0
    4340             :     // ADD R,I,C
    4341             :     // ==> MADD R,A,B,C
    4342             :     // --- Create(MADD);
    4343           2 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
    4344             :       Opc = AArch64::FMADDSrrr;
    4345             :       RC = &AArch64::FPR32RegClass;
    4346             :     } else {
    4347             :       Opc = AArch64::FMADDDrrr;
    4348             :       RC = &AArch64::FPR64RegClass;
    4349             :     }
    4350           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4351           2 :     break;
    4352           2 :   case MachineCombinerPattern::FMULADDS_OP2:
    4353             :   case MachineCombinerPattern::FMULADDD_OP2:
    4354             :     // FMUL I=A,B,0
    4355             :     // FADD R,C,I
    4356             :     // ==> FMADD R,A,B,C
    4357             :     // --- Create(FMADD);
    4358           2 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
    4359             :       Opc = AArch64::FMADDSrrr;
    4360             :       RC = &AArch64::FPR32RegClass;
    4361             :     } else {
    4362             :       Opc = AArch64::FMADDDrrr;
    4363             :       RC = &AArch64::FPR64RegClass;
    4364             :     }
    4365           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4366           2 :     break;
    4367             : 
    4368           1 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    4369             :     Opc = AArch64::FMLAv1i32_indexed;
    4370             :     RC = &AArch64::FPR32RegClass;
    4371           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4372             :                            FMAInstKind::Indexed);
    4373           1 :     break;
    4374           0 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    4375             :     Opc = AArch64::FMLAv1i32_indexed;
    4376             :     RC = &AArch64::FPR32RegClass;
    4377           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4378             :                            FMAInstKind::Indexed);
    4379           0 :     break;
    4380             : 
    4381           1 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    4382             :     Opc = AArch64::FMLAv1i64_indexed;
    4383             :     RC = &AArch64::FPR64RegClass;
    4384           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4385             :                            FMAInstKind::Indexed);
    4386           1 :     break;
    4387           0 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    4388             :     Opc = AArch64::FMLAv1i64_indexed;
    4389             :     RC = &AArch64::FPR64RegClass;
    4390           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4391             :                            FMAInstKind::Indexed);
    4392           0 :     break;
    4393             : 
    4394           2 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    4395             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    4396             :     RC = &AArch64::FPR64RegClass;
    4397           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
    4398             :       Opc = AArch64::FMLAv2i32_indexed;
    4399           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4400             :                              FMAInstKind::Indexed);
    4401             :     } else {
    4402             :       Opc = AArch64::FMLAv2f32;
    4403           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4404             :                              FMAInstKind::Accumulator);
    4405             :     }
    4406             :     break;
    4407           0 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    4408             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    4409             :     RC = &AArch64::FPR64RegClass;
    4410           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
    4411             :       Opc = AArch64::FMLAv2i32_indexed;
    4412           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4413             :                              FMAInstKind::Indexed);
    4414             :     } else {
    4415             :       Opc = AArch64::FMLAv2f32;
    4416           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4417             :                              FMAInstKind::Accumulator);
    4418             :     }
    4419             :     break;
    4420             : 
    4421           2 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    4422             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    4423             :     RC = &AArch64::FPR128RegClass;
    4424           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
    4425             :       Opc = AArch64::FMLAv2i64_indexed;
    4426           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4427             :                              FMAInstKind::Indexed);
    4428             :     } else {
    4429             :       Opc = AArch64::FMLAv2f64;
    4430           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4431             :                              FMAInstKind::Accumulator);
    4432             :     }
    4433             :     break;
    4434           0 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    4435             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    4436             :     RC = &AArch64::FPR128RegClass;
    4437           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
    4438             :       Opc = AArch64::FMLAv2i64_indexed;
    4439           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4440             :                              FMAInstKind::Indexed);
    4441             :     } else {
    4442             :       Opc = AArch64::FMLAv2f64;
    4443           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4444             :                              FMAInstKind::Accumulator);
    4445             :     }
    4446             :     break;
    4447             : 
    4448           2 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    4449             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    4450             :     RC = &AArch64::FPR128RegClass;
    4451           2 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
    4452             :       Opc = AArch64::FMLAv4i32_indexed;
    4453           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4454             :                              FMAInstKind::Indexed);
    4455             :     } else {
    4456             :       Opc = AArch64::FMLAv4f32;
    4457           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4458             :                              FMAInstKind::Accumulator);
    4459             :     }
    4460             :     break;
    4461             : 
    4462           0 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    4463             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    4464             :     RC = &AArch64::FPR128RegClass;
    4465           0 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
    4466             :       Opc = AArch64::FMLAv4i32_indexed;
    4467           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4468             :                              FMAInstKind::Indexed);
    4469             :     } else {
    4470             :       Opc = AArch64::FMLAv4f32;
    4471           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4472             :                              FMAInstKind::Accumulator);
    4473             :     }
    4474             :     break;
    4475             : 
    4476           0 :   case MachineCombinerPattern::FMULSUBS_OP1:
    4477             :   case MachineCombinerPattern::FMULSUBD_OP1: {
    4478             :     // FMUL I=A,B,0
    4479             :     // FSUB R,I,C
    4480             :     // ==> FNMSUB R,A,B,C // = -C + A*B
    4481             :     // --- Create(FNMSUB);
    4482           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
    4483             :       Opc = AArch64::FNMSUBSrrr;
    4484             :       RC = &AArch64::FPR32RegClass;
    4485             :     } else {
    4486             :       Opc = AArch64::FNMSUBDrrr;
    4487             :       RC = &AArch64::FPR64RegClass;
    4488             :     }
    4489           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4490           0 :     break;
    4491             :   }
    4492             : 
    4493           4 :   case MachineCombinerPattern::FNMULSUBS_OP1:
    4494             :   case MachineCombinerPattern::FNMULSUBD_OP1: {
    4495             :     // FNMUL I=A,B,0
    4496             :     // FSUB R,I,C
    4497             :     // ==> FNMADD R,A,B,C // = -A*B - C
    4498             :     // --- Create(FNMADD);
    4499           4 :     if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
    4500             :       Opc = AArch64::FNMADDSrrr;
    4501             :       RC = &AArch64::FPR32RegClass;
    4502             :     } else {
    4503             :       Opc = AArch64::FNMADDDrrr;
    4504             :       RC = &AArch64::FPR64RegClass;
    4505             :     }
    4506           4 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4507           4 :     break;
    4508             :   }
    4509             : 
    4510           0 :   case MachineCombinerPattern::FMULSUBS_OP2:
    4511             :   case MachineCombinerPattern::FMULSUBD_OP2: {
    4512             :     // FMUL I=A,B,0
    4513             :     // FSUB R,C,I
    4514             :     // ==> FMSUB R,A,B,C (computes C - A*B)
    4515             :     // --- Create(FMSUB);
    4516           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
    4517             :       Opc = AArch64::FMSUBSrrr;
    4518             :       RC = &AArch64::FPR32RegClass;
    4519             :     } else {
    4520             :       Opc = AArch64::FMSUBDrrr;
    4521             :       RC = &AArch64::FPR64RegClass;
    4522             :     }
    4523           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4524           0 :     break;
    4525             :   }
    4526             : 
    4527           2 :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    4528             :     Opc = AArch64::FMLSv1i32_indexed;
    4529             :     RC = &AArch64::FPR32RegClass;
    4530           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4531             :                            FMAInstKind::Indexed);
    4532           2 :     break;
    4533             : 
    4534           2 :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    4535             :     Opc = AArch64::FMLSv1i64_indexed;
    4536             :     RC = &AArch64::FPR64RegClass;
    4537           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4538             :                            FMAInstKind::Indexed);
    4539           2 :     break;
    4540             : 
    4541          12 :   case MachineCombinerPattern::FMLSv2f32_OP2:
    4542             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    4543             :     RC = &AArch64::FPR64RegClass;
    4544          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
    4545             :       Opc = AArch64::FMLSv2i32_indexed;
    4546           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4547             :                              FMAInstKind::Indexed);
    4548             :     } else {
    4549             :       Opc = AArch64::FMLSv2f32;
    4550          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4551             :                              FMAInstKind::Accumulator);
    4552             :     }
    4553             :     break;
    4554             : 
    4555          12 :   case MachineCombinerPattern::FMLSv2f64_OP2:
    4556             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    4557             :     RC = &AArch64::FPR128RegClass;
    4558          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
    4559             :       Opc = AArch64::FMLSv2i64_indexed;
    4560           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4561             :                              FMAInstKind::Indexed);
    4562             :     } else {
    4563             :       Opc = AArch64::FMLSv2f64;
    4564          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4565             :                              FMAInstKind::Accumulator);
    4566             :     }
    4567             :     break;
    4568             : 
    4569          12 :   case MachineCombinerPattern::FMLSv4f32_OP2:
    4570             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    4571             :     RC = &AArch64::FPR128RegClass;
    4572          12 :     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
    4573             :       Opc = AArch64::FMLSv4i32_indexed;
    4574           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4575             :                              FMAInstKind::Indexed);
    4576             :     } else {
    4577             :       Opc = AArch64::FMLSv4f32;
    4578          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4579             :                              FMAInstKind::Accumulator);
    4580             :     }
    4581             :     break;
    4582          12 :   case MachineCombinerPattern::FMLSv2f32_OP1:
    4583             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    4584             :     RC = &AArch64::FPR64RegClass;
    4585          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4586             :     MachineInstrBuilder MIB1 =
    4587          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
    4588          12 :             .add(Root.getOperand(2));
    4589          12 :     InsInstrs.push_back(MIB1);
    4590          12 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4591          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
    4592             :       Opc = AArch64::FMLAv2i32_indexed;
    4593           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4594             :                              FMAInstKind::Indexed, &NewVR);
    4595             :     } else {
    4596             :       Opc = AArch64::FMLAv2f32;
    4597          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4598             :                              FMAInstKind::Accumulator, &NewVR);
    4599             :     }
    4600             :     break;
    4601             :   }
    4602          12 :   case MachineCombinerPattern::FMLSv4f32_OP1:
    4603             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    4604             :     RC = &AArch64::FPR128RegClass;
    4605          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4606             :     MachineInstrBuilder MIB1 =
    4607          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
    4608          12 :             .add(Root.getOperand(2));
    4609          12 :     InsInstrs.push_back(MIB1);
    4610          12 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4611          12 :     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
    4612             :       Opc = AArch64::FMLAv4i32_indexed;
    4613           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4614             :                              FMAInstKind::Indexed, &NewVR);
    4615             :     } else {
    4616             :       Opc = AArch64::FMLAv4f32;
    4617          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4618             :                              FMAInstKind::Accumulator, &NewVR);
    4619             :     }
    4620             :     break;
    4621             :   }
    4622          12 :   case MachineCombinerPattern::FMLSv2f64_OP1:
    4623             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    4624             :     RC = &AArch64::FPR128RegClass;
    4625          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4626             :     MachineInstrBuilder MIB1 =
    4627          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
    4628          12 :             .add(Root.getOperand(2));
    4629          12 :     InsInstrs.push_back(MIB1);
    4630          12 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4631          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
    4632             :       Opc = AArch64::FMLAv2i64_indexed;
    4633           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4634             :                              FMAInstKind::Indexed, &NewVR);
    4635             :     } else {
    4636             :       Opc = AArch64::FMLAv2f64;
    4637          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4638             :                              FMAInstKind::Accumulator, &NewVR);
    4639             :     }
    4640             :     break;
    4641             :   }
    4642             :   } // end switch (Pattern)
    4643             :   // Record MUL and ADD/SUB for deletion
    4644         294 :   DelInstrs.push_back(MUL);
    4645         294 :   DelInstrs.push_back(&Root);
    4646             : }
    4647             : 
    4648             : /// Replace csincr-branch sequence by simple conditional branch
    4649             : ///
    4650             : /// Examples:
    4651             : /// 1. \code
    4652             : ///   csinc  w9, wzr, wzr, <condition code>
    4653             : ///   tbnz   w9, #0, 0x44
    4654             : ///    \endcode
    4655             : /// to
    4656             : ///    \code
    4657             : ///   b.<inverted condition code>
    4658             : ///    \endcode
    4659             : ///
    4660             : /// 2. \code
    4661             : ///   csinc w9, wzr, wzr, <condition code>
    4662             : ///   tbz   w9, #0, 0x44
    4663             : ///    \endcode
    4664             : /// to
    4665             : ///    \code
    4666             : ///   b.<condition code>
    4667             : ///    \endcode
    4668             : ///
    4669             : /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
    4670             : /// compare's constant operand is power of 2.
    4671             : ///
    4672             : /// Examples:
    4673             : ///    \code
    4674             : ///   and  w8, w8, #0x400
    4675             : ///   cbnz w8, L1
    4676             : ///    \endcode
    4677             : /// to
    4678             : ///    \code
    4679             : ///   tbnz w8, #10, L1
    4680             : ///    \endcode
    4681             : ///
    4682             : /// \param  MI Conditional Branch
    4683             : /// \return True when the simple conditional branch is generated
    4684             : ///
    4685         954 : bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
    4686             :   bool IsNegativeBranch = false;
    4687             :   bool IsTestAndBranch = false;
    4688             :   unsigned TargetBBInMI = 0;
    4689        1908 :   switch (MI.getOpcode()) {
    4690           0 :   default:
    4691           0 :     llvm_unreachable("Unknown branch instruction?");
    4692             :   case AArch64::Bcc:
    4693             :     return false;
    4694             :   case AArch64::CBZW:
    4695             :   case AArch64::CBZX:
    4696             :     TargetBBInMI = 1;
    4697             :     break;
    4698         183 :   case AArch64::CBNZW:
    4699             :   case AArch64::CBNZX:
    4700             :     TargetBBInMI = 1;
    4701             :     IsNegativeBranch = true;
    4702         183 :     break;
    4703          80 :   case AArch64::TBZW:
    4704             :   case AArch64::TBZX:
    4705             :     TargetBBInMI = 2;
    4706             :     IsTestAndBranch = true;
    4707          80 :     break;
    4708          31 :   case AArch64::TBNZW:
    4709             :   case AArch64::TBNZX:
    4710             :     TargetBBInMI = 2;
    4711             :     IsNegativeBranch = true;
    4712             :     IsTestAndBranch = true;
    4713          31 :     break;
    4714             :   }
    4715             :   // So we increment a zero register and test for bits other
    4716             :   // than bit 0? Conservatively bail out in case the verifier
    4717             :   // missed this case.
    4718         389 :   if (IsTestAndBranch && MI.getOperand(1).getImm())
    4719             :     return false;
    4720             : 
    4721             :   // Find Definition.
    4722             :   assert(MI.getParent() && "Incomplete machine instruciton\n");
    4723         350 :   MachineBasicBlock *MBB = MI.getParent();
    4724         350 :   MachineFunction *MF = MBB->getParent();
    4725         350 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    4726         350 :   unsigned VReg = MI.getOperand(0).getReg();
    4727         350 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
    4728             :     return false;
    4729             : 
    4730         350 :   MachineInstr *DefMI = MRI->getVRegDef(VReg);
    4731             : 
    4732             :   // Look through COPY instructions to find definition.
    4733         396 :   while (DefMI->isCopy()) {
    4734         201 :     unsigned CopyVReg = DefMI->getOperand(1).getReg();
    4735         201 :     if (!MRI->hasOneNonDBGUse(CopyVReg))
    4736             :       return false;
    4737         145 :     if (!MRI->hasOneDef(CopyVReg))
    4738             :       return false;
    4739          46 :     DefMI = MRI->getVRegDef(CopyVReg);
    4740             :   }
    4741             : 
    4742         195 :   switch (DefMI->getOpcode()) {
    4743             :   default:
    4744             :     return false;
    4745             :   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
    4746           7 :   case AArch64::ANDWri:
    4747             :   case AArch64::ANDXri: {
    4748           7 :     if (IsTestAndBranch)
    4749             :       return false;
    4750           7 :     if (DefMI->getParent() != MBB)
    4751             :       return false;
    4752           6 :     if (!MRI->hasOneNonDBGUse(VReg))
    4753             :       return false;
    4754             : 
    4755           5 :     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    4756           5 :     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
    4757           5 :         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    4758             :     if (!isPowerOf2_64(Mask))
    4759             :       return false;
    4760             : 
    4761             :     MachineOperand &MO = DefMI->getOperand(1);
    4762           5 :     unsigned NewReg = MO.getReg();
    4763           5 :     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
    4764             :       return false;
    4765             : 
    4766             :     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
    4767             : 
    4768             :     MachineBasicBlock &RefToMBB = *MBB;
    4769           5 :     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    4770             :     DebugLoc DL = MI.getDebugLoc();
    4771             :     unsigned Imm = Log2_64(Mask);
    4772             :     unsigned Opc = (Imm < 32)
    4773           5 :                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
    4774             :                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    4775          10 :     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
    4776           5 :                               .addReg(NewReg)
    4777           5 :                               .addImm(Imm)
    4778           5 :                               .addMBB(TBB);
    4779             :     // Register lives on to the CBZ now.
    4780             :     MO.setIsKill(false);
    4781             : 
    4782             :     // For immediate smaller than 32, we need to use the 32-bit
    4783             :     // variant (W) in all cases. Indeed the 64-bit variant does not
    4784             :     // allow to encode them.
    4785             :     // Therefore, if the input register is 64-bit, we need to take the
    4786             :     // 32-bit sub-part.
    4787           5 :     if (!Is32Bit && Imm < 32)
    4788           3 :       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    4789           5 :     MI.eraseFromParent();
    4790             :     return true;
    4791             :   }
    4792             :   // Look for CSINC
    4793           1 :   case AArch64::CSINCWr:
    4794             :   case AArch64::CSINCXr: {
    4795           2 :     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
    4796           1 :           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
    4797           0 :         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
    4798           0 :           DefMI->getOperand(2).getReg() == AArch64::XZR))
    4799             :       return false;
    4800             : 
    4801           1 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    4802             :       return false;
    4803             : 
    4804           1 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    4805             :     // Convert only when the condition code is not modified between
    4806             :     // the CSINC and the branch. The CC may be used by other
    4807             :     // instructions in between.
    4808           1 :     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
    4809             :       return false;
    4810             :     MachineBasicBlock &RefToMBB = *MBB;
    4811           2 :     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    4812             :     DebugLoc DL = MI.getDebugLoc();
    4813           1 :     if (IsNegativeBranch)
    4814             :       CC = AArch64CC::getInvertedCondCode(CC);
    4815           2 :     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    4816           1 :     MI.eraseFromParent();
    4817             :     return true;
    4818             :   }
    4819             :   }
    4820             : }
    4821             : 
    4822             : std::pair<unsigned, unsigned>
    4823        2451 : AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
    4824             :   const unsigned Mask = AArch64II::MO_FRAGMENT;
    4825        2451 :   return std::make_pair(TF & Mask, TF & ~Mask);
    4826             : }
    4827             : 
    4828             : ArrayRef<std::pair<unsigned, const char *>>
    4829        2458 : AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
    4830             :   using namespace AArch64II;
    4831             : 
    4832             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4833             :       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
    4834             :       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
    4835             :       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
    4836             :       {MO_HI12, "aarch64-hi12"}};
    4837        2458 :   return makeArrayRef(TargetFlags);
    4838             : }
    4839             : 
    4840             : ArrayRef<std::pair<unsigned, const char *>>
    4841        1238 : AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
    4842             :   using namespace AArch64II;
    4843             : 
    4844             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4845             :       {MO_COFFSTUB, "aarch64-coffstub"},
    4846             :       {MO_GOT, "aarch64-got"},   {MO_NC, "aarch64-nc"},
    4847             :       {MO_TLS, "aarch64-tls"},   {MO_DLLIMPORT, "aarch64-dllimport"}};
    4848        1238 :   return makeArrayRef(TargetFlags);
    4849             : }
    4850             : 
    4851             : ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
    4852          42 : AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
    4853             :   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
    4854             :       {{MOSuppressPair, "aarch64-suppress-pair"},
    4855             :        {MOStridedAccess, "aarch64-strided-access"}};
    4856          42 :   return makeArrayRef(TargetFlags);
    4857             : }
    4858             : 
    4859             : /// Constants defining how certain sequences should be outlined.
    4860             : /// This encompasses how an outlined function should be called, and what kind of
    4861             : /// frame should be emitted for that outlined function.
    4862             : ///
    4863             : /// \p MachineOutlinerDefault implies that the function should be called with
    4864             : /// a save and restore of LR to the stack.
    4865             : ///
    4866             : /// That is,
    4867             : ///
    4868             : /// I1     Save LR                    OUTLINED_FUNCTION:
    4869             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4870             : /// I3     Restore LR                 I2
    4871             : ///                                   I3
    4872             : ///                                   RET
    4873             : ///
    4874             : /// * Call construction overhead: 3 (save + BL + restore)
    4875             : /// * Frame construction overhead: 1 (ret)
    4876             : /// * Requires stack fixups? Yes
    4877             : ///
    4878             : /// \p MachineOutlinerTailCall implies that the function is being created from
    4879             : /// a sequence of instructions ending in a return.
    4880             : ///
    4881             : /// That is,
    4882             : ///
    4883             : /// I1                             OUTLINED_FUNCTION:
    4884             : /// I2 --> B OUTLINED_FUNCTION     I1
    4885             : /// RET                            I2
    4886             : ///                                RET
    4887             : ///
    4888             : /// * Call construction overhead: 1 (B)
    4889             : /// * Frame construction overhead: 0 (Return included in sequence)
    4890             : /// * Requires stack fixups? No
    4891             : ///
    4892             : /// \p MachineOutlinerNoLRSave implies that the function should be called using
    4893             : /// a BL instruction, but doesn't require LR to be saved and restored. This
    4894             : /// happens when LR is known to be dead.
    4895             : ///
    4896             : /// That is,
    4897             : ///
    4898             : /// I1                                OUTLINED_FUNCTION:
    4899             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4900             : /// I3                                I2
    4901             : ///                                   I3
    4902             : ///                                   RET
    4903             : ///
    4904             : /// * Call construction overhead: 1 (BL)
    4905             : /// * Frame construction overhead: 1 (RET)
    4906             : /// * Requires stack fixups? No
    4907             : ///
    4908             : /// \p MachineOutlinerThunk implies that the function is being created from
    4909             : /// a sequence of instructions ending in a call. The outlined function is
    4910             : /// called with a BL instruction, and the outlined function tail-calls the
    4911             : /// original call destination.
    4912             : ///
    4913             : /// That is,
    4914             : ///
    4915             : /// I1                                OUTLINED_FUNCTION:
    4916             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4917             : /// BL f                              I2
    4918             : ///                                   B f
    4919             : /// * Call construction overhead: 1 (BL)
    4920             : /// * Frame construction overhead: 0
    4921             : /// * Requires stack fixups? No
    4922             : ///
    4923             : /// \p MachineOutlinerRegSave implies that the function should be called with a
    4924             : /// save and restore of LR to an available register. This allows us to avoid
    4925             : /// stack fixups. Note that this outlining variant is compatible with the
    4926             : /// NoLRSave case.
    4927             : ///
    4928             : /// That is,
    4929             : ///
    4930             : /// I1     Save LR                    OUTLINED_FUNCTION:
    4931             : /// I2 --> BL OUTLINED_FUNCTION       I1
    4932             : /// I3     Restore LR                 I2
    4933             : ///                                   I3
    4934             : ///                                   RET
    4935             : ///
    4936             : /// * Call construction overhead: 3 (save + BL + restore)
    4937             : /// * Frame construction overhead: 1 (ret)
    4938             : /// * Requires stack fixups? No
    4939             : enum MachineOutlinerClass {
    4940             :   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
    4941             :   MachineOutlinerTailCall, /// Only emit a branch.
    4942             :   MachineOutlinerNoLRSave, /// Emit a call and return.
    4943             :   MachineOutlinerThunk,    /// Emit a call and tail-call.
    4944             :   MachineOutlinerRegSave   /// Same as default, but save to a register.
    4945             : };
    4946             : 
    4947             : enum MachineOutlinerMBBFlags {
    4948             :   LRUnavailableSomewhere = 0x2,
    4949             :   HasCalls = 0x4
    4950             : };
    4951             : 
    4952             : unsigned
    4953         254 : AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
    4954         254 :   MachineFunction *MF = C.getMF();
    4955             :   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
    4956         254 :       MF->getSubtarget().getRegisterInfo());
    4957             : 
    4958             :   // Check if there is an available register across the sequence that we can
    4959             :   // use.
    4960         448 :   for (unsigned Reg : AArch64::GPR64RegClass) {
    4961         443 :     if (!ARI->isReservedReg(*MF, Reg) &&
    4962         856 :         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
    4963         428 :         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
    4964         413 :         Reg != AArch64::X17 && // Ditto for X17.
    4965        1115 :         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
    4966         249 :       return Reg;
    4967             :   }
    4968             : 
    4969             :   // No suitable register. Return 0.
    4970             :   return 0u;
    4971             : }
    4972             : 
    4973             : outliner::OutlinedFunction
    4974         176 : AArch64InstrInfo::getOutliningCandidateInfo(
    4975             :     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
    4976         176 :   unsigned SequenceSize = std::accumulate(
    4977         176 :       RepeatedSequenceLocs[0].front(),
    4978         176 :       std::next(RepeatedSequenceLocs[0].back()),
    4979             :       0, [this](unsigned Sum, const MachineInstr &MI) {
    4980         858 :         return Sum + getInstSizeInBytes(MI);
    4981         176 :       });
    4982             : 
    4983             :   // Compute liveness information for each candidate.
    4984         176 :   const TargetRegisterInfo &TRI = getRegisterInfo();
    4985             :   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
    4986         438 :                 [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
    4987             : 
    4988             :   // According to the AArch64 Procedure Call Standard, the following are
    4989             :   // undefined on entry/exit from a function call:
    4990             :   //
    4991             :   // * Registers x16, x17, (and thus w16, w17)
    4992             :   // * Condition codes (and thus the NZCV register)
    4993             :   //
    4994             :   // Because if this, we can't outline any sequence of instructions where
    4995             :   // one
    4996             :   // of these registers is live into/across it. Thus, we need to delete
    4997             :   // those
    4998             :   // candidates.
    4999             :   auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
    5000             :     LiveRegUnits LRU = C.LRU;
    5001             :     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
    5002             :             !LRU.available(AArch64::NZCV));
    5003             :   };
    5004             : 
    5005             :   // Erase every candidate that violates the restrictions above. (It could be
    5006             :   // true that we have viable candidates, so it's not worth bailing out in
    5007             :   // the case that, say, 1 out of 20 candidates violate the restructions.)
    5008             :   RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
    5009             :                                             RepeatedSequenceLocs.end(),
    5010         176 :                                             CantGuaranteeValueAcrossCall),
    5011             :                              RepeatedSequenceLocs.end());
    5012             : 
    5013             :   // If the sequence is empty, we're done.
    5014         176 :   if (RepeatedSequenceLocs.empty())
    5015             :     return outliner::OutlinedFunction();
    5016             : 
    5017             :   // At this point, we have only "safe" candidates to outline. Figure out
    5018             :   // frame + call instruction information.
    5019             : 
    5020         171 :   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
    5021             : 
    5022             :   // Helper lambda which sets call information for every candidate.
    5023             :   auto SetCandidateCallInfo =
    5024             :       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
    5025         587 :         for (outliner::Candidate &C : RepeatedSequenceLocs)
    5026             :           C.setCallInfo(CallID, NumBytesForCall);
    5027             :       };
    5028             : 
    5029             :   unsigned FrameID = MachineOutlinerDefault;
    5030             :   unsigned NumBytesToCreateFrame = 4;
    5031             : 
    5032             :   // If the last instruction in any candidate is a terminator, then we should
    5033             :   // tail call all of the candidates.
    5034         171 :   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    5035             :     FrameID = MachineOutlinerTailCall;
    5036             :     NumBytesToCreateFrame = 0;
    5037             :     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
    5038             :   }
    5039             : 
    5040         136 :   else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
    5041             :     // FIXME: Do we need to check if the code after this uses the value of LR?
    5042             :     FrameID = MachineOutlinerThunk;
    5043             :     NumBytesToCreateFrame = 0;
    5044             :     SetCandidateCallInfo(MachineOutlinerThunk, 4);
    5045             :   }
    5046             : 
    5047             :   // Make sure that LR isn't live on entry to this candidate. The only
    5048             :   // instructions that use LR that could possibly appear in a repeated sequence
    5049             :   // are calls. Therefore, we only have to check and see if LR is dead on entry
    5050             :   // to (or exit from) some candidate.
    5051         119 :   else if (std::all_of(RepeatedSequenceLocs.begin(),
    5052             :                        RepeatedSequenceLocs.end(),
    5053             :                        [](outliner::Candidate &C) {
    5054           0 :                          return C.LRU.available(AArch64::LR);
    5055             :                          })) {
    5056             :     FrameID = MachineOutlinerNoLRSave;
    5057             :     NumBytesToCreateFrame = 4;
    5058             :     SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
    5059             :   }
    5060             : 
    5061             :   // LR is live, so we need to save it. Decide whether it should be saved to
    5062             :   // the stack, or if it can be saved to a register.
    5063             :   else {
    5064         103 :     if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
    5065             :                     [this](outliner::Candidate &C) {
    5066           0 :                       return findRegisterToSaveLRTo(C);
    5067             :                     })) {
    5068             :       // Every candidate has an available callee-saved register for the save.
    5069             :       // We can save LR to a register.
    5070             :       FrameID = MachineOutlinerRegSave;
    5071             :       NumBytesToCreateFrame = 4;
    5072             :       SetCandidateCallInfo(MachineOutlinerRegSave, 12);
    5073             :     }
    5074             : 
    5075             :     else {
    5076             :       // At least one candidate does not have an available callee-saved
    5077             :       // register. We must save LR to the stack.
    5078             :       FrameID = MachineOutlinerDefault;
    5079             :       NumBytesToCreateFrame = 4;
    5080             :       SetCandidateCallInfo(MachineOutlinerDefault, 12);
    5081             :     }
    5082             :   }
    5083             : 
    5084             :   // Check if the range contains a call. These require a save + restore of the
    5085             :   // link register.
    5086         171 :   if (std::any_of(RepeatedSequenceLocs[0].front(),
    5087         171 :                   RepeatedSequenceLocs[0].back(),
    5088             :                   [](const MachineInstr &MI) { return MI.isCall(); }))
    5089           7 :     NumBytesToCreateFrame += 8; // Save + restore the link register.
    5090             : 
    5091             :   // Handle the last instruction separately. If this is a tail call, then the
    5092             :   // last instruction is a call. We don't want to save + restore in this case.
    5093             :   // However, it could be possible that the last instruction is a call without
    5094             :   // it being valid to tail call this sequence. We should consider this as well.
    5095         328 :   else if (FrameID != MachineOutlinerThunk &&
    5096         276 :            FrameID != MachineOutlinerTailCall &&
    5097         112 :            RepeatedSequenceLocs[0].back()->isCall())
    5098           0 :     NumBytesToCreateFrame += 8;
    5099             : 
    5100             :   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
    5101         171 :                                     NumBytesToCreateFrame, FrameID);
    5102             : }
    5103             : 
    5104         114 : bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    5105             :     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
    5106         114 :   const Function &F = MF.getFunction();
    5107             : 
    5108             :   // Can F be deduplicated by the linker? If it can, don't outline from it.
    5109         114 :   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    5110             :     return false;
    5111             : 
    5112             :   // Don't outline from functions with section markings; the program could
    5113             :   // expect that all the code is in the named section.
    5114             :   // FIXME: Allow outlining from multiple functions with the same section
    5115             :   // marking.
    5116         112 :   if (F.hasSection())
    5117             :     return false;
    5118             : 
    5119             :   // Outlining from functions with redzones is unsafe since the outliner may
    5120             :   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
    5121             :   // outline from it.
    5122         109 :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    5123         109 :   if (!AFI || AFI->hasRedZone().getValueOr(true))
    5124           1 :     return false;
    5125             : 
    5126             :   // It's safe to outline from MF.
    5127             :   return true;
    5128             : }
    5129             : 
    5130             : unsigned
    5131         126 : AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
    5132             :   unsigned Flags = 0x0;
    5133             :   // Check if there's a call inside this MachineBasicBlock. If there is, then
    5134             :   // set a flag.
    5135         126 :   if (std::any_of(MBB.begin(), MBB.end(),
    5136             :                   [](MachineInstr &MI) { return MI.isCall(); }))
    5137             :     Flags |= MachineOutlinerMBBFlags::HasCalls;
    5138             : 
    5139             :   // Check if LR is available through all of the MBB. If it's not, then set
    5140             :   // a flag.
    5141             :   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
    5142             :          "Suitable Machine Function for outlining must track liveness");
    5143         126 :   LiveRegUnits LRU(getRegisterInfo());
    5144         126 :   LRU.addLiveOuts(MBB);
    5145             : 
    5146             :   std::for_each(MBB.rbegin(),
    5147             :                 MBB.rend(),
    5148        1379 :                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
    5149             : 
    5150         126 :   if (!LRU.available(AArch64::LR))
    5151         110 :       Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
    5152             : 
    5153         126 :   return Flags;
    5154             : }
    5155             : 
    5156             : outliner::InstrType
    5157        1253 : AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
    5158             :                                    unsigned Flags) const {
    5159             :   MachineInstr &MI = *MIT;
    5160        1253 :   MachineBasicBlock *MBB = MI.getParent();
    5161        1253 :   MachineFunction *MF = MBB->getParent();
    5162        1253 :   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
    5163             : 
    5164             :   // Don't outline LOHs.
    5165        1253 :   if (FuncInfo->getLOHRelated().count(&MI))
    5166             :     return outliner::InstrType::Illegal;
    5167             : 
    5168             :   // Don't allow debug values to impact outlining type.
    5169             :   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    5170             :     return outliner::InstrType::Invisible;
    5171             : 
    5172             :   // At this point, KILL instructions don't really tell us much so we can go
    5173             :   // ahead and skip over them.
    5174        1247 :   if (MI.isKill())
    5175             :     return outliner::InstrType::Invisible;
    5176             : 
    5177             :   // Is this a terminator for a basic block?
    5178        1246 :   if (MI.isTerminator()) {
    5179             : 
    5180             :     // Is this the end of a function?
    5181         210 :     if (MI.getParent()->succ_empty())
    5182             :       return outliner::InstrType::Legal;
    5183             : 
    5184             :     // It's not, so don't outline it.
    5185           7 :     return outliner::InstrType::Illegal;
    5186             :   }
    5187             : 
    5188             :   // Make sure none of the operands are un-outlinable.
    5189        4793 :   for (const MachineOperand &MOP : MI.operands()) {
    5190        3734 :     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
    5191             :         MOP.isTargetIndex())
    5192             :       return outliner::InstrType::Illegal;
    5193             : 
    5194             :     // If it uses LR or W30 explicitly, then don't touch it.
    5195        3679 :     if (MOP.isReg() && !MOP.isImplicit() &&
    5196        2353 :         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
    5197             :       return outliner::InstrType::Illegal;
    5198             :   }
    5199             : 
    5200             :   // Special cases for instructions that can always be outlined, but will fail
    5201             :   // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
    5202             :   // be outlined because they don't require a *specific* value to be in LR.
    5203        2118 :   if (MI.getOpcode() == AArch64::ADRP)
    5204             :     return outliner::InstrType::Legal;
    5205             : 
    5206             :   // If MI is a call we might be able to outline it. We don't want to outline
    5207             :   // any calls that rely on the position of items on the stack. When we outline
    5208             :   // something containing a call, we have to emit a save and restore of LR in
    5209             :   // the outlined function. Currently, this always happens by saving LR to the
    5210             :   // stack. Thus, if we outline, say, half the parameters for a function call
    5211             :   // plus the call, then we'll break the callee's expectations for the layout
    5212             :   // of the stack.
    5213             :   //
    5214             :   // FIXME: Allow calls to functions which construct a stack frame, as long
    5215             :   // as they don't access arguments on the stack.
    5216             :   // FIXME: Figure out some way to analyze functions defined in other modules.
    5217             :   // We should be able to compute the memory usage based on the IR calling
    5218             :   // convention, even if we can't see the definition.
    5219        1001 :   if (MI.isCall()) {
    5220             :     // Get the function associated with the call. Look at each operand and find
    5221             :     // the one that represents the callee and get its name.
    5222             :     const Function *Callee = nullptr;
    5223          16 :     for (const MachineOperand &MOP : MI.operands()) {
    5224          16 :       if (MOP.isGlobal()) {
    5225          16 :         Callee = dyn_cast<Function>(MOP.getGlobal());
    5226             :         break;
    5227             :       }
    5228             :     }
    5229             : 
    5230             :     // Never outline calls to mcount.  There isn't any rule that would require
    5231             :     // this, but the Linux kernel's "ftrace" feature depends on it.
    5232          16 :     if (Callee && Callee->getName() == "\01_mcount")
    5233             :       return outliner::InstrType::Illegal;
    5234             : 
    5235             :     // If we don't know anything about the callee, assume it depends on the
    5236             :     // stack layout of the caller. In that case, it's only legal to outline
    5237             :     // as a tail-call.  Whitelist the call instructions we know about so we
    5238             :     // don't get unexpected results with call pseudo-instructions.
    5239             :     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    5240          32 :     if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
    5241             :       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
    5242             : 
    5243          16 :     if (!Callee)
    5244             :       return UnknownCallOutlineType;
    5245             : 
    5246             :     // We have a function we have information about. Check it if it's something
    5247             :     // can safely outline.
    5248          16 :     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
    5249             : 
    5250             :     // We don't know what's going on with the callee at all. Don't touch it.
    5251          16 :     if (!CalleeMF)
    5252             :       return UnknownCallOutlineType;
    5253             : 
    5254             :     // Check if we know anything about the callee saves on the function. If we
    5255             :     // don't, then don't touch it, since that implies that we haven't
    5256             :     // computed anything about its stack frame yet.
    5257          10 :     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    5258          10 :     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
    5259             :         MFI.getNumObjects() > 0)
    5260           2 :       return UnknownCallOutlineType;
    5261             : 
    5262             :     // At this point, we can say that CalleeMF ought to not pass anything on the
    5263             :     // stack. Therefore, we can outline it.
    5264             :     return outliner::InstrType::Legal;
    5265             :   }
    5266             : 
    5267             :   // Don't outline positions.
    5268             :   if (MI.isPosition())
    5269             :     return outliner::InstrType::Illegal;
    5270             : 
    5271             :   // Don't touch the link register or W30.
    5272        1970 :   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
    5273             :       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    5274           0 :     return outliner::InstrType::Illegal;
    5275             : 
    5276             :   // Does this use the stack?
    5277        1919 :   if (MI.modifiesRegister(AArch64::SP, &RI) ||
    5278             :       MI.readsRegister(AArch64::SP, &RI)) {
    5279             :     // True if there is no chance that any outlined candidate from this range
    5280             :     // could require stack fixups. That is, both
    5281             :     // * LR is available in the range (No save/restore around call)
    5282             :     // * The range doesn't include calls (No save/restore in outlined frame)
    5283             :     // are true.
    5284             :     // FIXME: This is very restrictive; the flags check the whole block,
    5285             :     // not just the bit we will try to outline.
    5286             :     bool MightNeedStackFixUp =
    5287         157 :         (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
    5288             :                   MachineOutlinerMBBFlags::HasCalls));
    5289             : 
    5290             :     // If this instruction is in a range where it *never* needs to be fixed
    5291             :     // up, then we can *always* outline it. This is true even if it's not
    5292             :     // possible to fix that instruction up.
    5293             :     //
    5294             :     // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
    5295             :     // use SP. Suppose that I1 sits within a range that definitely doesn't
    5296             :     // need stack fixups, while I2 sits in a range that does.
    5297             :     //
    5298             :     // First, I1 can be outlined as long as we *never* fix up the stack in
    5299             :     // any sequence containing it. I1 is already a safe instruction in the
    5300             :     // original program, so as long as we don't modify it we're good to go.
    5301             :     // So this leaves us with showing that outlining I2 won't break our
    5302             :     // program.
    5303             :     //
    5304             :     // Suppose I1 and I2 belong to equivalent candidate sequences. When we
    5305             :     // look at I2, we need to see if it can be fixed up. Suppose I2, (and
    5306             :     // thus I1) cannot be fixed up. Then I2 will be assigned an unique
    5307             :     // integer label; thus, I2 cannot belong to any candidate sequence (a
    5308             :     // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
    5309             :     // as well, so we're good. Thus, I1 is always safe to outline.
    5310             :     //
    5311             :     // This gives us two things: first off, it buys us some more instructions
    5312             :     // for our search space by deeming stack instructions illegal only when
    5313             :     // they can't be fixed up AND we might have to fix them up. Second off,
    5314             :     // This allows us to catch tricky instructions like, say,
    5315             :     // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
    5316             :     // be paired with later SUBXris, which might *not* end up being outlined.
    5317             :     // If we mess with the stack to save something, then an ADDXri messes with
    5318             :     // it *after*, then we aren't going to restore the right something from
    5319             :     // the stack if we don't outline the corresponding SUBXri first. ADDXris and
    5320             :     // SUBXris are extremely common in prologue/epilogue code, so supporting
    5321             :     // them in the outliner can be a pretty big win!
    5322         157 :     if (!MightNeedStackFixUp)
    5323             :       return outliner::InstrType::Legal;
    5324             : 
    5325             :     // Any modification of SP will break our code to save/restore LR.
    5326             :     // FIXME: We could handle some instructions which add a constant offset to
    5327             :     // SP, with a bit more work.
    5328         155 :     if (MI.modifiesRegister(AArch64::SP, &RI))
    5329             :       return outliner::InstrType::Illegal;
    5330             : 
    5331             :     // At this point, we have a stack instruction that we might need to fix
    5332             :     // up. We'll handle it if it's a load or store.
    5333         104 :     if (MI.mayLoadOrStore()) {
    5334             :       unsigned Base;  // Filled with the base regiser of MI.
    5335             :       int64_t Offset; // Filled with the offset of MI.
    5336             :       unsigned DummyWidth;
    5337             : 
    5338             :       // Does it allow us to offset the base register and is the base SP?
    5339          97 :       if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
    5340          97 :           Base != AArch64::SP)
    5341             :         return outliner::InstrType::Illegal;
    5342             : 
    5343             :       // Find the minimum/maximum offset for this instruction and check if
    5344             :       // fixing it up would be in range.
    5345             :       int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
    5346             :       unsigned Scale;               // The scale to multiply the offsets by.
    5347         194 :       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
    5348             : 
    5349             :       // TODO: We should really test what happens if an instruction overflows.
    5350             :       // This is tricky to test with IR tests, but when the outliner is moved
    5351             :       // to a MIR test, it really ought to be checked.
    5352          97 :       Offset += 16; // Update the offset to what it would be if we outlined.
    5353          97 :       if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
    5354             :         return outliner::InstrType::Illegal;
    5355             : 
    5356             :       // It's in range, so we can outline it.
    5357          97 :       return outliner::InstrType::Legal;
    5358             :     }
    5359             : 
    5360             :     // FIXME: Add handling for instructions like "add x0, sp, #8".
    5361             : 
    5362             :     // We can't fix it up, so don't outline it.
    5363             :     return outliner::InstrType::Illegal;
    5364             :   }
    5365             : 
    5366             :   return outliner::InstrType::Legal;
    5367             : }
    5368             : 
    5369           3 : void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
    5370          21 :   for (MachineInstr &MI : MBB) {
    5371             :     unsigned Base, Width;
    5372             :     int64_t Offset;
    5373             : 
    5374             :     // Is this a load or store with an immediate offset with SP as the base?
    5375          18 :     if (!MI.mayLoadOrStore() ||
    5376          18 :         !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
    5377           0 :         Base != AArch64::SP)
    5378          18 :       continue;
    5379             : 
    5380             :     // It is, so we have to fix it up.
    5381             :     unsigned Scale;
    5382             :     int64_t Dummy1, Dummy2;
    5383             : 
    5384           0 :     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    5385             :     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    5386           0 :     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    5387             :     assert(Scale != 0 && "Unexpected opcode!");
    5388             : 
    5389             :     // We've pushed the return address to the stack, so add 16 to the offset.
    5390             :     // This is safe, since we already checked if it would overflow when we
    5391             :     // checked if this instruction was legal to outline.
    5392           0 :     int64_t NewImm = (Offset + 16) / Scale;
    5393             :     StackOffsetOperand.setImm(NewImm);
    5394             :   }
    5395           3 : }
    5396             : 
    5397          25 : void AArch64InstrInfo::buildOutlinedFrame(
    5398             :     MachineBasicBlock &MBB, MachineFunction &MF,
    5399             :     const outliner::OutlinedFunction &OF) const {
    5400             :   // For thunk outlining, rewrite the last instruction from a call to a
    5401             :   // tail-call.
    5402          25 :   if (OF.FrameConstructionID == MachineOutlinerThunk) {
    5403             :     MachineInstr *Call = &*--MBB.instr_end();
    5404             :     unsigned TailOpcode;
    5405           8 :     if (Call->getOpcode() == AArch64::BL) {
    5406             :       TailOpcode = AArch64::TCRETURNdi;
    5407             :     } else {
    5408             :       assert(Call->getOpcode() == AArch64::BLR);
    5409             :       TailOpcode = AArch64::TCRETURNri;
    5410             :     }
    5411          12 :     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
    5412           4 :                             .add(Call->getOperand(0))
    5413             :                             .addImm(0);
    5414             :     MBB.insert(MBB.end(), TC);
    5415           4 :     Call->eraseFromParent();
    5416             :   }
    5417             : 
    5418             :   // Is there a call in the outlined range?
    5419             :   auto IsNonTailCall = [](MachineInstr &MI) {
    5420             :     return MI.isCall() && !MI.isReturn();
    5421             :   };
    5422          25 :   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    5423             :     // Fix up the instructions in the range, since we're going to modify the
    5424             :     // stack.
    5425             :     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
    5426             :            "Can only fix up stack references once");
    5427           2 :     fixupPostOutline(MBB);
    5428             : 
    5429             :     // LR has to be a live in so that we can save it.
    5430             :     MBB.addLiveIn(AArch64::LR);
    5431             : 
    5432             :     MachineBasicBlock::iterator It = MBB.begin();
    5433             :     MachineBasicBlock::iterator Et = MBB.end();
    5434             : 
    5435           2 :     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
    5436             :         OF.FrameConstructionID == MachineOutlinerThunk)
    5437           0 :       Et = std::prev(MBB.end());
    5438             : 
    5439             :     // Insert a save before the outlined region
    5440           6 :     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
    5441           2 :                                 .addReg(AArch64::SP, RegState::Define)
    5442           2 :                                 .addReg(AArch64::LR)
    5443           2 :                                 .addReg(AArch64::SP)
    5444           2 :                                 .addImm(-16);
    5445             :     It = MBB.insert(It, STRXpre);
    5446             : 
    5447           2 :     const TargetSubtargetInfo &STI = MF.getSubtarget();
    5448           2 :     const MCRegisterInfo *MRI = STI.getRegisterInfo();
    5449           2 :     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
    5450             : 
    5451             :     // Add a CFI saying the stack was moved 16 B down.
    5452             :     int64_t StackPosEntry =
    5453           2 :         MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    5454           4 :     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
    5455             :         .addCFIIndex(StackPosEntry)
    5456             :         .setMIFlags(MachineInstr::FrameSetup);
    5457             : 
    5458             :     // Add a CFI saying that the LR that we want to find is now 16 B higher than
    5459             :     // before.
    5460             :     int64_t LRPosEntry =
    5461           2 :         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    5462           4 :     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
    5463             :         .addCFIIndex(LRPosEntry)
    5464             :         .setMIFlags(MachineInstr::FrameSetup);
    5465             : 
    5466             :     // Insert a restore before the terminator for the function.
    5467           6 :     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
    5468           2 :                                  .addReg(AArch64::SP, RegState::Define)
    5469           2 :                                  .addReg(AArch64::LR, RegState::Define)
    5470           2 :                                  .addReg(AArch64::SP)
    5471           2 :                                  .addImm(16);
    5472             :     Et = MBB.insert(Et, LDRXpost);
    5473             :   }
    5474             : 
    5475             :   // If this is a tail call outlined function, then there's already a return.
    5476          25 :   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
    5477             :       OF.FrameConstructionID == MachineOutlinerThunk)
    5478             :     return;
    5479             : 
    5480             :   // It's not a tail call, so we have to insert the return ourselves.
    5481          42 :   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
    5482          14 :                           .addReg(AArch64::LR, RegState::Undef);
    5483             :   MBB.insert(MBB.end(), ret);
    5484             : 
    5485             :   // Did we have to modify the stack by saving the link register?
    5486          14 :   if (OF.FrameConstructionID != MachineOutlinerDefault)
    5487             :     return;
    5488             : 
    5489             :   // We modified the stack.
    5490             :   // Walk over the basic block and fix up all the stack accesses.
    5491           1 :   fixupPostOutline(MBB);
    5492             : }
    5493             : 
    5494          73 : MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    5495             :     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    5496             :     MachineFunction &MF, const outliner::Candidate &C) const {
    5497             : 
    5498             :   // Are we tail calling?
    5499          73 :   if (C.CallConstructionID == MachineOutlinerTailCall) {
    5500             :     // If yes, then we can just branch to the label.
    5501          52 :     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
    5502          26 :                             .addGlobalAddress(M.getNamedValue(MF.getName()))
    5503          26 :                             .addImm(0));
    5504          26 :     return It;
    5505             :   }
    5506             : 
    5507             :   // Are we saving the link register?
    5508          47 :   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
    5509             :       C.CallConstructionID == MachineOutlinerThunk) {
    5510             :     // No, so just insert the call.
    5511          44 :     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
    5512          44 :                             .addGlobalAddress(M.getNamedValue(MF.getName())));
    5513          22 :     return It;
    5514             :   }
    5515             : 
    5516             :   // We want to return the spot where we inserted the call.
    5517             :   MachineBasicBlock::iterator CallPt;
    5518             : 
    5519             :   // Instructions for saving and restoring LR around the call instruction we're
    5520             :   // going to insert.
    5521             :   MachineInstr *Save;
    5522             :   MachineInstr *Restore;
    5523             :   // Can we save to a register?
    5524          25 :   if (C.CallConstructionID == MachineOutlinerRegSave) {
    5525             :     // FIXME: This logic should be sunk into a target-specific interface so that
    5526             :     // we don't have to recompute the register.
    5527          22 :     unsigned Reg = findRegisterToSaveLRTo(C);
    5528             :     assert(Reg != 0 && "No callee-saved register available?");
    5529             : 
    5530             :     // Save and restore LR from that register.
    5531          44 :     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
    5532          22 :                .addReg(AArch64::XZR)
    5533          22 :                .addReg(AArch64::LR)
    5534          22 :                .addImm(0);
    5535          66 :     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
    5536          22 :                 .addReg(AArch64::XZR)
    5537          22 :                 .addReg(Reg)
    5538          22 :                 .addImm(0);
    5539             :   } else {
    5540             :     // We have the default case. Save and restore from SP.
    5541           6 :     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
    5542           3 :                .addReg(AArch64::SP, RegState::Define)
    5543           3 :                .addReg(AArch64::LR)
    5544           3 :                .addReg(AArch64::SP)
    5545           3 :                .addImm(-16);
    5546           9 :     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
    5547           3 :                   .addReg(AArch64::SP, RegState::Define)
    5548           3 :                   .addReg(AArch64::LR, RegState::Define)
    5549           3 :                   .addReg(AArch64::SP)
    5550           3 :                   .addImm(16);
    5551             :   }
    5552             : 
    5553             :   It = MBB.insert(It, Save);
    5554             :   It++;
    5555             : 
    5556             :   // Insert the call.
    5557          50 :   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
    5558          50 :                           .addGlobalAddress(M.getNamedValue(MF.getName())));
    5559          25 :   CallPt = It;
    5560             :   It++;
    5561             : 
    5562          25 :   It = MBB.insert(It, Restore);
    5563          25 :   return CallPt;
    5564             : }
    5565             : 
    5566       13689 : bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
    5567             :   MachineFunction &MF) const {
    5568       13689 :   return MF.getFunction().optForMinSize();
    5569             : }

Generated by: LCOV version 1.13