LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64InstrInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1573 1858 84.7 %
Date: 2018-06-17 00:07:59 Functions: 91 92 98.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains the AArch64 implementation of the TargetInstrInfo class.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64InstrInfo.h"
      15             : #include "AArch64MachineFunctionInfo.h"
      16             : #include "AArch64Subtarget.h"
      17             : #include "MCTargetDesc/AArch64AddressingModes.h"
      18             : #include "Utils/AArch64BaseInfo.h"
      19             : #include "llvm/ADT/ArrayRef.h"
      20             : #include "llvm/ADT/STLExtras.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/CodeGen/LiveRegUnits.h"
      23             : #include "llvm/CodeGen/MachineBasicBlock.h"
      24             : #include "llvm/CodeGen/MachineFrameInfo.h"
      25             : #include "llvm/CodeGen/MachineFunction.h"
      26             : #include "llvm/CodeGen/MachineInstr.h"
      27             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      28             : #include "llvm/CodeGen/MachineMemOperand.h"
      29             : #include "llvm/CodeGen/MachineOperand.h"
      30             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      31             : #include "llvm/CodeGen/MachineModuleInfo.h"
      32             : #include "llvm/CodeGen/StackMaps.h"
      33             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      34             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      35             : #include "llvm/IR/DebugLoc.h"
      36             : #include "llvm/IR/GlobalValue.h"
      37             : #include "llvm/MC/MCInst.h"
      38             : #include "llvm/MC/MCInstrDesc.h"
      39             : #include "llvm/Support/Casting.h"
      40             : #include "llvm/Support/CodeGen.h"
      41             : #include "llvm/Support/CommandLine.h"
      42             : #include "llvm/Support/Compiler.h"
      43             : #include "llvm/Support/ErrorHandling.h"
      44             : #include "llvm/Support/MathExtras.h"
      45             : #include "llvm/Target/TargetMachine.h"
      46             : #include "llvm/Target/TargetOptions.h"
      47             : #include <cassert>
      48             : #include <cstdint>
      49             : #include <iterator>
      50             : #include <utility>
      51             : 
      52             : using namespace llvm;
      53             : 
      54             : #define GET_INSTRINFO_CTOR_DTOR
      55             : #include "AArch64GenInstrInfo.inc"
      56             : 
      57      101169 : static cl::opt<unsigned> TBZDisplacementBits(
      58      202338 :     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
      59      202338 :     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
      60             : 
      61      101169 : static cl::opt<unsigned> CBZDisplacementBits(
      62      202338 :     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
      63      202338 :     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
      64             : 
      65             : static cl::opt<unsigned>
      66      303507 :     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
      67      202338 :                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
      68             : 
      69        1438 : AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
      70             :     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      71        2876 :       RI(STI.getTargetTriple()), Subtarget(STI) {}
      72             : 
      73             : /// GetInstSize - Return the number of bytes of code the specified
      74             : /// instruction may be.  This returns the maximum number of bytes.
      75       83392 : unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
      76       83392 :   const MachineBasicBlock &MBB = *MI.getParent();
      77       83392 :   const MachineFunction *MF = MBB.getParent();
      78       83392 :   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
      79             : 
      80      166784 :   if (MI.getOpcode() == AArch64::INLINEASM)
      81         213 :     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
      82             : 
      83             :   // FIXME: We currently only handle pseudoinstructions that don't get expanded
      84             :   //        before the assembly printer.
      85             :   unsigned NumBytes = 0;
      86             :   const MCInstrDesc &Desc = MI.getDesc();
      87       83179 :   switch (Desc.getOpcode()) {
      88             :   default:
      89             :     // Anything not explicitly designated otherwise is a normal 4-byte insn.
      90             :     NumBytes = 4;
      91             :     break;
      92        3238 :   case TargetOpcode::DBG_VALUE:
      93             :   case TargetOpcode::EH_LABEL:
      94             :   case TargetOpcode::IMPLICIT_DEF:
      95             :   case TargetOpcode::KILL:
      96             :     NumBytes = 0;
      97        3238 :     break;
      98          17 :   case TargetOpcode::STACKMAP:
      99             :     // The upper bound for a stackmap intrinsic is the full length of its shadow
     100          34 :     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
     101             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     102          17 :     break;
     103          48 :   case TargetOpcode::PATCHPOINT:
     104             :     // The size of the patchpoint intrinsic is the number of bytes requested
     105          96 :     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
     106             :     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     107          48 :     break;
     108          25 :   case AArch64::TLSDESC_CALLSEQ:
     109             :     // This gets lowered to an instruction sequence which takes 16 bytes
     110             :     NumBytes = 16;
     111          25 :     break;
     112             :   }
     113             : 
     114             :   return NumBytes;
     115             : }
     116             : 
     117       37836 : static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
     118             :                             SmallVectorImpl<MachineOperand> &Cond) {
     119             :   // Block ends with fall-through condbranch.
     120       75672 :   switch (LastInst->getOpcode()) {
     121           0 :   default:
     122           0 :     llvm_unreachable("Unknown branch instruction?");
     123       17790 :   case AArch64::Bcc:
     124       17790 :     Target = LastInst->getOperand(1).getMBB();
     125       17790 :     Cond.push_back(LastInst->getOperand(0));
     126       17790 :     break;
     127       12400 :   case AArch64::CBZW:
     128             :   case AArch64::CBZX:
     129             :   case AArch64::CBNZW:
     130             :   case AArch64::CBNZX:
     131       12400 :     Target = LastInst->getOperand(1).getMBB();
     132       24800 :     Cond.push_back(MachineOperand::CreateImm(-1));
     133       37200 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     134       12400 :     Cond.push_back(LastInst->getOperand(0));
     135       12400 :     break;
     136        7646 :   case AArch64::TBZW:
     137             :   case AArch64::TBZX:
     138             :   case AArch64::TBNZW:
     139             :   case AArch64::TBNZX:
     140        7646 :     Target = LastInst->getOperand(2).getMBB();
     141       15292 :     Cond.push_back(MachineOperand::CreateImm(-1));
     142       22938 :     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     143        7646 :     Cond.push_back(LastInst->getOperand(0));
     144       15292 :     Cond.push_back(LastInst->getOperand(1));
     145             :   }
     146       37836 : }
     147             : 
     148             : static unsigned getBranchDisplacementBits(unsigned Opc) {
     149        1343 :   switch (Opc) {
     150           0 :   default:
     151           0 :     llvm_unreachable("unexpected opcode!");
     152             :   case AArch64::B:
     153             :     return 64;
     154             :   case AArch64::TBNZW:
     155             :   case AArch64::TBZW:
     156             :   case AArch64::TBNZX:
     157             :   case AArch64::TBZX:
     158             :     return TBZDisplacementBits;
     159             :   case AArch64::CBNZW:
     160             :   case AArch64::CBZW:
     161             :   case AArch64::CBNZX:
     162             :   case AArch64::CBZX:
     163             :     return CBZDisplacementBits;
     164             :   case AArch64::Bcc:
     165             :     return BCCDisplacementBits;
     166             :   }
     167             : }
     168             : 
     169        1343 : bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
     170             :                                              int64_t BrOffset) const {
     171             :   unsigned Bits = getBranchDisplacementBits(BranchOp);
     172             :   assert(Bits >= 3 && "max branch displacement must be enough to jump"
     173             :                       "over conditional branch expansion");
     174        2445 :   return isIntN(Bits, BrOffset / 4);
     175             : }
     176             : 
     177             : MachineBasicBlock *
     178        1421 : AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
     179        2842 :   switch (MI.getOpcode()) {
     180           0 :   default:
     181           0 :     llvm_unreachable("unexpected opcode!");
     182         241 :   case AArch64::B:
     183         241 :     return MI.getOperand(0).getMBB();
     184         210 :   case AArch64::TBZW:
     185             :   case AArch64::TBNZW:
     186             :   case AArch64::TBZX:
     187             :   case AArch64::TBNZX:
     188         210 :     return MI.getOperand(2).getMBB();
     189         970 :   case AArch64::CBZW:
     190             :   case AArch64::CBNZW:
     191             :   case AArch64::CBZX:
     192             :   case AArch64::CBNZX:
     193             :   case AArch64::Bcc:
     194         970 :     return MI.getOperand(1).getMBB();
     195             :   }
     196             : }
     197             : 
     198             : // Branch analysis.
     199      381097 : bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     200             :                                      MachineBasicBlock *&TBB,
     201             :                                      MachineBasicBlock *&FBB,
     202             :                                      SmallVectorImpl<MachineOperand> &Cond,
     203             :                                      bool AllowModify) const {
     204             :   // If the block has no terminators, it just falls into the block after it.
     205      381097 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     206      381097 :   if (I == MBB.end())
     207             :     return false;
     208             : 
     209      379708 :   if (!isUnpredicatedTerminator(*I))
     210             :     return false;
     211             : 
     212             :   // Get the last instruction in the block.
     213             :   MachineInstr *LastInst = &*I;
     214             : 
     215             :   // If there is only one terminator instruction, process it.
     216      356029 :   unsigned LastOpc = LastInst->getOpcode();
     217      697561 :   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     218      338662 :     if (isUncondBranchOpcode(LastOpc)) {
     219        7060 :       TBB = LastInst->getOperand(0).getMBB();
     220        7060 :       return false;
     221             :     }
     222             :     if (isCondBranchOpcode(LastOpc)) {
     223             :       // Block ends with fall-through condbranch.
     224       20721 :       parseCondBranch(LastInst, TBB, Cond);
     225       20721 :       return false;
     226             :     }
     227             :     return true; // Can't handle indirect branch.
     228             :   }
     229             : 
     230             :   // Get the instruction before it if it is a terminator.
     231             :   MachineInstr *SecondLastInst = &*I;
     232       17367 :   unsigned SecondLastOpc = SecondLastInst->getOpcode();
     233             : 
     234             :   // If AllowModify is true and the block ends with two or more unconditional
     235             :   // branches, delete all but the first unconditional branch.
     236       19732 :   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
     237        2357 :     while (isUncondBranchOpcode(SecondLastOpc)) {
     238           2 :       LastInst->eraseFromParent();
     239             :       LastInst = SecondLastInst;
     240           2 :       LastOpc = LastInst->getOpcode();
     241           4 :       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     242             :         // Return now the only terminator is an unconditional branch.
     243           2 :         TBB = LastInst->getOperand(0).getMBB();
     244           2 :         return false;
     245             :       } else {
     246             :         SecondLastInst = &*I;
     247           0 :         SecondLastOpc = SecondLastInst->getOpcode();
     248             :       }
     249             :     }
     250             :   }
     251             : 
     252             :   // If there are three terminators, we don't know what sort of block this is.
     253       50721 :   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
     254             :     return true;
     255             : 
     256             :   // If the block ends with a B and a Bcc, handle it.
     257       17135 :   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     258       17115 :     parseCondBranch(SecondLastInst, TBB, Cond);
     259       17115 :     FBB = LastInst->getOperand(0).getMBB();
     260       17115 :     return false;
     261             :   }
     262             : 
     263             :   // If the block ends with two unconditional branches, handle it.  The second
     264             :   // one is not executed, so remove it.
     265          68 :   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     266           0 :     TBB = SecondLastInst->getOperand(0).getMBB();
     267           0 :     I = LastInst;
     268           0 :     if (AllowModify)
     269           0 :       I->eraseFromParent();
     270             :     return false;
     271             :   }
     272             : 
     273             :   // ...likewise if it ends with an indirect branch followed by an unconditional
     274             :   // branch.
     275          68 :   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     276           0 :     I = LastInst;
     277           0 :     if (AllowModify)
     278           0 :       I->eraseFromParent();
     279             :     return true;
     280             :   }
     281             : 
     282             :   // Otherwise, can't handle this.
     283             :   return true;
     284             : }
     285             : 
     286        2007 : bool AArch64InstrInfo::reverseBranchCondition(
     287             :     SmallVectorImpl<MachineOperand> &Cond) const {
     288        2007 :   if (Cond[0].getImm() != -1) {
     289             :     // Regular Bcc
     290        1285 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
     291        1285 :     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
     292             :   } else {
     293             :     // Folded compare-and-branch
     294         722 :     switch (Cond[1].getImm()) {
     295           0 :     default:
     296           0 :       llvm_unreachable("Unknown conditional branch!");
     297         187 :     case AArch64::CBZW:
     298             :       Cond[1].setImm(AArch64::CBNZW);
     299             :       break;
     300         180 :     case AArch64::CBNZW:
     301             :       Cond[1].setImm(AArch64::CBZW);
     302             :       break;
     303          46 :     case AArch64::CBZX:
     304             :       Cond[1].setImm(AArch64::CBNZX);
     305             :       break;
     306          46 :     case AArch64::CBNZX:
     307             :       Cond[1].setImm(AArch64::CBZX);
     308             :       break;
     309         111 :     case AArch64::TBZW:
     310             :       Cond[1].setImm(AArch64::TBNZW);
     311             :       break;
     312         110 :     case AArch64::TBNZW:
     313             :       Cond[1].setImm(AArch64::TBZW);
     314             :       break;
     315          24 :     case AArch64::TBZX:
     316             :       Cond[1].setImm(AArch64::TBNZX);
     317             :       break;
     318          18 :     case AArch64::TBNZX:
     319             :       Cond[1].setImm(AArch64::TBZX);
     320             :       break;
     321             :     }
     322             :   }
     323             : 
     324        2007 :   return false;
     325             : }
     326             : 
     327        3727 : unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
     328             :                                         int *BytesRemoved) const {
     329        3727 :   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     330        3727 :   if (I == MBB.end())
     331             :     return 0;
     332             : 
     333        7454 :   if (!isUncondBranchOpcode(I->getOpcode()) &&
     334             :       !isCondBranchOpcode(I->getOpcode()))
     335             :     return 0;
     336             : 
     337             :   // Remove the branch.
     338        3528 :   I->eraseFromParent();
     339             : 
     340        3528 :   I = MBB.end();
     341             : 
     342        3528 :   if (I == MBB.begin()) {
     343         387 :     if (BytesRemoved)
     344           2 :       *BytesRemoved = 4;
     345             :     return 1;
     346             :   }
     347             :   --I;
     348        3141 :   if (!isCondBranchOpcode(I->getOpcode())) {
     349        2061 :     if (BytesRemoved)
     350           6 :       *BytesRemoved = 4;
     351             :     return 1;
     352             :   }
     353             : 
     354             :   // Remove the branch.
     355        1080 :   I->eraseFromParent();
     356        1080 :   if (BytesRemoved)
     357           2 :     *BytesRemoved = 8;
     358             : 
     359             :   return 2;
     360             : }
     361             : 
     362        2619 : void AArch64InstrInfo::instantiateCondBranch(
     363             :     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
     364             :     ArrayRef<MachineOperand> Cond) const {
     365        2619 :   if (Cond[0].getImm() != -1) {
     366             :     // Regular Bcc
     367        3166 :     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
     368             :   } else {
     369             :     // Folded compare-and-branch
     370             :     // Note that we use addOperand instead of addReg to keep the flags.
     371             :     const MachineInstrBuilder MIB =
     372        1036 :         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
     373        1036 :     if (Cond.size() > 3)
     374         355 :       MIB.addImm(Cond[3].getImm());
     375             :     MIB.addMBB(TBB);
     376             :   }
     377        2619 : }
     378             : 
     379        3539 : unsigned AArch64InstrInfo::insertBranch(
     380             :     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
     381             :     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
     382             :   // Shouldn't be a fall through.
     383             :   assert(TBB && "insertBranch must not be told to insert a fallthrough");
     384             : 
     385        3539 :   if (!FBB) {
     386        3476 :     if (Cond.empty()) // Unconditional branch?
     387         920 :       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
     388             :     else
     389        2556 :       instantiateCondBranch(MBB, DL, TBB, Cond);
     390             : 
     391        3476 :     if (BytesAdded)
     392           2 :       *BytesAdded = 4;
     393             : 
     394             :     return 1;
     395             :   }
     396             : 
     397             :   // Two-way conditional branch.
     398          63 :   instantiateCondBranch(MBB, DL, TBB, Cond);
     399          63 :   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
     400             : 
     401          63 :   if (BytesAdded)
     402          10 :     *BytesAdded = 8;
     403             : 
     404             :   return 2;
     405             : }
     406             : 
     407             : // Find the original register that VReg is copied from.
     408         240 : static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
     409         706 :   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
     410         384 :     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     411             :     if (!DefMI->isFullCopy())
     412             :       return VReg;
     413         233 :     VReg = DefMI->getOperand(1).getReg();
     414             :   }
     415             :   return VReg;
     416             : }
     417             : 
     418             : // Determine if VReg is defined by an instruction that can be folded into a
     419             : // csel instruction. If so, return the folded opcode, and the replacement
     420             : // register.
     421         224 : static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     422             :                                 unsigned *NewVReg = nullptr) {
     423         224 :   VReg = removeCopies(MRI, VReg);
     424         224 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
     425             :     return 0;
     426             : 
     427         151 :   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
     428         151 :   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
     429             :   unsigned Opc = 0;
     430             :   unsigned SrcOpNum = 0;
     431         302 :   switch (DefMI->getOpcode()) {
     432           0 :   case AArch64::ADDSXri:
     433             :   case AArch64::ADDSWri:
     434             :     // if NZCV is used, do not fold.
     435           0 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     436             :       return 0;
     437             :     // fall-through to ADDXri and ADDWri.
     438             :     LLVM_FALLTHROUGH;
     439             :   case AArch64::ADDXri:
     440             :   case AArch64::ADDWri:
     441             :     // add x, 1 -> csinc.
     442         202 :     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
     443          12 :         DefMI->getOperand(3).getImm() != 0)
     444             :       return 0;
     445             :     SrcOpNum = 1;
     446          12 :     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     447             :     break;
     448             : 
     449           8 :   case AArch64::ORNXrr:
     450             :   case AArch64::ORNWrr: {
     451             :     // not x -> csinv, represented as orn dst, xzr, src.
     452           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     453           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     454             :       return 0;
     455             :     SrcOpNum = 2;
     456           8 :     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     457             :     break;
     458             :   }
     459             : 
     460           8 :   case AArch64::SUBSXrr:
     461             :   case AArch64::SUBSWrr:
     462             :     // if NZCV is used, do not fold.
     463           8 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
     464             :       return 0;
     465             :     // fall-through to SUBXrr and SUBWrr.
     466             :     LLVM_FALLTHROUGH;
     467             :   case AArch64::SUBXrr:
     468             :   case AArch64::SUBWrr: {
     469             :     // neg x -> csneg, represented as sub dst, xzr, src.
     470           8 :     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     471           8 :     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
     472             :       return 0;
     473             :     SrcOpNum = 2;
     474           8 :     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     475             :     break;
     476             :   }
     477             :   default:
     478             :     return 0;
     479             :   }
     480             :   assert(Opc && SrcOpNum && "Missing parameters");
     481             : 
     482          28 :   if (NewVReg)
     483          28 :     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
     484             :   return Opc;
     485             : }
     486             : 
     487          97 : bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
     488             :                                        ArrayRef<MachineOperand> Cond,
     489             :                                        unsigned TrueReg, unsigned FalseReg,
     490             :                                        int &CondCycles, int &TrueCycles,
     491             :                                        int &FalseCycles) const {
     492             :   // Check register classes.
     493          97 :   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     494             :   const TargetRegisterClass *RC =
     495         194 :       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
     496          97 :   if (!RC)
     497             :     return false;
     498             : 
     499             :   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
     500          97 :   unsigned ExtraCondLat = Cond.size() != 1;
     501             : 
     502             :   // GPRs are handled by csel.
     503             :   // FIXME: Fold in x+1, -x, and ~x when applicable.
     504         235 :   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
     505          41 :       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
     506             :     // Single-cycle csel, csinc, csinv, and csneg.
     507          94 :     CondCycles = 1 + ExtraCondLat;
     508          94 :     TrueCycles = FalseCycles = 1;
     509          94 :     if (canFoldIntoCSel(MRI, TrueReg))
     510           6 :       TrueCycles = 0;
     511          88 :     else if (canFoldIntoCSel(MRI, FalseReg))
     512           8 :       FalseCycles = 0;
     513             :     return true;
     514             :   }
     515             : 
     516             :   // Scalar floating point is handled by fcsel.
     517             :   // FIXME: Form fabs, fmin, and fmax when applicable.
     518           9 :   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
     519           3 :       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
     520           0 :     CondCycles = 5 + ExtraCondLat;
     521           0 :     TrueCycles = FalseCycles = 2;
     522           0 :     return true;
     523             :   }
     524             : 
     525             :   // Can't do vectors.
     526             :   return false;
     527             : }
     528             : 
     529          24 : void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
     530             :                                     MachineBasicBlock::iterator I,
     531             :                                     const DebugLoc &DL, unsigned DstReg,
     532             :                                     ArrayRef<MachineOperand> Cond,
     533             :                                     unsigned TrueReg, unsigned FalseReg) const {
     534          24 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     535             : 
     536             :   // Parse the condition code, see parseCondBranch() above.
     537             :   AArch64CC::CondCode CC;
     538          24 :   switch (Cond.size()) {
     539           0 :   default:
     540           0 :     llvm_unreachable("Unknown condition opcode in Cond");
     541          15 :   case 1: // b.cc
     542          15 :     CC = AArch64CC::CondCode(Cond[0].getImm());
     543          15 :     break;
     544           5 :   case 3: { // cbz/cbnz
     545             :     // We must insert a compare against 0.
     546             :     bool Is64Bit;
     547           5 :     switch (Cond[1].getImm()) {
     548           0 :     default:
     549           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     550             :     case AArch64::CBZW:
     551             :       Is64Bit = false;
     552             :       CC = AArch64CC::EQ;
     553             :       break;
     554           1 :     case AArch64::CBZX:
     555             :       Is64Bit = true;
     556             :       CC = AArch64CC::EQ;
     557           1 :       break;
     558           2 :     case AArch64::CBNZW:
     559             :       Is64Bit = false;
     560             :       CC = AArch64CC::NE;
     561           2 :       break;
     562           1 :     case AArch64::CBNZX:
     563             :       Is64Bit = true;
     564             :       CC = AArch64CC::NE;
     565           1 :       break;
     566             :     }
     567           5 :     unsigned SrcReg = Cond[2].getReg();
     568           5 :     if (Is64Bit) {
     569             :       // cmp reg, #0 is actually subs xzr, reg, #0.
     570           2 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
     571           6 :       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
     572           2 :           .addReg(SrcReg)
     573             :           .addImm(0)
     574             :           .addImm(0);
     575             :     } else {
     576           3 :       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
     577           9 :       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
     578           3 :           .addReg(SrcReg)
     579             :           .addImm(0)
     580             :           .addImm(0);
     581             :     }
     582             :     break;
     583             :   }
     584           4 :   case 4: { // tbz/tbnz
     585             :     // We must insert a tst instruction.
     586           4 :     switch (Cond[1].getImm()) {
     587           0 :     default:
     588           0 :       llvm_unreachable("Unknown branch opcode in Cond");
     589             :     case AArch64::TBZW:
     590             :     case AArch64::TBZX:
     591             :       CC = AArch64CC::EQ;
     592             :       break;
     593           2 :     case AArch64::TBNZW:
     594             :     case AArch64::TBNZX:
     595             :       CC = AArch64CC::NE;
     596           2 :       break;
     597             :     }
     598             :     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
     599           4 :     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
     600           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
     601           2 :           .addReg(Cond[2].getReg())
     602             :           .addImm(
     603           4 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
     604             :     else
     605           6 :       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
     606           2 :           .addReg(Cond[2].getReg())
     607             :           .addImm(
     608           4 :               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
     609             :     break;
     610             :   }
     611             :   }
     612             : 
     613             :   unsigned Opc = 0;
     614             :   const TargetRegisterClass *RC = nullptr;
     615             :   bool TryFold = false;
     616          24 :   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
     617             :     RC = &AArch64::GPR64RegClass;
     618             :     Opc = AArch64::CSELXr;
     619             :     TryFold = true;
     620          14 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
     621             :     RC = &AArch64::GPR32RegClass;
     622             :     Opc = AArch64::CSELWr;
     623             :     TryFold = true;
     624           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
     625             :     RC = &AArch64::FPR64RegClass;
     626             :     Opc = AArch64::FCSELDrrr;
     627           0 :   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
     628             :     RC = &AArch64::FPR32RegClass;
     629             :     Opc = AArch64::FCSELSrrr;
     630             :   }
     631             :   assert(RC && "Unsupported regclass");
     632             : 
     633             :   // Try folding simple instructions into the csel.
     634          24 :   if (TryFold) {
     635          24 :     unsigned NewVReg = 0;
     636          24 :     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
     637          24 :     if (FoldedOpc) {
     638             :       // The folded opcodes csinc, csinc and csneg apply the operation to
     639             :       // FalseReg, so we need to invert the condition.
     640             :       CC = AArch64CC::getInvertedCondCode(CC);
     641             :       TrueReg = FalseReg;
     642             :     } else
     643          18 :       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
     644             : 
     645             :     // Fold the operation. Leave any dead instructions for DCE to clean up.
     646          24 :     if (FoldedOpc) {
     647          14 :       FalseReg = NewVReg;
     648             :       Opc = FoldedOpc;
     649             :       // The extends the live range of NewVReg.
     650          14 :       MRI.clearKillFlags(NewVReg);
     651             :     }
     652             :   }
     653             : 
     654             :   // Pull all virtual register into the appropriate class.
     655          24 :   MRI.constrainRegClass(TrueReg, RC);
     656          24 :   MRI.constrainRegClass(FalseReg, RC);
     657             : 
     658             :   // Insert the csel.
     659          72 :   BuildMI(MBB, I, DL, get(Opc), DstReg)
     660          24 :       .addReg(TrueReg)
     661          24 :       .addReg(FalseReg)
     662          24 :       .addImm(CC);
     663          24 : }
     664             : 
     665             : /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
     666             : static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
     667           5 :   uint64_t Imm = MI.getOperand(1).getImm();
     668           5 :   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
     669             :   uint64_t Encoding;
     670           5 :   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
     671             : }
     672             : 
     673             : // FIXME: this implementation should be micro-architecture dependent, so a
     674             : // micro-architecture target hook should be introduced here in future.
     675       12854 : bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
     676       12854 :   if (!Subtarget.hasCustomCheapAsMoveHandling())
     677       11688 :     return MI.isAsCheapAsAMove();
     678             : 
     679        1166 :   if (Subtarget.hasExynosCheapAsMoveHandling()) {
     680         645 :     if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
     681             :       return true;
     682             :     else
     683         628 :       return MI.isAsCheapAsAMove();
     684             :   }
     685             : 
     686        1042 :   switch (MI.getOpcode()) {
     687             :   default:
     688             :     return false;
     689             : 
     690             :   // add/sub on register without shift
     691          18 :   case AArch64::ADDWri:
     692             :   case AArch64::ADDXri:
     693             :   case AArch64::SUBWri:
     694             :   case AArch64::SUBXri:
     695          18 :     return (MI.getOperand(3).getImm() == 0);
     696             : 
     697             :   // logical ops on immediate
     698           0 :   case AArch64::ANDWri:
     699             :   case AArch64::ANDXri:
     700             :   case AArch64::EORWri:
     701             :   case AArch64::EORXri:
     702             :   case AArch64::ORRWri:
     703             :   case AArch64::ORRXri:
     704           0 :     return true;
     705             : 
     706             :   // logical ops on register without shift
     707           0 :   case AArch64::ANDWrr:
     708             :   case AArch64::ANDXrr:
     709             :   case AArch64::BICWrr:
     710             :   case AArch64::BICXrr:
     711             :   case AArch64::EONWrr:
     712             :   case AArch64::EONXrr:
     713             :   case AArch64::EORWrr:
     714             :   case AArch64::EORXrr:
     715             :   case AArch64::ORNWrr:
     716             :   case AArch64::ORNXrr:
     717             :   case AArch64::ORRWrr:
     718             :   case AArch64::ORRXrr:
     719           0 :     return true;
     720             : 
     721             :   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
     722             :   // ORRXri, it is as cheap as MOV
     723             :   case AArch64::MOVi32imm:
     724           5 :     return canBeExpandedToORR(MI, 32);
     725             :   case AArch64::MOVi64imm:
     726           0 :     return canBeExpandedToORR(MI, 64);
     727             : 
     728             :   // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
     729             :   // feature.
     730          12 :   case AArch64::FMOVH0:
     731             :   case AArch64::FMOVS0:
     732             :   case AArch64::FMOVD0:
     733          12 :     return Subtarget.hasZeroCycleZeroing();
     734           0 :   case TargetOpcode::COPY:
     735           0 :     return (Subtarget.hasZeroCycleZeroing() &&
     736           0 :             (MI.getOperand(1).getReg() == AArch64::WZR ||
     737             :              MI.getOperand(1).getReg() == AArch64::XZR));
     738             :   }
     739             : 
     740             :   llvm_unreachable("Unknown opcode to check as cheap as a move!");
     741             : }
     742             : 
     743        1197 : bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
     744             :   unsigned Reg, Imm, Shift;
     745             : 
     746        2394 :   switch (MI.getOpcode()) {
     747             :   default:
     748             :     return false;
     749             : 
     750             :   // MOV Rd, SP
     751         322 :   case AArch64::ADDWri:
     752             :   case AArch64::ADDXri:
     753         958 :     if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
     754             :       return false;
     755             : 
     756         252 :     Reg = MI.getOperand(1).getReg();
     757         252 :     Imm = MI.getOperand(2).getImm();
     758         252 :     return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
     759             : 
     760             :   // Literal
     761           3 :   case AArch64::ADR:
     762             :   case AArch64::ADRP:
     763           3 :     return true;
     764             : 
     765             :   // MOVI Vd, #0
     766          27 :   case AArch64::MOVID:
     767             :   case AArch64::MOVIv8b_ns:
     768             :   case AArch64::MOVIv2d_ns:
     769             :   case AArch64::MOVIv16b_ns:
     770          27 :     Imm = MI.getOperand(1).getImm();
     771          27 :     return (Imm == 0);
     772             : 
     773             :   // MOVI Vd, #0
     774           0 :   case AArch64::MOVIv2i32:
     775             :   case AArch64::MOVIv4i16:
     776             :   case AArch64::MOVIv4i32:
     777             :   case AArch64::MOVIv8i16:
     778           0 :     Imm = MI.getOperand(1).getImm();
     779           0 :     Shift = MI.getOperand(2).getImm();
     780           0 :     return (Imm == 0 && Shift == 0);
     781             : 
     782             :   // MOV Rd, Imm
     783           0 :   case AArch64::MOVNWi:
     784             :   case AArch64::MOVNXi:
     785             : 
     786             :   // MOV Rd, Imm
     787             :   case AArch64::MOVZWi:
     788             :   case AArch64::MOVZXi:
     789           0 :     return true;
     790             : 
     791             :   // MOV Rd, Imm
     792           7 :   case AArch64::ORRWri:
     793             :   case AArch64::ORRXri:
     794          14 :     if (!MI.getOperand(1).isReg())
     795             :       return false;
     796             : 
     797           7 :     Reg = MI.getOperand(1).getReg();
     798           7 :     Imm = MI.getOperand(2).getImm();
     799           7 :     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
     800             : 
     801             :   // MOV Rd, Rm
     802          22 :   case AArch64::ORRWrs:
     803             :   case AArch64::ORRXrs:
     804          44 :     if (!MI.getOperand(1).isReg())
     805             :       return false;
     806             : 
     807          22 :     Reg = MI.getOperand(1).getReg();
     808          22 :     Imm = MI.getOperand(3).getImm();
     809             :     Shift = AArch64_AM::getShiftValue(Imm);
     810          22 :     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
     811             :   }
     812             : }
     813             : 
     814        1598 : bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
     815             :   unsigned Imm, Shift;
     816             :   AArch64_AM::ShiftExtendType Ext;
     817             : 
     818        3196 :   switch (MI.getOpcode()) {
     819             :   default:
     820             :     return false;
     821             : 
     822             :   // WriteI
     823         358 :   case AArch64::ADDSWri:
     824             :   case AArch64::ADDSXri:
     825             :   case AArch64::ADDWri:
     826             :   case AArch64::ADDXri:
     827             :   case AArch64::SUBSWri:
     828             :   case AArch64::SUBSXri:
     829             :   case AArch64::SUBWri:
     830             :   case AArch64::SUBXri:
     831         358 :     return true;
     832             : 
     833             :   // WriteISReg
     834         370 :   case AArch64::ADDSWrs:
     835             :   case AArch64::ADDSXrs:
     836             :   case AArch64::ADDWrs:
     837             :   case AArch64::ADDXrs:
     838             :   case AArch64::ANDSWrs:
     839             :   case AArch64::ANDSXrs:
     840             :   case AArch64::ANDWrs:
     841             :   case AArch64::ANDXrs:
     842             :   case AArch64::BICSWrs:
     843             :   case AArch64::BICSXrs:
     844             :   case AArch64::BICWrs:
     845             :   case AArch64::BICXrs:
     846             :   case AArch64::EONWrs:
     847             :   case AArch64::EONXrs:
     848             :   case AArch64::EORWrs:
     849             :   case AArch64::EORXrs:
     850             :   case AArch64::ORNWrs:
     851             :   case AArch64::ORNXrs:
     852             :   case AArch64::ORRWrs:
     853             :   case AArch64::ORRXrs:
     854             :   case AArch64::SUBSWrs:
     855             :   case AArch64::SUBSXrs:
     856             :   case AArch64::SUBWrs:
     857             :   case AArch64::SUBXrs:
     858         370 :     Imm = MI.getOperand(3).getImm();
     859             :     Shift = AArch64_AM::getShiftValue(Imm);
     860             :     Ext = AArch64_AM::getShiftType(Imm);
     861         370 :     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
     862             : 
     863             :   // WriteIEReg
     864          50 :   case AArch64::ADDSWrx:
     865             :   case AArch64::ADDSXrx:
     866             :   case AArch64::ADDSXrx64:
     867             :   case AArch64::ADDWrx:
     868             :   case AArch64::ADDXrx:
     869             :   case AArch64::ADDXrx64:
     870             :   case AArch64::SUBSWrx:
     871             :   case AArch64::SUBSXrx:
     872             :   case AArch64::SUBSXrx64:
     873             :   case AArch64::SUBWrx:
     874             :   case AArch64::SUBXrx:
     875             :   case AArch64::SUBXrx64:
     876          50 :     Imm = MI.getOperand(3).getImm();
     877             :     Shift = AArch64_AM::getArithShiftValue(Imm);
     878             :     Ext = AArch64_AM::getArithExtendType(Imm);
     879          50 :     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
     880             : 
     881         185 :   case AArch64::PRFMroW:
     882             :   case AArch64::PRFMroX:
     883             : 
     884             :   // WriteLDIdx
     885             :   case AArch64::LDRBBroW:
     886             :   case AArch64::LDRBBroX:
     887             :   case AArch64::LDRHHroW:
     888             :   case AArch64::LDRHHroX:
     889             :   case AArch64::LDRSBWroW:
     890             :   case AArch64::LDRSBWroX:
     891             :   case AArch64::LDRSBXroW:
     892             :   case AArch64::LDRSBXroX:
     893             :   case AArch64::LDRSHWroW:
     894             :   case AArch64::LDRSHWroX:
     895             :   case AArch64::LDRSHXroW:
     896             :   case AArch64::LDRSHXroX:
     897             :   case AArch64::LDRSWroW:
     898             :   case AArch64::LDRSWroX:
     899             :   case AArch64::LDRWroW:
     900             :   case AArch64::LDRWroX:
     901             :   case AArch64::LDRXroW:
     902             :   case AArch64::LDRXroX:
     903             : 
     904             :   case AArch64::LDRBroW:
     905             :   case AArch64::LDRBroX:
     906             :   case AArch64::LDRDroW:
     907             :   case AArch64::LDRDroX:
     908             :   case AArch64::LDRHroW:
     909             :   case AArch64::LDRHroX:
     910             :   case AArch64::LDRSroW:
     911             :   case AArch64::LDRSroX:
     912             : 
     913             :   // WriteSTIdx
     914             :   case AArch64::STRBBroW:
     915             :   case AArch64::STRBBroX:
     916             :   case AArch64::STRHHroW:
     917             :   case AArch64::STRHHroX:
     918             :   case AArch64::STRWroW:
     919             :   case AArch64::STRWroX:
     920             :   case AArch64::STRXroW:
     921             :   case AArch64::STRXroX:
     922             : 
     923             :   case AArch64::STRBroW:
     924             :   case AArch64::STRBroX:
     925             :   case AArch64::STRDroW:
     926             :   case AArch64::STRDroX:
     927             :   case AArch64::STRHroW:
     928             :   case AArch64::STRHroX:
     929             :   case AArch64::STRSroW:
     930             :   case AArch64::STRSroX:
     931         185 :     Imm = MI.getOperand(3).getImm();
     932             :     Ext = AArch64_AM::getMemExtendType(Imm);
     933         185 :     return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
     934             :   }
     935             : }
     936             : 
     937         112 : bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
     938         224 :   switch (MI.getOpcode()) {
     939             :   default:
     940             :     return false;
     941             : 
     942           6 :   case AArch64::ADDWrs:
     943             :   case AArch64::ADDXrs:
     944             :   case AArch64::ADDSWrs:
     945             :   case AArch64::ADDSXrs: {
     946           6 :     unsigned Imm = MI.getOperand(3).getImm();
     947             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     948           6 :     if (ShiftVal == 0)
     949             :       return true;
     950           0 :     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
     951             :   }
     952             : 
     953          27 :   case AArch64::ADDWrx:
     954             :   case AArch64::ADDXrx:
     955             :   case AArch64::ADDXrx64:
     956             :   case AArch64::ADDSWrx:
     957             :   case AArch64::ADDSXrx:
     958             :   case AArch64::ADDSXrx64: {
     959          27 :     unsigned Imm = MI.getOperand(3).getImm();
     960          27 :     switch (AArch64_AM::getArithExtendType(Imm)) {
     961             :     default:
     962             :       return false;
     963             :     case AArch64_AM::UXTB:
     964             :     case AArch64_AM::UXTH:
     965             :     case AArch64_AM::UXTW:
     966             :     case AArch64_AM::UXTX:
     967          27 :       return AArch64_AM::getArithShiftValue(Imm) <= 4;
     968             :     }
     969             :   }
     970             : 
     971          10 :   case AArch64::SUBWrs:
     972             :   case AArch64::SUBSWrs: {
     973          10 :     unsigned Imm = MI.getOperand(3).getImm();
     974             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     975          10 :     return ShiftVal == 0 ||
     976           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
     977             :   }
     978             : 
     979           0 :   case AArch64::SUBXrs:
     980             :   case AArch64::SUBSXrs: {
     981           0 :     unsigned Imm = MI.getOperand(3).getImm();
     982             :     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
     983           0 :     return ShiftVal == 0 ||
     984           0 :            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
     985             :   }
     986             : 
     987          20 :   case AArch64::SUBWrx:
     988             :   case AArch64::SUBXrx:
     989             :   case AArch64::SUBXrx64:
     990             :   case AArch64::SUBSWrx:
     991             :   case AArch64::SUBSXrx:
     992             :   case AArch64::SUBSXrx64: {
     993          20 :     unsigned Imm = MI.getOperand(3).getImm();
     994          20 :     switch (AArch64_AM::getArithExtendType(Imm)) {
     995             :     default:
     996             :       return false;
     997             :     case AArch64_AM::UXTB:
     998             :     case AArch64_AM::UXTH:
     999             :     case AArch64_AM::UXTW:
    1000             :     case AArch64_AM::UXTX:
    1001          20 :       return AArch64_AM::getArithShiftValue(Imm) == 0;
    1002             :     }
    1003             :   }
    1004             : 
    1005          49 :   case AArch64::LDRBBroW:
    1006             :   case AArch64::LDRBBroX:
    1007             :   case AArch64::LDRBroW:
    1008             :   case AArch64::LDRBroX:
    1009             :   case AArch64::LDRDroW:
    1010             :   case AArch64::LDRDroX:
    1011             :   case AArch64::LDRHHroW:
    1012             :   case AArch64::LDRHHroX:
    1013             :   case AArch64::LDRHroW:
    1014             :   case AArch64::LDRHroX:
    1015             :   case AArch64::LDRQroW:
    1016             :   case AArch64::LDRQroX:
    1017             :   case AArch64::LDRSBWroW:
    1018             :   case AArch64::LDRSBWroX:
    1019             :   case AArch64::LDRSBXroW:
    1020             :   case AArch64::LDRSBXroX:
    1021             :   case AArch64::LDRSHWroW:
    1022             :   case AArch64::LDRSHWroX:
    1023             :   case AArch64::LDRSHXroW:
    1024             :   case AArch64::LDRSHXroX:
    1025             :   case AArch64::LDRSWroW:
    1026             :   case AArch64::LDRSWroX:
    1027             :   case AArch64::LDRSroW:
    1028             :   case AArch64::LDRSroX:
    1029             :   case AArch64::LDRWroW:
    1030             :   case AArch64::LDRWroX:
    1031             :   case AArch64::LDRXroW:
    1032             :   case AArch64::LDRXroX:
    1033             :   case AArch64::PRFMroW:
    1034             :   case AArch64::PRFMroX:
    1035             :   case AArch64::STRBBroW:
    1036             :   case AArch64::STRBBroX:
    1037             :   case AArch64::STRBroW:
    1038             :   case AArch64::STRBroX:
    1039             :   case AArch64::STRDroW:
    1040             :   case AArch64::STRDroX:
    1041             :   case AArch64::STRHHroW:
    1042             :   case AArch64::STRHHroX:
    1043             :   case AArch64::STRHroW:
    1044             :   case AArch64::STRHroX:
    1045             :   case AArch64::STRQroW:
    1046             :   case AArch64::STRQroX:
    1047             :   case AArch64::STRSroW:
    1048             :   case AArch64::STRSroX:
    1049             :   case AArch64::STRWroW:
    1050             :   case AArch64::STRWroX:
    1051             :   case AArch64::STRXroW:
    1052             :   case AArch64::STRXroX: {
    1053          49 :     unsigned IsSigned = MI.getOperand(3).getImm();
    1054          49 :     return !IsSigned;
    1055             :   }
    1056             :   }
    1057             : }
    1058             : 
    1059      108423 : bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
    1060             :                                              unsigned &SrcReg, unsigned &DstReg,
    1061             :                                              unsigned &SubIdx) const {
    1062      216846 :   switch (MI.getOpcode()) {
    1063             :   default:
    1064             :     return false;
    1065         589 :   case AArch64::SBFMXri: // aka sxtw
    1066             :   case AArch64::UBFMXri: // aka uxtw
    1067             :     // Check for the 32 -> 64 bit extension case, these instructions can do
    1068             :     // much more.
    1069         589 :     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
    1070             :       return false;
    1071             :     // This is a signed or unsigned 32 -> 64 bit extension.
    1072          37 :     SrcReg = MI.getOperand(1).getReg();
    1073          37 :     DstReg = MI.getOperand(0).getReg();
    1074          37 :     SubIdx = AArch64::sub_32;
    1075          37 :     return true;
    1076             :   }
    1077             : }
    1078             : 
    1079        5900 : bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    1080             :     MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
    1081             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1082        5900 :   unsigned BaseRegA = 0, BaseRegB = 0;
    1083        5900 :   int64_t OffsetA = 0, OffsetB = 0;
    1084        5900 :   unsigned WidthA = 0, WidthB = 0;
    1085             : 
    1086             :   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
    1087             :   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
    1088             : 
    1089       17694 :   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
    1090       17688 :       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    1091             :     return false;
    1092             : 
    1093             :   // Retrieve the base register, offset from the base register and width. Width
    1094             :   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
    1095             :   // base registers are identical, and the offset of a lower memory access +
    1096             :   // the width doesn't overlap the offset of a higher memory access,
    1097             :   // then the memory accesses are different.
    1098        9902 :   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
    1099        4218 :       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    1100        3799 :     if (BaseRegA == BaseRegB) {
    1101        2085 :       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
    1102        2085 :       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
    1103        2085 :       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
    1104        2085 :       if (LowOffset + LowWidth <= HighOffset)
    1105             :         return true;
    1106             :     }
    1107             :   }
    1108             :   return false;
    1109             : }
    1110             : 
    1111             : /// analyzeCompare - For a comparison instruction, return the source registers
    1112             : /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
    1113             : /// Return true if the comparison instruction can be analyzed.
    1114        1399 : bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
    1115             :                                       unsigned &SrcReg2, int &CmpMask,
    1116             :                                       int &CmpValue) const {
    1117             :   // The first operand can be a frame index where we'd normally expect a
    1118             :   // register.
    1119             :   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
    1120        2798 :   if (!MI.getOperand(1).isReg())
    1121             :     return false;
    1122             : 
    1123        2796 :   switch (MI.getOpcode()) {
    1124             :   default:
    1125             :     break;
    1126         765 :   case AArch64::SUBSWrr:
    1127             :   case AArch64::SUBSWrs:
    1128             :   case AArch64::SUBSWrx:
    1129             :   case AArch64::SUBSXrr:
    1130             :   case AArch64::SUBSXrs:
    1131             :   case AArch64::SUBSXrx:
    1132             :   case AArch64::ADDSWrr:
    1133             :   case AArch64::ADDSWrs:
    1134             :   case AArch64::ADDSWrx:
    1135             :   case AArch64::ADDSXrr:
    1136             :   case AArch64::ADDSXrs:
    1137             :   case AArch64::ADDSXrx:
    1138             :     // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1139         765 :     SrcReg = MI.getOperand(1).getReg();
    1140         765 :     SrcReg2 = MI.getOperand(2).getReg();
    1141         765 :     CmpMask = ~0;
    1142         765 :     CmpValue = 0;
    1143         765 :     return true;
    1144         600 :   case AArch64::SUBSWri:
    1145             :   case AArch64::ADDSWri:
    1146             :   case AArch64::SUBSXri:
    1147             :   case AArch64::ADDSXri:
    1148         600 :     SrcReg = MI.getOperand(1).getReg();
    1149         600 :     SrcReg2 = 0;
    1150         600 :     CmpMask = ~0;
    1151             :     // FIXME: In order to convert CmpValue to 0 or 1
    1152         600 :     CmpValue = MI.getOperand(2).getImm() != 0;
    1153         600 :     return true;
    1154          33 :   case AArch64::ANDSWri:
    1155             :   case AArch64::ANDSXri:
    1156             :     // ANDS does not use the same encoding scheme as the others xxxS
    1157             :     // instructions.
    1158          33 :     SrcReg = MI.getOperand(1).getReg();
    1159          33 :     SrcReg2 = 0;
    1160          33 :     CmpMask = ~0;
    1161             :     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
    1162             :     // while the type of CmpValue is int. When converting uint64_t to int,
    1163             :     // the high 32 bits of uint64_t will be lost.
    1164             :     // In fact it causes a bug in spec2006-483.xalancbmk
    1165             :     // CmpValue is only used to compare with zero in OptimizeCompareInstr
    1166          99 :     CmpValue = AArch64_AM::decodeLogicalImmediate(
    1167          33 :                    MI.getOperand(2).getImm(),
    1168          33 :                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    1169          33 :     return true;
    1170             :   }
    1171             : 
    1172             :   return false;
    1173             : }
    1174             : 
    1175         470 : static bool UpdateOperandRegClass(MachineInstr &Instr) {
    1176         470 :   MachineBasicBlock *MBB = Instr.getParent();
    1177             :   assert(MBB && "Can't get MachineBasicBlock here");
    1178         470 :   MachineFunction *MF = MBB->getParent();
    1179             :   assert(MF && "Can't get MachineFunction here");
    1180         470 :   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    1181         470 :   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
    1182         470 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    1183             : 
    1184        2087 :   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
    1185             :        ++OpIdx) {
    1186        1617 :     MachineOperand &MO = Instr.getOperand(OpIdx);
    1187             :     const TargetRegisterClass *OpRegCstraints =
    1188        1617 :         Instr.getRegClassConstraint(OpIdx, TII, TRI);
    1189             : 
    1190             :     // If there's no constraint, there's nothing to do.
    1191        1617 :     if (!OpRegCstraints)
    1192         359 :       continue;
    1193             :     // If the operand is a frame index, there's nothing to do here.
    1194             :     // A frame index operand will resolve correctly during PEI.
    1195        1258 :     if (MO.isFI())
    1196           2 :       continue;
    1197             : 
    1198             :     assert(MO.isReg() &&
    1199             :            "Operand has register constraints without being a register!");
    1200             : 
    1201        1256 :     unsigned Reg = MO.getReg();
    1202        1256 :     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    1203           0 :       if (!OpRegCstraints->contains(Reg))
    1204             :         return false;
    1205        2620 :     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
    1206         108 :                !MRI->constrainRegClass(Reg, OpRegCstraints))
    1207             :       return false;
    1208             :   }
    1209             : 
    1210             :   return true;
    1211             : }
    1212             : 
    1213             : /// Return the opcode that does not set flags when possible - otherwise
    1214             : /// return the original opcode. The caller is responsible to do the actual
    1215             : /// substitution and legality checking.
    1216        1071 : static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
    1217             :   // Don't convert all compare instructions, because for some the zero register
    1218             :   // encoding becomes the sp register.
    1219             :   bool MIDefinesZeroReg = false;
    1220        2142 :   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    1221             :     MIDefinesZeroReg = true;
    1222             : 
    1223        2142 :   switch (MI.getOpcode()) {
    1224           0 :   default:
    1225           0 :     return MI.getOpcode();
    1226             :   case AArch64::ADDSWrr:
    1227             :     return AArch64::ADDWrr;
    1228           0 :   case AArch64::ADDSWri:
    1229           0 :     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
    1230           0 :   case AArch64::ADDSWrs:
    1231           0 :     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
    1232           0 :   case AArch64::ADDSWrx:
    1233           0 :     return AArch64::ADDWrx;
    1234           0 :   case AArch64::ADDSXrr:
    1235           0 :     return AArch64::ADDXrr;
    1236           0 :   case AArch64::ADDSXri:
    1237           0 :     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
    1238           0 :   case AArch64::ADDSXrs:
    1239           0 :     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
    1240           0 :   case AArch64::ADDSXrx:
    1241           0 :     return AArch64::ADDXrx;
    1242         424 :   case AArch64::SUBSWrr:
    1243         424 :     return AArch64::SUBWrr;
    1244         129 :   case AArch64::SUBSWri:
    1245         129 :     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
    1246          23 :   case AArch64::SUBSWrs:
    1247          23 :     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
    1248           8 :   case AArch64::SUBSWrx:
    1249           8 :     return AArch64::SUBWrx;
    1250         245 :   case AArch64::SUBSXrr:
    1251         245 :     return AArch64::SUBXrr;
    1252         218 :   case AArch64::SUBSXri:
    1253         218 :     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
    1254          12 :   case AArch64::SUBSXrs:
    1255          12 :     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
    1256          12 :   case AArch64::SUBSXrx:
    1257          12 :     return AArch64::SUBXrx;
    1258             :   }
    1259             : }
    1260             : 
    1261             : enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
    1262             : 
    1263             : /// True when condition flags are accessed (either by writing or reading)
    1264             : /// on the instruction trace starting at From and ending at To.
    1265             : ///
    1266             : /// Note: If From and To are from different blocks it's assumed CC are accessed
    1267             : ///       on the path.
    1268          32 : static bool areCFlagsAccessedBetweenInstrs(
    1269             :     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    1270             :     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
    1271             :   // Early exit if To is at the beginning of the BB.
    1272          64 :   if (To == To->getParent()->begin())
    1273             :     return true;
    1274             : 
    1275             :   // Check whether the instructions are in the same basic block
    1276             :   // If not, assume the condition flags might get modified somewhere.
    1277          32 :   if (To->getParent() != From->getParent())
    1278             :     return true;
    1279             : 
    1280             :   // From must be above To.
    1281             :   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
    1282             :                       [From](MachineInstr &MI) {
    1283             :                         return MI.getIterator() == From;
    1284             :                       }) != To->getParent()->rend());
    1285             : 
    1286             :   // We iterate backward starting \p To until we hit \p From.
    1287         126 :   for (--To; To != From; --To) {
    1288             :     const MachineInstr &Instr = *To;
    1289             : 
    1290         188 :     if (((AccessToCheck & AK_Write) &&
    1291         188 :          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
    1292         188 :         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
    1293             :       return true;
    1294             :   }
    1295             :   return false;
    1296             : }
    1297             : 
    1298             : /// Try to optimize a compare instruction. A compare instruction is an
    1299             : /// instruction which produces AArch64::NZCV. It can be truly compare
    1300             : /// instruction
    1301             : /// when there are no uses of its destination register.
    1302             : ///
    1303             : /// The following steps are tried in order:
    1304             : /// 1. Convert CmpInstr into an unconditional version.
    1305             : /// 2. Remove CmpInstr if above there is an instruction producing a needed
    1306             : ///    condition code or an instruction which can be converted into such an
    1307             : ///    instruction.
    1308             : ///    Only comparison with zero is supported.
    1309        1392 : bool AArch64InstrInfo::optimizeCompareInstr(
    1310             :     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    1311             :     int CmpValue, const MachineRegisterInfo *MRI) const {
    1312             :   assert(CmpInstr.getParent());
    1313             :   assert(MRI);
    1314             : 
    1315             :   // Replace SUBSWrr with SUBWrr if NZCV is not used.
    1316        1392 :   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
    1317        1392 :   if (DeadNZCVIdx != -1) {
    1318         930 :     if (CmpInstr.definesRegister(AArch64::WZR) ||
    1319             :         CmpInstr.definesRegister(AArch64::XZR)) {
    1320           0 :       CmpInstr.eraseFromParent();
    1321           0 :       return true;
    1322             :     }
    1323         465 :     unsigned Opc = CmpInstr.getOpcode();
    1324         465 :     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
    1325         465 :     if (NewOpc == Opc)
    1326             :       return false;
    1327         465 :     const MCInstrDesc &MCID = get(NewOpc);
    1328             :     CmpInstr.setDesc(MCID);
    1329         465 :     CmpInstr.RemoveOperand(DeadNZCVIdx);
    1330         465 :     bool succeeded = UpdateOperandRegClass(CmpInstr);
    1331             :     (void)succeeded;
    1332             :     assert(succeeded && "Some operands reg class are incompatible!");
    1333         465 :     return true;
    1334             :   }
    1335             : 
    1336             :   // Continue only if we have a "ri" where immediate is zero.
    1337             :   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
    1338             :   // function.
    1339             :   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
    1340         927 :   if (CmpValue != 0 || SrcReg2 != 0)
    1341             :     return false;
    1342             : 
    1343             :   // CmpInstr is a Compare instruction if destination register is not used.
    1344         254 :   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    1345             :     return false;
    1346             : 
    1347         118 :   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
    1348             : }
    1349             : 
    1350             : /// Get opcode of S version of Instr.
    1351             : /// If Instr is S version its opcode is returned.
    1352             : /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
    1353             : /// or we are not interested in it.
    1354         149 : static unsigned sForm(MachineInstr &Instr) {
    1355         298 :   switch (Instr.getOpcode()) {
    1356             :   default:
    1357             :     return AArch64::INSTRUCTION_LIST_END;
    1358             : 
    1359           0 :   case AArch64::ADDSWrr:
    1360             :   case AArch64::ADDSWri:
    1361             :   case AArch64::ADDSXrr:
    1362             :   case AArch64::ADDSXri:
    1363             :   case AArch64::SUBSWrr:
    1364             :   case AArch64::SUBSWri:
    1365             :   case AArch64::SUBSXrr:
    1366             :   case AArch64::SUBSXri:
    1367           0 :     return Instr.getOpcode();
    1368             : 
    1369           0 :   case AArch64::ADDWrr:
    1370           0 :     return AArch64::ADDSWrr;
    1371           2 :   case AArch64::ADDWri:
    1372           2 :     return AArch64::ADDSWri;
    1373           0 :   case AArch64::ADDXrr:
    1374           0 :     return AArch64::ADDSXrr;
    1375           4 :   case AArch64::ADDXri:
    1376           4 :     return AArch64::ADDSXri;
    1377           0 :   case AArch64::ADCWr:
    1378           0 :     return AArch64::ADCSWr;
    1379           0 :   case AArch64::ADCXr:
    1380           0 :     return AArch64::ADCSXr;
    1381           6 :   case AArch64::SUBWrr:
    1382           6 :     return AArch64::SUBSWrr;
    1383           0 :   case AArch64::SUBWri:
    1384           0 :     return AArch64::SUBSWri;
    1385           4 :   case AArch64::SUBXrr:
    1386           4 :     return AArch64::SUBSXrr;
    1387          44 :   case AArch64::SUBXri:
    1388          44 :     return AArch64::SUBSXri;
    1389           0 :   case AArch64::SBCWr:
    1390           0 :     return AArch64::SBCSWr;
    1391           0 :   case AArch64::SBCXr:
    1392           0 :     return AArch64::SBCSXr;
    1393           2 :   case AArch64::ANDWri:
    1394           2 :     return AArch64::ANDSWri;
    1395           0 :   case AArch64::ANDXri:
    1396           0 :     return AArch64::ANDSXri;
    1397             :   }
    1398             : }
    1399             : 
    1400             : /// Check if AArch64::NZCV should be alive in successors of MBB.
    1401          31 : static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
    1402          73 :   for (auto *BB : MBB->successors())
    1403          42 :     if (BB->isLiveIn(AArch64::NZCV))
    1404             :       return true;
    1405             :   return false;
    1406             : }
    1407             : 
    1408             : namespace {
    1409             : 
    1410             : struct UsedNZCV {
    1411             :   bool N = false;
    1412             :   bool Z = false;
    1413             :   bool C = false;
    1414             :   bool V = false;
    1415             : 
    1416             :   UsedNZCV() = default;
    1417             : 
    1418             :   UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    1419             :     this->N |= UsedFlags.N;
    1420             :     this->Z |= UsedFlags.Z;
    1421          35 :     this->C |= UsedFlags.C;
    1422          35 :     this->V |= UsedFlags.V;
    1423             :     return *this;
    1424             :   }
    1425             : };
    1426             : 
    1427             : } // end anonymous namespace
    1428             : 
    1429             : /// Find a condition code used by the instruction.
    1430             : /// Returns AArch64CC::Invalid if either the instruction does not use condition
    1431             : /// codes or we don't optimize CmpInstr in the presence of such instructions.
    1432          35 : static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
    1433          70 :   switch (Instr.getOpcode()) {
    1434             :   default:
    1435             :     return AArch64CC::Invalid;
    1436             : 
    1437          20 :   case AArch64::Bcc: {
    1438          20 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1439             :     assert(Idx >= 2);
    1440          40 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
    1441             :   }
    1442             : 
    1443          15 :   case AArch64::CSINVWr:
    1444             :   case AArch64::CSINVXr:
    1445             :   case AArch64::CSINCWr:
    1446             :   case AArch64::CSINCXr:
    1447             :   case AArch64::CSELWr:
    1448             :   case AArch64::CSELXr:
    1449             :   case AArch64::CSNEGWr:
    1450             :   case AArch64::CSNEGXr:
    1451             :   case AArch64::FCSELSrrr:
    1452             :   case AArch64::FCSELDrrr: {
    1453          15 :     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    1454             :     assert(Idx >= 1);
    1455          30 :     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
    1456             :   }
    1457             :   }
    1458             : }
    1459             : 
    1460             : static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
    1461             :   assert(CC != AArch64CC::Invalid);
    1462             :   UsedNZCV UsedFlags;
    1463             :   switch (CC) {
    1464             :   default:
    1465             :     break;
    1466             : 
    1467             :   case AArch64CC::EQ: // Z set
    1468             :   case AArch64CC::NE: // Z clear
    1469             :     UsedFlags.Z = true;
    1470             :     break;
    1471             : 
    1472             :   case AArch64CC::HI: // Z clear and C set
    1473             :   case AArch64CC::LS: // Z set   or  C clear
    1474             :     UsedFlags.Z = true;
    1475             :     LLVM_FALLTHROUGH;
    1476             :   case AArch64CC::HS: // C set
    1477             :   case AArch64CC::LO: // C clear
    1478             :     UsedFlags.C = true;
    1479             :     break;
    1480             : 
    1481             :   case AArch64CC::MI: // N set
    1482             :   case AArch64CC::PL: // N clear
    1483             :     UsedFlags.N = true;
    1484             :     break;
    1485             : 
    1486             :   case AArch64CC::VS: // V set
    1487             :   case AArch64CC::VC: // V clear
    1488             :     UsedFlags.V = true;
    1489             :     break;
    1490             : 
    1491             :   case AArch64CC::GT: // Z clear, N and V the same
    1492             :   case AArch64CC::LE: // Z set,   N and V differ
    1493             :     UsedFlags.Z = true;
    1494             :     LLVM_FALLTHROUGH;
    1495             :   case AArch64CC::GE: // N and V the same
    1496             :   case AArch64CC::LT: // N and V differ
    1497             :     UsedFlags.N = true;
    1498             :     UsedFlags.V = true;
    1499             :     break;
    1500             :   }
    1501             :   return UsedFlags;
    1502             : }
    1503             : 
    1504             : static bool isADDSRegImm(unsigned Opcode) {
    1505          31 :   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
    1506             : }
    1507             : 
    1508             : static bool isSUBSRegImm(unsigned Opcode) {
    1509          31 :   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
    1510             : }
    1511             : 
    1512             : /// Check if CmpInstr can be substituted by MI.
    1513             : ///
    1514             : /// CmpInstr can be substituted:
    1515             : /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
    1516             : /// - and, MI and CmpInstr are from the same MachineBB
    1517             : /// - and, condition flags are not alive in successors of the CmpInstr parent
    1518             : /// - and, if MI opcode is the S form there must be no defs of flags between
    1519             : ///        MI and CmpInstr
    1520             : ///        or if MI opcode is not the S form there must be neither defs of flags
    1521             : ///        nor uses of flags between MI and CmpInstr.
    1522             : /// - and  C/V flags are not used after CmpInstr
    1523          31 : static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
    1524             :                                        const TargetRegisterInfo *TRI) {
    1525             :   assert(MI);
    1526             :   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
    1527             :   assert(CmpInstr);
    1528             : 
    1529          31 :   const unsigned CmpOpcode = CmpInstr->getOpcode();
    1530          62 :   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    1531             :     return false;
    1532             : 
    1533          31 :   if (MI->getParent() != CmpInstr->getParent())
    1534             :     return false;
    1535             : 
    1536          31 :   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    1537             :     return false;
    1538             : 
    1539             :   AccessKind AccessToCheck = AK_Write;
    1540          62 :   if (sForm(*MI) != MI->getOpcode())
    1541             :     AccessToCheck = AK_All;
    1542          31 :   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    1543             :     return false;
    1544             : 
    1545             :   UsedNZCV NZCVUsedAfterCmp;
    1546          31 :   for (auto I = std::next(CmpInstr->getIterator()),
    1547          31 :             E = CmpInstr->getParent()->instr_end();
    1548         112 :        I != E; ++I) {
    1549             :     const MachineInstr &Instr = *I;
    1550          82 :     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
    1551          35 :       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
    1552          35 :       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
    1553             :         return false;
    1554             :       NZCVUsedAfterCmp |= getUsedNZCV(CC);
    1555             :     }
    1556             : 
    1557          82 :     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
    1558             :       break;
    1559             :   }
    1560             : 
    1561          31 :   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
    1562             : }
    1563             : 
    1564             : /// Substitute an instruction comparing to zero with another instruction
    1565             : /// which produces needed condition flags.
    1566             : ///
    1567             : /// Return true on success.
    1568         118 : bool AArch64InstrInfo::substituteCmpToZero(
    1569             :     MachineInstr &CmpInstr, unsigned SrcReg,
    1570             :     const MachineRegisterInfo *MRI) const {
    1571             :   assert(MRI);
    1572             :   // Get the unique definition of SrcReg.
    1573         118 :   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    1574         118 :   if (!MI)
    1575             :     return false;
    1576             : 
    1577             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    1578             : 
    1579         118 :   unsigned NewOpc = sForm(*MI);
    1580         118 :   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    1581             :     return false;
    1582             : 
    1583          31 :   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    1584             :     return false;
    1585             : 
    1586             :   // Update the instruction to set NZCV.
    1587           5 :   MI->setDesc(get(NewOpc));
    1588           5 :   CmpInstr.eraseFromParent();
    1589           5 :   bool succeeded = UpdateOperandRegClass(*MI);
    1590             :   (void)succeeded;
    1591             :   assert(succeeded && "Some operands reg class are incompatible!");
    1592           5 :   MI->addRegisterDefined(AArch64::NZCV, TRI);
    1593           5 :   return true;
    1594             : }
    1595             : 
    1596       10235 : bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    1597       20470 :   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    1598             :     return false;
    1599             : 
    1600          21 :   MachineBasicBlock &MBB = *MI.getParent();
    1601             :   DebugLoc DL = MI.getDebugLoc();
    1602          21 :   unsigned Reg = MI.getOperand(0).getReg();
    1603             :   const GlobalValue *GV =
    1604          21 :       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
    1605          21 :   const TargetMachine &TM = MBB.getParent()->getTarget();
    1606          21 :   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
    1607             :   const unsigned char MO_NC = AArch64II::MO_NC;
    1608             : 
    1609          21 :   if ((OpFlags & AArch64II::MO_GOT) != 0) {
    1610          39 :     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
    1611             :         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    1612          39 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1613          13 :         .addReg(Reg, RegState::Kill)
    1614             :         .addImm(0)
    1615          13 :         .addMemOperand(*MI.memoperands_begin());
    1616           8 :   } else if (TM.getCodeModel() == CodeModel::Large) {
    1617           4 :     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
    1618             :         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
    1619             :         .addImm(0);
    1620           6 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1621           2 :         .addReg(Reg, RegState::Kill)
    1622             :         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
    1623             :         .addImm(16);
    1624           6 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1625           2 :         .addReg(Reg, RegState::Kill)
    1626             :         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
    1627             :         .addImm(32);
    1628           6 :     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
    1629           2 :         .addReg(Reg, RegState::Kill)
    1630             :         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
    1631             :         .addImm(48);
    1632           6 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1633           2 :         .addReg(Reg, RegState::Kill)
    1634             :         .addImm(0)
    1635           2 :         .addMemOperand(*MI.memoperands_begin());
    1636             :   } else {
    1637          18 :     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
    1638           6 :         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    1639           6 :     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    1640          18 :     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
    1641           6 :         .addReg(Reg, RegState::Kill)
    1642             :         .addGlobalAddress(GV, 0, LoFlags)
    1643           6 :         .addMemOperand(*MI.memoperands_begin());
    1644             :   }
    1645             : 
    1646          21 :   MBB.erase(MI);
    1647             : 
    1648             :   return true;
    1649             : }
    1650             : 
    1651             : /// Return true if this is this instruction has a non-zero immediate
    1652         401 : bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
    1653         802 :   switch (MI.getOpcode()) {
    1654             :   default:
    1655             :     break;
    1656         337 :   case AArch64::ADDSWrs:
    1657             :   case AArch64::ADDSXrs:
    1658             :   case AArch64::ADDWrs:
    1659             :   case AArch64::ADDXrs:
    1660             :   case AArch64::ANDSWrs:
    1661             :   case AArch64::ANDSXrs:
    1662             :   case AArch64::ANDWrs:
    1663             :   case AArch64::ANDXrs:
    1664             :   case AArch64::BICSWrs:
    1665             :   case AArch64::BICSXrs:
    1666             :   case AArch64::BICWrs:
    1667             :   case AArch64::BICXrs:
    1668             :   case AArch64::EONWrs:
    1669             :   case AArch64::EONXrs:
    1670             :   case AArch64::EORWrs:
    1671             :   case AArch64::EORXrs:
    1672             :   case AArch64::ORNWrs:
    1673             :   case AArch64::ORNXrs:
    1674             :   case AArch64::ORRWrs:
    1675             :   case AArch64::ORRXrs:
    1676             :   case AArch64::SUBSWrs:
    1677             :   case AArch64::SUBSXrs:
    1678             :   case AArch64::SUBWrs:
    1679             :   case AArch64::SUBXrs:
    1680         674 :     if (MI.getOperand(3).isImm()) {
    1681         337 :       unsigned val = MI.getOperand(3).getImm();
    1682         337 :       return (val != 0);
    1683           0 :     }
    1684             :     break;
    1685             :   }
    1686             :   return false;
    1687             : }
    1688             : 
    1689             : /// Return true if this is this instruction has a non-zero immediate
    1690          15 : bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
    1691          30 :   switch (MI.getOpcode()) {
    1692             :   default:
    1693             :     break;
    1694          15 :   case AArch64::ADDSWrx:
    1695             :   case AArch64::ADDSXrx:
    1696             :   case AArch64::ADDSXrx64:
    1697             :   case AArch64::ADDWrx:
    1698             :   case AArch64::ADDXrx:
    1699             :   case AArch64::ADDXrx64:
    1700             :   case AArch64::SUBSWrx:
    1701             :   case AArch64::SUBSXrx:
    1702             :   case AArch64::SUBSXrx64:
    1703             :   case AArch64::SUBWrx:
    1704             :   case AArch64::SUBXrx:
    1705             :   case AArch64::SUBXrx64:
    1706          30 :     if (MI.getOperand(3).isImm()) {
    1707          15 :       unsigned val = MI.getOperand(3).getImm();
    1708          15 :       return (val != 0);
    1709           0 :     }
    1710             :     break;
    1711             :   }
    1712             : 
    1713             :   return false;
    1714             : }
    1715             : 
    1716             : // Return true if this instruction simply sets its single destination register
    1717             : // to zero. This is equivalent to a register rename of the zero-register.
    1718          90 : bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
    1719         180 :   switch (MI.getOpcode()) {
    1720             :   default:
    1721             :     break;
    1722          46 :   case AArch64::MOVZWi:
    1723             :   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    1724          92 :     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
    1725             :       assert(MI.getDesc().getNumOperands() == 3 &&
    1726             :              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
    1727             :       return true;
    1728             :     }
    1729             :     break;
    1730          16 :   case AArch64::ANDWri: // and Rd, Rzr, #imm
    1731          16 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1732          28 :   case AArch64::ANDXri:
    1733          28 :     return MI.getOperand(1).getReg() == AArch64::XZR;
    1734           0 :   case TargetOpcode::COPY:
    1735           0 :     return MI.getOperand(1).getReg() == AArch64::WZR;
    1736             :   }
    1737             :   return false;
    1738             : }
    1739             : 
    1740             : // Return true if this instruction simply renames a general register without
    1741             : // modifying bits.
    1742       10252 : bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
    1743       20504 :   switch (MI.getOpcode()) {
    1744             :   default:
    1745             :     break;
    1746       10046 :   case TargetOpcode::COPY: {
    1747             :     // GPR32 copies will by lowered to ORRXrs
    1748       10046 :     unsigned DstReg = MI.getOperand(0).getReg();
    1749       13711 :     return (AArch64::GPR32RegClass.contains(DstReg) ||
    1750        9382 :             AArch64::GPR64RegClass.contains(DstReg));
    1751             :   }
    1752           0 :   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    1753           0 :     if (MI.getOperand(1).getReg() == AArch64::XZR) {
    1754             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1755             :              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
    1756             :       return true;
    1757             :     }
    1758             :     break;
    1759           0 :   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    1760           0 :     if (MI.getOperand(2).getImm() == 0) {
    1761             :       assert(MI.getDesc().getNumOperands() == 4 &&
    1762             :              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
    1763             :       return true;
    1764             :     }
    1765             :     break;
    1766             :   }
    1767             :   return false;
    1768             : }
    1769             : 
    1770             : // Return true if this instruction simply renames a general register without
    1771             : // modifying bits.
    1772        8827 : bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
    1773       17654 :   switch (MI.getOpcode()) {
    1774             :   default:
    1775             :     break;
    1776        8621 :   case TargetOpcode::COPY: {
    1777             :     // FPR64 copies will by lowered to ORR.16b
    1778        8621 :     unsigned DstReg = MI.getOperand(0).getReg();
    1779        9749 :     return (AArch64::FPR64RegClass.contains(DstReg) ||
    1780        8313 :             AArch64::FPR128RegClass.contains(DstReg));
    1781             :   }
    1782           0 :   case AArch64::ORRv16i8:
    1783           0 :     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
    1784             :       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
    1785             :              "invalid ORRv16i8 operands");
    1786             :       return true;
    1787             :     }
    1788             :     break;
    1789             :   }
    1790             :   return false;
    1791             : }
    1792             : 
    1793        9430 : unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
    1794             :                                                int &FrameIndex) const {
    1795       18860 :   switch (MI.getOpcode()) {
    1796             :   default:
    1797             :     break;
    1798         533 :   case AArch64::LDRWui:
    1799             :   case AArch64::LDRXui:
    1800             :   case AArch64::LDRBui:
    1801             :   case AArch64::LDRHui:
    1802             :   case AArch64::LDRSui:
    1803             :   case AArch64::LDRDui:
    1804             :   case AArch64::LDRQui:
    1805        1417 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1806         884 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1807         351 :       FrameIndex = MI.getOperand(1).getIndex();
    1808         351 :       return MI.getOperand(0).getReg();
    1809             :     }
    1810             :     break;
    1811             :   }
    1812             : 
    1813             :   return 0;
    1814             : }
    1815             : 
    1816        4473 : unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
    1817             :                                               int &FrameIndex) const {
    1818        4473 :   switch (MI.getOpcode()) {
    1819             :   default:
    1820             :     break;
    1821         152 :   case AArch64::STRWui:
    1822             :   case AArch64::STRXui:
    1823             :   case AArch64::STRBui:
    1824             :   case AArch64::STRHui:
    1825             :   case AArch64::STRSui:
    1826             :   case AArch64::STRDui:
    1827             :   case AArch64::STRQui:
    1828         362 :     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
    1829         218 :         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
    1830          57 :       FrameIndex = MI.getOperand(1).getIndex();
    1831          57 :       return MI.getOperand(0).getReg();
    1832             :     }
    1833             :     break;
    1834             :   }
    1835             :   return 0;
    1836             : }
    1837             : 
    1838             : /// Return true if this is load/store scales or extends its register offset.
    1839             : /// This refers to scaling a dynamic index as opposed to scaled immediates.
    1840             : /// MI should be a memory op that allows scaled addressing.
    1841         679 : bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
    1842        1358 :   switch (MI.getOpcode()) {
    1843             :   default:
    1844             :     break;
    1845         679 :   case AArch64::LDRBBroW:
    1846             :   case AArch64::LDRBroW:
    1847             :   case AArch64::LDRDroW:
    1848             :   case AArch64::LDRHHroW:
    1849             :   case AArch64::LDRHroW:
    1850             :   case AArch64::LDRQroW:
    1851             :   case AArch64::LDRSBWroW:
    1852             :   case AArch64::LDRSBXroW:
    1853             :   case AArch64::LDRSHWroW:
    1854             :   case AArch64::LDRSHXroW:
    1855             :   case AArch64::LDRSWroW:
    1856             :   case AArch64::LDRSroW:
    1857             :   case AArch64::LDRWroW:
    1858             :   case AArch64::LDRXroW:
    1859             :   case AArch64::STRBBroW:
    1860             :   case AArch64::STRBroW:
    1861             :   case AArch64::STRDroW:
    1862             :   case AArch64::STRHHroW:
    1863             :   case AArch64::STRHroW:
    1864             :   case AArch64::STRQroW:
    1865             :   case AArch64::STRSroW:
    1866             :   case AArch64::STRWroW:
    1867             :   case AArch64::STRXroW:
    1868             :   case AArch64::LDRBBroX:
    1869             :   case AArch64::LDRBroX:
    1870             :   case AArch64::LDRDroX:
    1871             :   case AArch64::LDRHHroX:
    1872             :   case AArch64::LDRHroX:
    1873             :   case AArch64::LDRQroX:
    1874             :   case AArch64::LDRSBWroX:
    1875             :   case AArch64::LDRSBXroX:
    1876             :   case AArch64::LDRSHWroX:
    1877             :   case AArch64::LDRSHXroX:
    1878             :   case AArch64::LDRSWroX:
    1879             :   case AArch64::LDRSroX:
    1880             :   case AArch64::LDRWroX:
    1881             :   case AArch64::LDRXroX:
    1882             :   case AArch64::STRBBroX:
    1883             :   case AArch64::STRBroX:
    1884             :   case AArch64::STRDroX:
    1885             :   case AArch64::STRHHroX:
    1886             :   case AArch64::STRHroX:
    1887             :   case AArch64::STRQroX:
    1888             :   case AArch64::STRSroX:
    1889             :   case AArch64::STRWroX:
    1890             :   case AArch64::STRXroX:
    1891             : 
    1892         679 :     unsigned Val = MI.getOperand(3).getImm();
    1893             :     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    1894         679 :     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
    1895             :   }
    1896             :   return false;
    1897             : }
    1898             : 
    1899             : /// Check all MachineMemOperands for a hint to suppress pairing.
    1900       20317 : bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
    1901             :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1902       12902 :     return MMO->getFlags() & MOSuppressPair;
    1903       20317 :   });
    1904             : }
    1905             : 
    1906             : /// Set a flag on the first MachineMemOperand to suppress pairing.
    1907           9 : void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
    1908           9 :   if (MI.memoperands_empty())
    1909             :     return;
    1910           9 :   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
    1911             : }
    1912             : 
    1913             : /// Check all MachineMemOperands for a hint that the load/store is strided.
    1914         133 : bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
    1915             :   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
    1916          43 :     return MMO->getFlags() & MOStridedAccess;
    1917         133 :   });
    1918             : }
    1919             : 
    1920       32035 : bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
    1921       32035 :   switch (Opc) {
    1922             :   default:
    1923             :     return false;
    1924        1177 :   case AArch64::STURSi:
    1925             :   case AArch64::STURDi:
    1926             :   case AArch64::STURQi:
    1927             :   case AArch64::STURBBi:
    1928             :   case AArch64::STURHHi:
    1929             :   case AArch64::STURWi:
    1930             :   case AArch64::STURXi:
    1931             :   case AArch64::LDURSi:
    1932             :   case AArch64::LDURDi:
    1933             :   case AArch64::LDURQi:
    1934             :   case AArch64::LDURWi:
    1935             :   case AArch64::LDURXi:
    1936             :   case AArch64::LDURSWi:
    1937             :   case AArch64::LDURHHi:
    1938             :   case AArch64::LDURBBi:
    1939             :   case AArch64::LDURSBWi:
    1940             :   case AArch64::LDURSHWi:
    1941        1177 :     return true;
    1942             :   }
    1943             : }
    1944             : 
    1945       68778 : bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
    1946      137556 :   switch (MI.getOpcode()) {
    1947             :   default:
    1948             :     return false;
    1949             :   // Scaled instructions.
    1950        9546 :   case AArch64::STRSui:
    1951             :   case AArch64::STRDui:
    1952             :   case AArch64::STRQui:
    1953             :   case AArch64::STRXui:
    1954             :   case AArch64::STRWui:
    1955             :   case AArch64::LDRSui:
    1956             :   case AArch64::LDRDui:
    1957             :   case AArch64::LDRQui:
    1958             :   case AArch64::LDRXui:
    1959             :   case AArch64::LDRWui:
    1960             :   case AArch64::LDRSWui:
    1961             :   // Unscaled instructions.
    1962             :   case AArch64::STURSi:
    1963             :   case AArch64::STURDi:
    1964             :   case AArch64::STURQi:
    1965             :   case AArch64::STURWi:
    1966             :   case AArch64::STURXi:
    1967             :   case AArch64::LDURSi:
    1968             :   case AArch64::LDURDi:
    1969             :   case AArch64::LDURQi:
    1970             :   case AArch64::LDURWi:
    1971             :   case AArch64::LDURXi:
    1972             :   case AArch64::LDURSWi:
    1973        9546 :     return true;
    1974             :   }
    1975             : }
    1976             : 
    1977          23 : unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
    1978             :                                                    bool &Is64Bit) {
    1979          23 :   switch (Opc) {
    1980           0 :   default:
    1981           0 :     llvm_unreachable("Opcode has no flag setting equivalent!");
    1982             :   // 32-bit cases:
    1983           3 :   case AArch64::ADDWri:
    1984           3 :     Is64Bit = false;
    1985           3 :     return AArch64::ADDSWri;
    1986           4 :   case AArch64::ADDWrr:
    1987           4 :     Is64Bit = false;
    1988           4 :     return AArch64::ADDSWrr;
    1989           0 :   case AArch64::ADDWrs:
    1990           0 :     Is64Bit = false;
    1991           0 :     return AArch64::ADDSWrs;
    1992           0 :   case AArch64::ADDWrx:
    1993           0 :     Is64Bit = false;
    1994           0 :     return AArch64::ADDSWrx;
    1995           3 :   case AArch64::ANDWri:
    1996           3 :     Is64Bit = false;
    1997           3 :     return AArch64::ANDSWri;
    1998           0 :   case AArch64::ANDWrr:
    1999           0 :     Is64Bit = false;
    2000           0 :     return AArch64::ANDSWrr;
    2001           0 :   case AArch64::ANDWrs:
    2002           0 :     Is64Bit = false;
    2003           0 :     return AArch64::ANDSWrs;
    2004           1 :   case AArch64::BICWrr:
    2005           1 :     Is64Bit = false;
    2006           1 :     return AArch64::BICSWrr;
    2007           0 :   case AArch64::BICWrs:
    2008           0 :     Is64Bit = false;
    2009           0 :     return AArch64::BICSWrs;
    2010           0 :   case AArch64::SUBWri:
    2011           0 :     Is64Bit = false;
    2012           0 :     return AArch64::SUBSWri;
    2013           0 :   case AArch64::SUBWrr:
    2014           0 :     Is64Bit = false;
    2015           0 :     return AArch64::SUBSWrr;
    2016           0 :   case AArch64::SUBWrs:
    2017           0 :     Is64Bit = false;
    2018           0 :     return AArch64::SUBSWrs;
    2019           0 :   case AArch64::SUBWrx:
    2020           0 :     Is64Bit = false;
    2021           0 :     return AArch64::SUBSWrx;
    2022             :   // 64-bit cases:
    2023          10 :   case AArch64::ADDXri:
    2024          10 :     Is64Bit = true;
    2025          10 :     return AArch64::ADDSXri;
    2026           1 :   case AArch64::ADDXrr:
    2027           1 :     Is64Bit = true;
    2028           1 :     return AArch64::ADDSXrr;
    2029           0 :   case AArch64::ADDXrs:
    2030           0 :     Is64Bit = true;
    2031           0 :     return AArch64::ADDSXrs;
    2032           0 :   case AArch64::ADDXrx:
    2033           0 :     Is64Bit = true;
    2034           0 :     return AArch64::ADDSXrx;
    2035           1 :   case AArch64::ANDXri:
    2036           1 :     Is64Bit = true;
    2037           1 :     return AArch64::ANDSXri;
    2038           0 :   case AArch64::ANDXrr:
    2039           0 :     Is64Bit = true;
    2040           0 :     return AArch64::ANDSXrr;
    2041           0 :   case AArch64::ANDXrs:
    2042           0 :     Is64Bit = true;
    2043           0 :     return AArch64::ANDSXrs;
    2044           0 :   case AArch64::BICXrr:
    2045           0 :     Is64Bit = true;
    2046           0 :     return AArch64::BICSXrr;
    2047           0 :   case AArch64::BICXrs:
    2048           0 :     Is64Bit = true;
    2049           0 :     return AArch64::BICSXrs;
    2050           0 :   case AArch64::SUBXri:
    2051           0 :     Is64Bit = true;
    2052           0 :     return AArch64::SUBSXri;
    2053           0 :   case AArch64::SUBXrr:
    2054           0 :     Is64Bit = true;
    2055           0 :     return AArch64::SUBSXrr;
    2056           0 :   case AArch64::SUBXrs:
    2057           0 :     Is64Bit = true;
    2058           0 :     return AArch64::SUBSXrs;
    2059           0 :   case AArch64::SUBXrx:
    2060           0 :     Is64Bit = true;
    2061           0 :     return AArch64::SUBSXrx;
    2062             :   }
    2063             : }
    2064             : 
    2065             : // Is this a candidate for ld/st merging or pairing?  For example, we don't
    2066             : // touch volatiles or load/stores that have a hint to avoid pair formation.
    2067        9465 : bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
    2068             :   // If this is a volatile load/store, don't mess with it.
    2069        9465 :   if (MI.hasOrderedMemoryRef())
    2070             :     return false;
    2071             : 
    2072             :   // Make sure this is a reg+imm (as opposed to an address reloc).
    2073             :   assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
    2074       15564 :   if (!MI.getOperand(2).isImm())
    2075             :     return false;
    2076             : 
    2077             :   // Can't merge/pair if the instruction modifies the base register.
    2078             :   // e.g., ldr x0, [x0]
    2079        7098 :   unsigned BaseReg = MI.getOperand(1).getReg();
    2080             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2081        7098 :   if (MI.modifiesRegister(BaseReg, TRI))
    2082             :     return false;
    2083             : 
    2084             :   // Check if this load/store has a hint to avoid pair formation.
    2085             :   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
    2086        6989 :   if (isLdStPairSuppressed(MI))
    2087             :     return false;
    2088             : 
    2089             :   // On some CPUs quad load/store pairs are slower than two single load/stores.
    2090        6978 :   if (Subtarget.isPaired128Slow()) {
    2091         244 :     switch (MI.getOpcode()) {
    2092             :     default:
    2093             :       break;
    2094             :     case AArch64::LDURQi:
    2095             :     case AArch64::STURQi:
    2096             :     case AArch64::LDRQui:
    2097             :     case AArch64::STRQui:
    2098             :       return false;
    2099             :     }
    2100             :   }
    2101             : 
    2102             :   return true;
    2103             : }
    2104             : 
    2105       13369 : bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
    2106             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
    2107             :     const TargetRegisterInfo *TRI) const {
    2108             :   unsigned Width;
    2109       13369 :   return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
    2110             : }
    2111             : 
    2112       23402 : bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
    2113             :     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
    2114             :     const TargetRegisterInfo *TRI) const {
    2115             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    2116             :   // Handle only loads/stores with base register followed by immediate offset.
    2117       23402 :   if (LdSt.getNumExplicitOperands() == 3) {
    2118             :     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
    2119       56231 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
    2120             :       return false;
    2121        3922 :   } else if (LdSt.getNumExplicitOperands() == 4) {
    2122             :     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
    2123        7982 :     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
    2124             :         !LdSt.getOperand(3).isImm())
    2125             :       return false;
    2126             :   } else
    2127             :     return false;
    2128             : 
    2129             :   // Get the scaling factor for the instruction and set the width for the
    2130             :   // instruction.
    2131       13820 :   unsigned Scale = 0;
    2132             :   int64_t Dummy1, Dummy2;
    2133             : 
    2134             :   // If this returns false, then it's an instruction we don't want to handle.
    2135       27640 :   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
    2136             :     return false;
    2137             : 
    2138             :   // Compute the offset. Offset is calculated as the immediate operand
    2139             :   // multiplied by the scaling factor. Unscaled instructions have scaling factor
    2140             :   // set to 1.
    2141       13500 :   if (LdSt.getNumExplicitOperands() == 3) {
    2142       12455 :     BaseReg = LdSt.getOperand(1).getReg();
    2143       12455 :     Offset = LdSt.getOperand(2).getImm() * Scale;
    2144             :   } else {
    2145             :     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
    2146        1045 :     BaseReg = LdSt.getOperand(2).getReg();
    2147        1045 :     Offset = LdSt.getOperand(3).getImm() * Scale;
    2148             :   }
    2149             :   return true;
    2150             : }
    2151             : 
    2152             : MachineOperand &
    2153          38 : AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
    2154             :   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
    2155          38 :   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
    2156             :   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
    2157          38 :   return OfsOp;
    2158             : }
    2159             : 
    2160       13951 : bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
    2161             :                                     unsigned &Width, int64_t &MinOffset,
    2162             :                                     int64_t &MaxOffset) const {
    2163       13951 :   switch (Opcode) {
    2164             :   // Not a memory operation or something we want to handle.
    2165         320 :   default:
    2166         320 :     Scale = Width = 0;
    2167         320 :     MinOffset = MaxOffset = 0;
    2168         320 :     return false;
    2169          19 :   case AArch64::STRWpost:
    2170             :   case AArch64::LDRWpost:
    2171          19 :     Width = 32;
    2172          19 :     Scale = 4;
    2173          19 :     MinOffset = -256;
    2174          19 :     MaxOffset = 255;
    2175          19 :     break;
    2176         208 :   case AArch64::LDURQi:
    2177             :   case AArch64::STURQi:
    2178         208 :     Width = 16;
    2179         208 :     Scale = 1;
    2180         208 :     MinOffset = -256;
    2181         208 :     MaxOffset = 255;
    2182         208 :     break;
    2183         197 :   case AArch64::LDURXi:
    2184             :   case AArch64::LDURDi:
    2185             :   case AArch64::STURXi:
    2186             :   case AArch64::STURDi:
    2187         197 :     Width = 8;
    2188         197 :     Scale = 1;
    2189         197 :     MinOffset = -256;
    2190         197 :     MaxOffset = 255;
    2191         197 :     break;
    2192         220 :   case AArch64::LDURWi:
    2193             :   case AArch64::LDURSi:
    2194             :   case AArch64::LDURSWi:
    2195             :   case AArch64::STURWi:
    2196             :   case AArch64::STURSi:
    2197         220 :     Width = 4;
    2198         220 :     Scale = 1;
    2199         220 :     MinOffset = -256;
    2200         220 :     MaxOffset = 255;
    2201         220 :     break;
    2202         114 :   case AArch64::LDURHi:
    2203             :   case AArch64::LDURHHi:
    2204             :   case AArch64::LDURSHXi:
    2205             :   case AArch64::LDURSHWi:
    2206             :   case AArch64::STURHi:
    2207             :   case AArch64::STURHHi:
    2208         114 :     Width = 2;
    2209         114 :     Scale = 1;
    2210         114 :     MinOffset = -256;
    2211         114 :     MaxOffset = 255;
    2212         114 :     break;
    2213          90 :   case AArch64::LDURBi:
    2214             :   case AArch64::LDURBBi:
    2215             :   case AArch64::LDURSBXi:
    2216             :   case AArch64::LDURSBWi:
    2217             :   case AArch64::STURBi:
    2218             :   case AArch64::STURBBi:
    2219          90 :     Width = 1;
    2220          90 :     Scale = 1;
    2221          90 :     MinOffset = -256;
    2222          90 :     MaxOffset = 255;
    2223          90 :     break;
    2224         327 :   case AArch64::LDPQi:
    2225             :   case AArch64::LDNPQi:
    2226             :   case AArch64::STPQi:
    2227             :   case AArch64::STNPQi:
    2228         327 :     Scale = 16;
    2229         327 :     Width = 32;
    2230         327 :     MinOffset = -64;
    2231         327 :     MaxOffset = 63;
    2232         327 :     break;
    2233        3825 :   case AArch64::LDRQui:
    2234             :   case AArch64::STRQui:
    2235        3825 :     Scale = Width = 16;
    2236        3825 :     MinOffset = 0;
    2237        3825 :     MaxOffset = 4095;
    2238        3825 :     break;
    2239         537 :   case AArch64::LDPXi:
    2240             :   case AArch64::LDPDi:
    2241             :   case AArch64::LDNPXi:
    2242             :   case AArch64::LDNPDi:
    2243             :   case AArch64::STPXi:
    2244             :   case AArch64::STPDi:
    2245             :   case AArch64::STNPXi:
    2246             :   case AArch64::STNPDi:
    2247         537 :     Scale = 8;
    2248         537 :     Width = 16;
    2249         537 :     MinOffset = -64;
    2250         537 :     MaxOffset = 63;
    2251         537 :     break;
    2252        4732 :   case AArch64::LDRXui:
    2253             :   case AArch64::LDRDui:
    2254             :   case AArch64::STRXui:
    2255             :   case AArch64::STRDui:
    2256        4732 :     Scale = Width = 8;
    2257        4732 :     MinOffset = 0;
    2258        4732 :     MaxOffset = 4095;
    2259        4732 :     break;
    2260         170 :   case AArch64::LDPWi:
    2261             :   case AArch64::LDPSi:
    2262             :   case AArch64::LDNPWi:
    2263             :   case AArch64::LDNPSi:
    2264             :   case AArch64::STPWi:
    2265             :   case AArch64::STPSi:
    2266             :   case AArch64::STNPWi:
    2267             :   case AArch64::STNPSi:
    2268         170 :     Scale = 4;
    2269         170 :     Width = 8;
    2270         170 :     MinOffset = -64;
    2271         170 :     MaxOffset = 63;
    2272         170 :     break;
    2273        2228 :   case AArch64::LDRWui:
    2274             :   case AArch64::LDRSui:
    2275             :   case AArch64::LDRSWui:
    2276             :   case AArch64::STRWui:
    2277             :   case AArch64::STRSui:
    2278        2228 :     Scale = Width = 4;
    2279        2228 :     MinOffset = 0;
    2280        2228 :     MaxOffset = 4095;
    2281        2228 :     break;
    2282         241 :   case AArch64::LDRHui:
    2283             :   case AArch64::LDRHHui:
    2284             :   case AArch64::STRHui:
    2285             :   case AArch64::STRHHui:
    2286         241 :     Scale = Width = 2;
    2287         241 :     MinOffset = 0;
    2288         241 :     MaxOffset = 4095;
    2289         241 :     break;
    2290         723 :   case AArch64::LDRBui:
    2291             :   case AArch64::LDRBBui:
    2292             :   case AArch64::STRBui:
    2293             :   case AArch64::STRBBui:
    2294         723 :     Scale = Width = 1;
    2295         723 :     MinOffset = 0;
    2296         723 :     MaxOffset = 4095;
    2297         723 :     break;
    2298             :   }
    2299             : 
    2300             :   return true;
    2301             : }
    2302             : 
    2303             : // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
    2304             : // scaled.
    2305          55 : static bool scaleOffset(unsigned Opc, int64_t &Offset) {
    2306             :   unsigned OffsetStride = 1;
    2307          55 :   switch (Opc) {
    2308             :   default:
    2309             :     return false;
    2310             :   case AArch64::LDURQi:
    2311             :   case AArch64::STURQi:
    2312             :     OffsetStride = 16;
    2313             :     break;
    2314          12 :   case AArch64::LDURXi:
    2315             :   case AArch64::LDURDi:
    2316             :   case AArch64::STURXi:
    2317             :   case AArch64::STURDi:
    2318             :     OffsetStride = 8;
    2319          12 :     break;
    2320          35 :   case AArch64::LDURWi:
    2321             :   case AArch64::LDURSi:
    2322             :   case AArch64::LDURSWi:
    2323             :   case AArch64::STURWi:
    2324             :   case AArch64::STURSi:
    2325             :     OffsetStride = 4;
    2326          35 :     break;
    2327             :   }
    2328             :   // If the byte-offset isn't a multiple of the stride, we can't scale this
    2329             :   // offset.
    2330          55 :   if (Offset % OffsetStride != 0)
    2331             :     return false;
    2332             : 
    2333             :   // Convert the byte-offset used by unscaled into an "element" offset used
    2334             :   // by the scaled pair load/store instructions.
    2335          48 :   Offset /= OffsetStride;
    2336          48 :   return true;
    2337             : }
    2338             : 
    2339             : static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
    2340         552 :   if (FirstOpc == SecondOpc)
    2341             :     return true;
    2342             :   // We can also pair sign-ext and zero-ext instructions.
    2343          74 :   switch (FirstOpc) {
    2344             :   default:
    2345             :     return false;
    2346           3 :   case AArch64::LDRWui:
    2347             :   case AArch64::LDURWi:
    2348           3 :     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
    2349           4 :   case AArch64::LDRSWui:
    2350             :   case AArch64::LDURSWi:
    2351           4 :     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
    2352             :   }
    2353             :   // These instructions can't be paired based on their opcodes.
    2354             :   return false;
    2355             : }
    2356             : 
    2357             : /// Detect opportunities for ldp/stp formation.
    2358             : ///
    2359             : /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
    2360        1530 : bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
    2361             :                                            unsigned BaseReg1,
    2362             :                                            MachineInstr &SecondLdSt,
    2363             :                                            unsigned BaseReg2,
    2364             :                                            unsigned NumLoads) const {
    2365        1530 :   if (BaseReg1 != BaseReg2)
    2366             :     return false;
    2367             : 
    2368             :   // Only cluster up to a single pair.
    2369         778 :   if (NumLoads > 1)
    2370             :     return false;
    2371             : 
    2372         623 :   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
    2373             :     return false;
    2374             : 
    2375             :   // Can we pair these instructions based on their opcodes?
    2376         552 :   unsigned FirstOpc = FirstLdSt.getOpcode();
    2377         552 :   unsigned SecondOpc = SecondLdSt.getOpcode();
    2378           7 :   if (!canPairLdStOpc(FirstOpc, SecondOpc))
    2379             :     return false;
    2380             : 
    2381             :   // Can't merge volatiles or load/stores that have a hint to avoid pair
    2382             :   // formation, for example.
    2383         938 :   if (!isCandidateToMergeOrPair(FirstLdSt) ||
    2384         454 :       !isCandidateToMergeOrPair(SecondLdSt))
    2385             :     return false;
    2386             : 
    2387             :   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
    2388         452 :   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
    2389         452 :   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
    2390             :     return false;
    2391             : 
    2392         445 :   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
    2393         445 :   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
    2394             :     return false;
    2395             : 
    2396             :   // Pairwise instructions have a 7-bit signed offset field.
    2397         445 :   if (Offset1 > 63 || Offset1 < -64)
    2398             :     return false;
    2399             : 
    2400             :   // The caller should already have ordered First/SecondLdSt by offset.
    2401             :   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
    2402         431 :   return Offset1 + 1 == Offset2;
    2403             : }
    2404             : 
    2405          87 : static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
    2406             :                                             unsigned Reg, unsigned SubIdx,
    2407             :                                             unsigned State,
    2408             :                                             const TargetRegisterInfo *TRI) {
    2409          87 :   if (!SubIdx)
    2410           0 :     return MIB.addReg(Reg, State);
    2411             : 
    2412          87 :   if (TargetRegisterInfo::isPhysicalRegister(Reg))
    2413          87 :     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
    2414           0 :   return MIB.addReg(Reg, State, SubIdx);
    2415             : }
    2416             : 
    2417             : static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
    2418             :                                         unsigned NumRegs) {
    2419             :   // We really want the positive remainder mod 32 here, that happens to be
    2420             :   // easily obtainable with a mask.
    2421          11 :   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
    2422             : }
    2423             : 
    2424          11 : void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
    2425             :                                         MachineBasicBlock::iterator I,
    2426             :                                         const DebugLoc &DL, unsigned DestReg,
    2427             :                                         unsigned SrcReg, bool KillSrc,
    2428             :                                         unsigned Opcode,
    2429             :                                         ArrayRef<unsigned> Indices) const {
    2430             :   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
    2431             :   const TargetRegisterInfo *TRI = &getRegisterInfo();
    2432          11 :   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
    2433             :   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
    2434          11 :   unsigned NumRegs = Indices.size();
    2435             : 
    2436          11 :   int SubReg = 0, End = NumRegs, Incr = 1;
    2437          11 :   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    2438           4 :     SubReg = NumRegs - 1;
    2439             :     End = -1;
    2440             :     Incr = -1;
    2441             :   }
    2442             : 
    2443          69 :   for (; SubReg != End; SubReg += Incr) {
    2444          58 :     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    2445          58 :     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    2446          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    2447          29 :     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
    2448             :   }
    2449          11 : }
    2450             : 
    2451        3427 : void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    2452             :                                    MachineBasicBlock::iterator I,
    2453             :                                    const DebugLoc &DL, unsigned DestReg,
    2454             :                                    unsigned SrcReg, bool KillSrc) const {
    2455        7018 :   if (AArch64::GPR32spRegClass.contains(DestReg) &&
    2456         662 :       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    2457             :     const TargetRegisterInfo *TRI = &getRegisterInfo();
    2458             : 
    2459         852 :     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    2460             :       // If either operand is WSP, expand to ADD #0.
    2461           0 :       if (Subtarget.hasZeroCycleRegMove()) {
    2462             :         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
    2463             :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2464           0 :                                                      &AArch64::GPR64spRegClass);
    2465             :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2466           0 :                                                     &AArch64::GPR64spRegClass);
    2467             :         // This instruction is reading and writing X registers.  This may upset
    2468             :         // the register scavenger and machine verifier, so we need to indicate
    2469             :         // that we are reading an undefined value from SrcRegX, but a proper
    2470             :         // value from SrcReg.
    2471           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
    2472           0 :             .addReg(SrcRegX, RegState::Undef)
    2473             :             .addImm(0)
    2474             :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    2475           0 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2476             :       } else {
    2477           0 :         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
    2478           0 :             .addReg(SrcReg, getKillRegState(KillSrc))
    2479             :             .addImm(0)
    2480             :             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2481             :       }
    2482         852 :     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
    2483          94 :       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
    2484             :           .addImm(0)
    2485             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2486             :     } else {
    2487         805 :       if (Subtarget.hasZeroCycleRegMove()) {
    2488             :         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
    2489             :         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
    2490          76 :                                                      &AArch64::GPR64spRegClass);
    2491             :         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
    2492          76 :                                                     &AArch64::GPR64spRegClass);
    2493             :         // This instruction is reading and writing X registers.  This may upset
    2494             :         // the register scavenger and machine verifier, so we need to indicate
    2495             :         // that we are reading an undefined value from SrcRegX, but a proper
    2496             :         // value from SrcReg.
    2497         228 :         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
    2498          76 :             .addReg(AArch64::XZR)
    2499          76 :             .addReg(SrcRegX, RegState::Undef)
    2500          76 :             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
    2501             :       } else {
    2502             :         // Otherwise, expand to ORR WZR.
    2503        2187 :         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
    2504         729 :             .addReg(AArch64::WZR)
    2505         729 :             .addReg(SrcReg, getKillRegState(KillSrc));
    2506             :       }
    2507             :     }
    2508             :     return;
    2509             :   }
    2510             : 
    2511        6148 :   if (AArch64::GPR64spRegClass.contains(DestReg) &&
    2512         244 :       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    2513         878 :     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
    2514             :       // If either operand is SP, expand to ADD #0.
    2515         345 :       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
    2516         115 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2517             :           .addImm(0)
    2518             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2519         763 :     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
    2520          48 :       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
    2521             :           .addImm(0)
    2522             :           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    2523             :     } else {
    2524             :       // Otherwise, expand to ORR XZR.
    2525        2217 :       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
    2526         739 :           .addReg(AArch64::XZR)
    2527         739 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2528             :     }
    2529             :     return;
    2530             :   }
    2531             : 
    2532             :   // Copy a DDDD register quad by copying the individual sub-registers.
    2533        3388 :   if (AArch64::DDDDRegClass.contains(DestReg) &&
    2534             :       AArch64::DDDDRegClass.contains(SrcReg)) {
    2535             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2536             :                                        AArch64::dsub2, AArch64::dsub3};
    2537           0 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2538             :                      Indices);
    2539           0 :     return;
    2540             :   }
    2541             : 
    2542             :   // Copy a DDD register triple by copying the individual sub-registers.
    2543        3390 :   if (AArch64::DDDRegClass.contains(DestReg) &&
    2544             :       AArch64::DDDRegClass.contains(SrcReg)) {
    2545             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
    2546             :                                        AArch64::dsub2};
    2547           1 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2548             :                      Indices);
    2549           1 :     return;
    2550             :   }
    2551             : 
    2552             :   // Copy a DD register pair by copying the individual sub-registers.
    2553        3390 :   if (AArch64::DDRegClass.contains(DestReg) &&
    2554             :       AArch64::DDRegClass.contains(SrcReg)) {
    2555             :     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
    2556           4 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
    2557             :                      Indices);
    2558           4 :     return;
    2559             :   }
    2560             : 
    2561             :   // Copy a QQQQ register quad by copying the individual sub-registers.
    2562        3386 :   if (AArch64::QQQQRegClass.contains(DestReg) &&
    2563             :       AArch64::QQQQRegClass.contains(SrcReg)) {
    2564             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2565             :                                        AArch64::qsub2, AArch64::qsub3};
    2566           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2567             :                      Indices);
    2568           2 :     return;
    2569             :   }
    2570             : 
    2571             :   // Copy a QQQ register triple by copying the individual sub-registers.
    2572        3382 :   if (AArch64::QQQRegClass.contains(DestReg) &&
    2573             :       AArch64::QQQRegClass.contains(SrcReg)) {
    2574             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
    2575             :                                        AArch64::qsub2};
    2576           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2577             :                      Indices);
    2578           2 :     return;
    2579             :   }
    2580             : 
    2581             :   // Copy a QQ register pair by copying the individual sub-registers.
    2582        3378 :   if (AArch64::QQRegClass.contains(DestReg) &&
    2583             :       AArch64::QQRegClass.contains(SrcReg)) {
    2584             :     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
    2585           2 :     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
    2586             :                      Indices);
    2587           2 :     return;
    2588             :   }
    2589             : 
    2590        3265 :   if (AArch64::FPR128RegClass.contains(DestReg) &&
    2591             :       AArch64::FPR128RegClass.contains(SrcReg)) {
    2592         317 :     if (Subtarget.hasNEON()) {
    2593         948 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2594         316 :           .addReg(SrcReg)
    2595         316 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2596             :     } else {
    2597           3 :       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
    2598           1 :           .addReg(AArch64::SP, RegState::Define)
    2599           1 :           .addReg(SrcReg, getKillRegState(KillSrc))
    2600           1 :           .addReg(AArch64::SP)
    2601             :           .addImm(-16);
    2602           3 :       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
    2603           1 :           .addReg(AArch64::SP, RegState::Define)
    2604           1 :           .addReg(DestReg, RegState::Define)
    2605           1 :           .addReg(AArch64::SP)
    2606             :           .addImm(16);
    2607             :     }
    2608             :     return;
    2609             :   }
    2610             : 
    2611        2214 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2612             :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2613         324 :     if (Subtarget.hasNEON()) {
    2614             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
    2615             :                                        &AArch64::FPR128RegClass);
    2616         321 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
    2617             :                                       &AArch64::FPR128RegClass);
    2618         963 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2619         321 :           .addReg(SrcReg)
    2620         321 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2621             :     } else {
    2622           9 :       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
    2623           3 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2624             :     }
    2625             :     return;
    2626             :   }
    2627             : 
    2628        2209 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2629             :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2630          28 :     if (Subtarget.hasNEON()) {
    2631             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
    2632             :                                        &AArch64::FPR128RegClass);
    2633          27 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
    2634             :                                       &AArch64::FPR128RegClass);
    2635          81 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2636          27 :           .addReg(SrcReg)
    2637          27 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2638             :     } else {
    2639           3 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2640           1 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2641             :     }
    2642             :     return;
    2643             :   }
    2644             : 
    2645        1252 :   if (AArch64::FPR16RegClass.contains(DestReg) &&
    2646             :       AArch64::FPR16RegClass.contains(SrcReg)) {
    2647          40 :     if (Subtarget.hasNEON()) {
    2648             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2649             :                                        &AArch64::FPR128RegClass);
    2650          40 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2651             :                                       &AArch64::FPR128RegClass);
    2652         120 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2653          40 :           .addReg(SrcReg)
    2654          40 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2655             :     } else {
    2656             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
    2657             :                                        &AArch64::FPR32RegClass);
    2658           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
    2659             :                                       &AArch64::FPR32RegClass);
    2660           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2661           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2662             :     }
    2663             :     return;
    2664             :   }
    2665             : 
    2666        1130 :   if (AArch64::FPR8RegClass.contains(DestReg) &&
    2667             :       AArch64::FPR8RegClass.contains(SrcReg)) {
    2668           0 :     if (Subtarget.hasNEON()) {
    2669             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2670             :                                        &AArch64::FPR128RegClass);
    2671           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2672             :                                       &AArch64::FPR128RegClass);
    2673           0 :       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
    2674           0 :           .addReg(SrcReg)
    2675           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2676             :     } else {
    2677             :       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
    2678             :                                        &AArch64::FPR32RegClass);
    2679           0 :       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
    2680             :                                       &AArch64::FPR32RegClass);
    2681           0 :       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
    2682           0 :           .addReg(SrcReg, getKillRegState(KillSrc));
    2683             :     }
    2684             :     return;
    2685             :   }
    2686             : 
    2687             :   // Copies between GPR64 and FPR64.
    2688         309 :   if (AArch64::FPR64RegClass.contains(DestReg) &&
    2689         154 :       AArch64::GPR64RegClass.contains(SrcReg)) {
    2690         462 :     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
    2691         154 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2692         154 :     return;
    2693             :   }
    2694        1777 :   if (AArch64::GPR64RegClass.contains(DestReg) &&
    2695             :       AArch64::FPR64RegClass.contains(SrcReg)) {
    2696         390 :     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
    2697         130 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2698         130 :     return;
    2699             :   }
    2700             :   // Copies between GPR32 and FPR32.
    2701        1031 :   if (AArch64::FPR32RegClass.contains(DestReg) &&
    2702         407 :       AArch64::GPR32RegClass.contains(SrcReg)) {
    2703        1221 :     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
    2704         407 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2705         407 :     return;
    2706             :   }
    2707         855 :   if (AArch64::GPR32RegClass.contains(DestReg) &&
    2708             :       AArch64::FPR32RegClass.contains(SrcReg)) {
    2709         852 :     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
    2710         284 :         .addReg(SrcReg, getKillRegState(KillSrc));
    2711         284 :     return;
    2712             :   }
    2713             : 
    2714           2 :   if (DestReg == AArch64::NZCV) {
    2715             :     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    2716           3 :     BuildMI(MBB, I, DL, get(AArch64::MSR))
    2717             :         .addImm(AArch64SysReg::NZCV)
    2718           1 :         .addReg(SrcReg, getKillRegState(KillSrc))
    2719           1 :         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    2720           1 :     return;
    2721             :   }
    2722             : 
    2723           1 :   if (SrcReg == AArch64::NZCV) {
    2724             :     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    2725           3 :     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
    2726             :         .addImm(AArch64SysReg::NZCV)
    2727           1 :         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    2728           1 :     return;
    2729             :   }
    2730             : 
    2731           0 :   llvm_unreachable("unimplemented reg-to-reg copy");
    2732             : }
    2733             : 
    2734         962 : void AArch64InstrInfo::storeRegToStackSlot(
    2735             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    2736             :     bool isKill, int FI, const TargetRegisterClass *RC,
    2737             :     const TargetRegisterInfo *TRI) const {
    2738         962 :   DebugLoc DL;
    2739         962 :   if (MBBI != MBB.end())
    2740             :     DL = MBBI->getDebugLoc();
    2741         962 :   MachineFunction &MF = *MBB.getParent();
    2742         962 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2743             :   unsigned Align = MFI.getObjectAlignment(FI);
    2744             : 
    2745         962 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2746         962 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2747         962 :       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
    2748             :   unsigned Opc = 0;
    2749             :   bool Offset = true;
    2750         962 :   switch (TRI->getSpillSize(*RC)) {
    2751           0 :   case 1:
    2752           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2753             :       Opc = AArch64::STRBui;
    2754             :     break;
    2755           0 :   case 2:
    2756           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2757             :       Opc = AArch64::STRHui;
    2758             :     break;
    2759         139 :   case 4:
    2760         278 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2761             :       Opc = AArch64::STRWui;
    2762         126 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2763          33 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    2764             :       else
    2765             :         assert(SrcReg != AArch64::WSP);
    2766          26 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2767             :       Opc = AArch64::STRSui;
    2768             :     break;
    2769         553 :   case 8:
    2770        1106 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2771             :       Opc = AArch64::STRXui;
    2772         409 :       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
    2773          78 :         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    2774             :       else
    2775             :         assert(SrcReg != AArch64::SP);
    2776         288 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
    2777             :       Opc = AArch64::STRDui;
    2778             :     break;
    2779         263 :   case 16:
    2780         526 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2781             :       Opc = AArch64::STRQui;
    2782           0 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2783             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2784             :       Opc = AArch64::ST1Twov1d;
    2785             :       Offset = false;
    2786           0 :     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2787           0 :       BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
    2788             :           .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
    2789           0 :                   getKillRegState(isKill))
    2790             :           .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
    2791           0 :                   getKillRegState(isKill))
    2792             :           .addFrameIndex(FI)
    2793             :           .addImm(0)
    2794             :           .addMemOperand(MMO);
    2795             :       return;
    2796             :     }
    2797             :     break;
    2798           0 :   case 24:
    2799           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2800             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2801             :       Opc = AArch64::ST1Threev1d;
    2802             :       Offset = false;
    2803             :     }
    2804             :     break;
    2805           3 :   case 32:
    2806           6 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2807             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2808             :       Opc = AArch64::ST1Fourv1d;
    2809             :       Offset = false;
    2810           6 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2811             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2812             :       Opc = AArch64::ST1Twov2d;
    2813             :       Offset = false;
    2814             :     }
    2815             :     break;
    2816           2 :   case 48:
    2817           4 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2818             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2819             :       Opc = AArch64::ST1Threev2d;
    2820             :       Offset = false;
    2821             :     }
    2822             :     break;
    2823           2 :   case 64:
    2824           4 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2825             :       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
    2826             :       Opc = AArch64::ST1Fourv2d;
    2827             :       Offset = false;
    2828             :     }
    2829             :     break;
    2830             :   }
    2831             :   assert(Opc && "Unknown register class");
    2832             : 
    2833        2886 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
    2834         962 :                                      .addReg(SrcReg, getKillRegState(isKill))
    2835         962 :                                      .addFrameIndex(FI);
    2836             : 
    2837         962 :   if (Offset)
    2838             :     MI.addImm(0);
    2839             :   MI.addMemOperand(MMO);
    2840             : }
    2841             : 
    2842         898 : void AArch64InstrInfo::loadRegFromStackSlot(
    2843             :     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    2844             :     int FI, const TargetRegisterClass *RC,
    2845             :     const TargetRegisterInfo *TRI) const {
    2846         898 :   DebugLoc DL;
    2847         898 :   if (MBBI != MBB.end())
    2848             :     DL = MBBI->getDebugLoc();
    2849         898 :   MachineFunction &MF = *MBB.getParent();
    2850         898 :   MachineFrameInfo &MFI = MF.getFrameInfo();
    2851             :   unsigned Align = MFI.getObjectAlignment(FI);
    2852         898 :   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    2853         898 :   MachineMemOperand *MMO = MF.getMachineMemOperand(
    2854         898 :       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
    2855             : 
    2856             :   unsigned Opc = 0;
    2857             :   bool Offset = true;
    2858         898 :   switch (TRI->getSpillSize(*RC)) {
    2859           0 :   case 1:
    2860           0 :     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
    2861             :       Opc = AArch64::LDRBui;
    2862             :     break;
    2863           0 :   case 2:
    2864           0 :     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
    2865             :       Opc = AArch64::LDRHui;
    2866             :     break;
    2867          76 :   case 4:
    2868         152 :     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    2869             :       Opc = AArch64::LDRWui;
    2870          67 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2871          34 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
    2872             :       else
    2873             :         assert(DestReg != AArch64::WSP);
    2874          18 :     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
    2875             :       Opc = AArch64::LDRSui;
    2876             :     break;
    2877         450 :   case 8:
    2878         900 :     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
    2879             :       Opc = AArch64::LDRXui;
    2880         347 :       if (TargetRegisterInfo::isVirtualRegister(DestReg))
    2881          68 :         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
    2882             :       else
    2883             :         assert(DestReg != AArch64::SP);
    2884         206 :     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
    2885             :       Opc = AArch64::LDRDui;
    2886             :     break;
    2887         365 :   case 16:
    2888         730 :     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
    2889             :       Opc = AArch64::LDRQui;
    2890           0 :     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
    2891             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2892             :       Opc = AArch64::LD1Twov1d;
    2893             :       Offset = false;
    2894           0 :     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
    2895           0 :       BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
    2896             :           .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
    2897           0 :                   getDefRegState(true))
    2898             :           .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
    2899           0 :                   getDefRegState(true))
    2900             :           .addFrameIndex(FI)
    2901             :           .addImm(0)
    2902             :           .addMemOperand(MMO);
    2903             :       return;
    2904             :     }
    2905             :     break;
    2906           0 :   case 24:
    2907           0 :     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
    2908             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2909             :       Opc = AArch64::LD1Threev1d;
    2910             :       Offset = false;
    2911             :     }
    2912             :     break;
    2913           3 :   case 32:
    2914           6 :     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
    2915             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2916             :       Opc = AArch64::LD1Fourv1d;
    2917             :       Offset = false;
    2918           6 :     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
    2919             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2920             :       Opc = AArch64::LD1Twov2d;
    2921             :       Offset = false;
    2922             :     }
    2923             :     break;
    2924           2 :   case 48:
    2925           4 :     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
    2926             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2927             :       Opc = AArch64::LD1Threev2d;
    2928             :       Offset = false;
    2929             :     }
    2930             :     break;
    2931           2 :   case 64:
    2932           4 :     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
    2933             :       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
    2934             :       Opc = AArch64::LD1Fourv2d;
    2935             :       Offset = false;
    2936             :     }
    2937             :     break;
    2938             :   }
    2939             :   assert(Opc && "Unknown register class");
    2940             : 
    2941        2694 :   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
    2942         898 :                                      .addReg(DestReg, getDefRegState(true))
    2943         898 :                                      .addFrameIndex(FI);
    2944         898 :   if (Offset)
    2945             :     MI.addImm(0);
    2946             :   MI.addMemOperand(MMO);
    2947             : }
    2948             : 
    2949       15406 : void llvm::emitFrameOffset(MachineBasicBlock &MBB,
    2950             :                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    2951             :                            unsigned DestReg, unsigned SrcReg, int Offset,
    2952             :                            const TargetInstrInfo *TII,
    2953             :                            MachineInstr::MIFlag Flag, bool SetNZCV) {
    2954       15406 :   if (DestReg == SrcReg && Offset == 0)
    2955             :     return;
    2956             : 
    2957             :   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
    2958             :          "SP increment/decrement not 16-byte aligned");
    2959             : 
    2960             :   bool isSub = Offset < 0;
    2961        2000 :   if (isSub)
    2962         789 :     Offset = -Offset;
    2963             : 
    2964             :   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
    2965             :   // scratch register.  If DestReg is a virtual register, use it as the
    2966             :   // scratch register; otherwise, create a new virtual register (to be
    2967             :   // replaced by the scavenger at the end of PEI).  That case can be optimized
    2968             :   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
    2969             :   // register can be loaded with offset%8 and the add/sub can use an extending
    2970             :   // instruction with LSL#3.
    2971             :   // Currently the function handles any offsets but generates a poor sequence
    2972             :   // of code.
    2973             :   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
    2974             : 
    2975             :   unsigned Opc;
    2976        2000 :   if (SetNZCV)
    2977           3 :     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
    2978             :   else
    2979        1997 :     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
    2980             :   const unsigned MaxEncoding = 0xfff;
    2981             :   const unsigned ShiftSize = 12;
    2982             :   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
    2983        2033 :   while (((unsigned)Offset) >= (1 << ShiftSize)) {
    2984             :     unsigned ThisVal;
    2985          41 :     if (((unsigned)Offset) > MaxEncodableValue) {
    2986             :       ThisVal = MaxEncodableValue;
    2987             :     } else {
    2988          31 :       ThisVal = Offset & MaxEncodableValue;
    2989             :     }
    2990             :     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
    2991             :            "Encoding cannot handle value that big");
    2992         123 :     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    2993          41 :         .addReg(SrcReg)
    2994          41 :         .addImm(ThisVal >> ShiftSize)
    2995             :         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
    2996             :         .setMIFlag(Flag);
    2997             : 
    2998             :     SrcReg = DestReg;
    2999          41 :     Offset -= ThisVal;
    3000          41 :     if (Offset == 0)
    3001             :       return;
    3002             :   }
    3003        5976 :   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
    3004        1992 :       .addReg(SrcReg)
    3005        1992 :       .addImm(Offset)
    3006             :       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
    3007             :       .setMIFlag(Flag);
    3008             : }
    3009             : 
    3010         746 : MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
    3011             :     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    3012             :     MachineBasicBlock::iterator InsertPt, int FrameIndex,
    3013             :     LiveIntervals *LIS) const {
    3014             :   // This is a bit of a hack. Consider this instruction:
    3015             :   //
    3016             :   //   %0 = COPY %sp; GPR64all:%0
    3017             :   //
    3018             :   // We explicitly chose GPR64all for the virtual register so such a copy might
    3019             :   // be eliminated by RegisterCoalescer. However, that may not be possible, and
    3020             :   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
    3021             :   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
    3022             :   //
    3023             :   // To prevent that, we are going to constrain the %0 register class here.
    3024             :   //
    3025             :   // <rdar://problem/11522048>
    3026             :   //
    3027             :   if (MI.isFullCopy()) {
    3028         381 :     unsigned DstReg = MI.getOperand(0).getReg();
    3029         381 :     unsigned SrcReg = MI.getOperand(1).getReg();
    3030         382 :     if (SrcReg == AArch64::SP &&
    3031             :         TargetRegisterInfo::isVirtualRegister(DstReg)) {
    3032           1 :       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
    3033           1 :       return nullptr;
    3034             :     }
    3035         381 :     if (DstReg == AArch64::SP &&
    3036             :         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
    3037           1 :       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
    3038           1 :       return nullptr;
    3039             :     }
    3040             :   }
    3041             : 
    3042             :   // Handle the case where a copy is being spilled or filled but the source
    3043             :   // and destination register class don't match.  For example:
    3044             :   //
    3045             :   //   %0 = COPY %xzr; GPR64common:%0
    3046             :   //
    3047             :   // In this case we can still safely fold away the COPY and generate the
    3048             :   // following spill code:
    3049             :   //
    3050             :   //   STRXui %xzr, %stack.0
    3051             :   //
    3052             :   // This also eliminates spilled cross register class COPYs (e.g. between x and
    3053             :   // d regs) of the same size.  For example:
    3054             :   //
    3055             :   //   %0 = COPY %1; GPR64:%0, FPR64:%1
    3056             :   //
    3057             :   // will be filled as
    3058             :   //
    3059             :   //   LDRDui %0, fi<#0>
    3060             :   //
    3061             :   // instead of
    3062             :   //
    3063             :   //   LDRXui %Temp, fi<#0>
    3064             :   //   %0 = FMOV %Temp
    3065             :   //
    3066        1273 :   if (MI.isCopy() && Ops.size() == 1 &&
    3067             :       // Make sure we're only folding the explicit COPY defs/uses.
    3068         819 :       (Ops[0] == 0 || Ops[0] == 1)) {
    3069             :     bool IsSpill = Ops[0] == 0;
    3070             :     bool IsFill = !IsSpill;
    3071         529 :     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    3072         529 :     const MachineRegisterInfo &MRI = MF.getRegInfo();
    3073         529 :     MachineBasicBlock &MBB = *MI.getParent();
    3074         529 :     const MachineOperand &DstMO = MI.getOperand(0);
    3075             :     const MachineOperand &SrcMO = MI.getOperand(1);
    3076         529 :     unsigned DstReg = DstMO.getReg();
    3077         529 :     unsigned SrcReg = SrcMO.getReg();
    3078             :     // This is slightly expensive to compute for physical regs since
    3079             :     // getMinimalPhysRegClass is slow.
    3080         379 :     auto getRegClass = [&](unsigned Reg) {
    3081             :       return TargetRegisterInfo::isVirtualRegister(Reg)
    3082         537 :                  ? MRI.getRegClass(Reg)
    3083         537 :                  : TRI.getMinimalPhysRegClass(Reg);
    3084         908 :     };
    3085             : 
    3086         986 :     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
    3087             :       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
    3088             :                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
    3089             :              "Mismatched register size in non subreg COPY");
    3090         379 :       if (IsSpill)
    3091         338 :         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
    3092             :                             getRegClass(SrcReg), &TRI);
    3093             :       else
    3094         210 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
    3095             :                              getRegClass(DstReg), &TRI);
    3096         830 :       return &*--InsertPt;
    3097             :     }
    3098             : 
    3099             :     // Handle cases like spilling def of:
    3100             :     //
    3101             :     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
    3102             :     //
    3103             :     // where the physical register source can be widened and stored to the full
    3104             :     // virtual reg destination stack slot, in this case producing:
    3105             :     //
    3106             :     //   STRXui %xzr, %stack.0
    3107             :     //
    3108         290 :     if (IsSpill && DstMO.isUndef() &&
    3109             :         TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    3110             :       assert(SrcMO.getSubReg() == 0 &&
    3111             :              "Unexpected subreg on physical register");
    3112             :       const TargetRegisterClass *SpillRC;
    3113             :       unsigned SpillSubreg;
    3114          70 :       switch (DstMO.getSubReg()) {
    3115             :       default:
    3116             :         SpillRC = nullptr;
    3117             :         break;
    3118          47 :       case AArch64::sub_32:
    3119             :       case AArch64::ssub:
    3120          94 :         if (AArch64::GPR32RegClass.contains(SrcReg)) {
    3121             :           SpillRC = &AArch64::GPR64RegClass;
    3122             :           SpillSubreg = AArch64::sub_32;
    3123          88 :         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
    3124             :           SpillRC = &AArch64::FPR64RegClass;
    3125             :           SpillSubreg = AArch64::ssub;
    3126             :         } else
    3127             :           SpillRC = nullptr;
    3128             :         break;
    3129          23 :       case AArch64::dsub:
    3130          46 :         if (AArch64::FPR64RegClass.contains(SrcReg)) {
    3131             :           SpillRC = &AArch64::FPR128RegClass;
    3132             :           SpillSubreg = AArch64::dsub;
    3133             :         } else
    3134             :           SpillRC = nullptr;
    3135             :         break;
    3136             :       }
    3137             : 
    3138             :       if (SpillRC)
    3139          70 :         if (unsigned WidenedSrcReg =
    3140          70 :                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
    3141          70 :           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
    3142             :                               FrameIndex, SpillRC, &TRI);
    3143          70 :           return &*--InsertPt;
    3144             :         }
    3145             :     }
    3146             : 
    3147             :     // Handle cases like filling use of:
    3148             :     //
    3149             :     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
    3150             :     //
    3151             :     // where we can load the full virtual reg source stack slot, into the subreg
    3152             :     // destination, in this case producing:
    3153             :     //
    3154             :     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
    3155             :     //
    3156         162 :     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
    3157             :       const TargetRegisterClass *FillRC;
    3158           2 :       switch (DstMO.getSubReg()) {
    3159             :       default:
    3160             :         FillRC = nullptr;
    3161             :         break;
    3162             :       case AArch64::sub_32:
    3163             :         FillRC = &AArch64::GPR32RegClass;
    3164             :         break;
    3165           1 :       case AArch64::ssub:
    3166             :         FillRC = &AArch64::FPR32RegClass;
    3167           1 :         break;
    3168           0 :       case AArch64::dsub:
    3169             :         FillRC = &AArch64::FPR64RegClass;
    3170           0 :         break;
    3171             :       }
    3172             : 
    3173             :       if (FillRC) {
    3174             :         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
    3175             :                    TRI.getRegSizeInBits(*FillRC) &&
    3176             :                "Mismatched regclass size on folded subreg COPY");
    3177           2 :         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
    3178             :         MachineInstr &LoadMI = *--InsertPt;
    3179           2 :         MachineOperand &LoadDst = LoadMI.getOperand(0);
    3180             :         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
    3181             :         LoadDst.setSubReg(DstMO.getSubReg());
    3182             :         LoadDst.setIsUndef();
    3183           2 :         return &LoadMI;
    3184             :       }
    3185             :     }
    3186             :   }
    3187             : 
    3188             :   // Cannot fold.
    3189             :   return nullptr;
    3190             : }
    3191             : 
    3192        7265 : int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
    3193             :                                     bool *OutUseUnscaledOp,
    3194             :                                     unsigned *OutUnscaledOp,
    3195             :                                     int *EmittableOffset) {
    3196             :   int Scale = 1;
    3197             :   bool IsSigned = false;
    3198             :   // The ImmIdx should be changed case by case if it is not 2.
    3199             :   unsigned ImmIdx = 2;
    3200             :   unsigned UnscaledOp = 0;
    3201             :   // Set output values in case of early exit.
    3202        7265 :   if (EmittableOffset)
    3203        3073 :     *EmittableOffset = 0;
    3204        7265 :   if (OutUseUnscaledOp)
    3205        3073 :     *OutUseUnscaledOp = false;
    3206        7265 :   if (OutUnscaledOp)
    3207        3073 :     *OutUnscaledOp = 0;
    3208       14530 :   switch (MI.getOpcode()) {
    3209           0 :   default:
    3210           0 :     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
    3211             :   // Vector spills/fills can't take an immediate offset.
    3212             :   case AArch64::LD1Twov2d:
    3213             :   case AArch64::LD1Threev2d:
    3214             :   case AArch64::LD1Fourv2d:
    3215             :   case AArch64::LD1Twov1d:
    3216             :   case AArch64::LD1Threev1d:
    3217             :   case AArch64::LD1Fourv1d:
    3218             :   case AArch64::ST1Twov2d:
    3219             :   case AArch64::ST1Threev2d:
    3220             :   case AArch64::ST1Fourv2d:
    3221             :   case AArch64::ST1Twov1d:
    3222             :   case AArch64::ST1Threev1d:
    3223             :   case AArch64::ST1Fourv1d:
    3224             :     return AArch64FrameOffsetCannotUpdate;
    3225             :   case AArch64::PRFMui:
    3226             :     Scale = 8;
    3227             :     UnscaledOp = AArch64::PRFUMi;
    3228             :     break;
    3229        1192 :   case AArch64::LDRXui:
    3230             :     Scale = 8;
    3231             :     UnscaledOp = AArch64::LDURXi;
    3232        1192 :     break;
    3233         467 :   case AArch64::LDRWui:
    3234             :     Scale = 4;
    3235             :     UnscaledOp = AArch64::LDURWi;
    3236         467 :     break;
    3237           0 :   case AArch64::LDRBui:
    3238             :     Scale = 1;
    3239             :     UnscaledOp = AArch64::LDURBi;
    3240           0 :     break;
    3241           4 :   case AArch64::LDRHui:
    3242             :     Scale = 2;
    3243             :     UnscaledOp = AArch64::LDURHi;
    3244           4 :     break;
    3245          81 :   case AArch64::LDRSui:
    3246             :     Scale = 4;
    3247             :     UnscaledOp = AArch64::LDURSi;
    3248          81 :     break;
    3249         360 :   case AArch64::LDRDui:
    3250             :     Scale = 8;
    3251             :     UnscaledOp = AArch64::LDURDi;
    3252         360 :     break;
    3253         961 :   case AArch64::LDRQui:
    3254             :     Scale = 16;
    3255             :     UnscaledOp = AArch64::LDURQi;
    3256         961 :     break;
    3257          48 :   case AArch64::LDRBBui:
    3258             :     Scale = 1;
    3259             :     UnscaledOp = AArch64::LDURBBi;
    3260          48 :     break;
    3261          33 :   case AArch64::LDRHHui:
    3262             :     Scale = 2;
    3263             :     UnscaledOp = AArch64::LDURHHi;
    3264          33 :     break;
    3265           0 :   case AArch64::LDRSBXui:
    3266             :     Scale = 1;
    3267             :     UnscaledOp = AArch64::LDURSBXi;
    3268           0 :     break;
    3269          15 :   case AArch64::LDRSBWui:
    3270             :     Scale = 1;
    3271             :     UnscaledOp = AArch64::LDURSBWi;
    3272          15 :     break;
    3273           0 :   case AArch64::LDRSHXui:
    3274             :     Scale = 2;
    3275             :     UnscaledOp = AArch64::LDURSHXi;
    3276           0 :     break;
    3277          12 :   case AArch64::LDRSHWui:
    3278             :     Scale = 2;
    3279             :     UnscaledOp = AArch64::LDURSHWi;
    3280          12 :     break;
    3281           6 :   case AArch64::LDRSWui:
    3282             :     Scale = 4;
    3283             :     UnscaledOp = AArch64::LDURSWi;
    3284           6 :     break;
    3285             : 
    3286        1488 :   case AArch64::STRXui:
    3287             :     Scale = 8;
    3288             :     UnscaledOp = AArch64::STURXi;
    3289        1488 :     break;
    3290         984 :   case AArch64::STRWui:
    3291             :     Scale = 4;
    3292             :     UnscaledOp = AArch64::STURWi;
    3293         984 :     break;
    3294           0 :   case AArch64::STRBui:
    3295             :     Scale = 1;
    3296             :     UnscaledOp = AArch64::STURBi;
    3297           0 :     break;
    3298           0 :   case AArch64::STRHui:
    3299             :     Scale = 2;
    3300             :     UnscaledOp = AArch64::STURHi;
    3301           0 :     break;
    3302          48 :   case AArch64::STRSui:
    3303             :     Scale = 4;
    3304             :     UnscaledOp = AArch64::STURSi;
    3305          48 :     break;
    3306         426 :   case AArch64::STRDui:
    3307             :     Scale = 8;
    3308             :     UnscaledOp = AArch64::STURDi;
    3309         426 :     break;
    3310        1006 :   case AArch64::STRQui:
    3311             :     Scale = 16;
    3312             :     UnscaledOp = AArch64::STURQi;
    3313        1006 :     break;
    3314          44 :   case AArch64::STRBBui:
    3315             :     Scale = 1;
    3316             :     UnscaledOp = AArch64::STURBBi;
    3317          44 :     break;
    3318          27 :   case AArch64::STRHHui:
    3319             :     Scale = 2;
    3320             :     UnscaledOp = AArch64::STURHHi;
    3321          27 :     break;
    3322             : 
    3323           6 :   case AArch64::LDPXi:
    3324             :   case AArch64::LDPDi:
    3325             :   case AArch64::STPXi:
    3326             :   case AArch64::STPDi:
    3327             :   case AArch64::LDNPXi:
    3328             :   case AArch64::LDNPDi:
    3329             :   case AArch64::STNPXi:
    3330             :   case AArch64::STNPDi:
    3331             :     ImmIdx = 3;
    3332             :     IsSigned = true;
    3333             :     Scale = 8;
    3334           6 :     break;
    3335           0 :   case AArch64::LDPQi:
    3336             :   case AArch64::STPQi:
    3337             :   case AArch64::LDNPQi:
    3338             :   case AArch64::STNPQi:
    3339             :     ImmIdx = 3;
    3340             :     IsSigned = true;
    3341             :     Scale = 16;
    3342           0 :     break;
    3343           0 :   case AArch64::LDPWi:
    3344             :   case AArch64::LDPSi:
    3345             :   case AArch64::STPWi:
    3346             :   case AArch64::STPSi:
    3347             :   case AArch64::LDNPWi:
    3348             :   case AArch64::LDNPSi:
    3349             :   case AArch64::STNPWi:
    3350             :   case AArch64::STNPSi:
    3351             :     ImmIdx = 3;
    3352             :     IsSigned = true;
    3353             :     Scale = 4;
    3354           0 :     break;
    3355             : 
    3356          10 :   case AArch64::LDURXi:
    3357             :   case AArch64::LDURWi:
    3358             :   case AArch64::LDURBi:
    3359             :   case AArch64::LDURHi:
    3360             :   case AArch64::LDURSi:
    3361             :   case AArch64::LDURDi:
    3362             :   case AArch64::LDURQi:
    3363             :   case AArch64::LDURHHi:
    3364             :   case AArch64::LDURBBi:
    3365             :   case AArch64::LDURSBXi:
    3366             :   case AArch64::LDURSBWi:
    3367             :   case AArch64::LDURSHXi:
    3368             :   case AArch64::LDURSHWi:
    3369             :   case AArch64::LDURSWi:
    3370             :   case AArch64::STURXi:
    3371             :   case AArch64::STURWi:
    3372             :   case AArch64::STURBi:
    3373             :   case AArch64::STURHi:
    3374             :   case AArch64::STURSi:
    3375             :   case AArch64::STURDi:
    3376             :   case AArch64::STURQi:
    3377             :   case AArch64::STURBBi:
    3378             :   case AArch64::STURHHi:
    3379             :     Scale = 1;
    3380          10 :     break;
    3381             :   }
    3382             : 
    3383       14484 :   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
    3384             : 
    3385             :   bool useUnscaledOp = false;
    3386             :   // If the offset doesn't match the scale, we rewrite the instruction to
    3387             :   // use the unscaled instruction instead. Likewise, if we have a negative
    3388             :   // offset (and have an unscaled op to use).
    3389        7242 :   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    3390             :     useUnscaledOp = true;
    3391             : 
    3392             :   // Use an unscaled addressing mode if the instruction has a negative offset
    3393             :   // (or if the instruction is already using an unscaled addressing mode).
    3394             :   unsigned MaskBits;
    3395        7242 :   if (IsSigned) {
    3396             :     // ldp/stp instructions.
    3397             :     MaskBits = 7;
    3398           6 :     Offset /= Scale;
    3399        7236 :   } else if (UnscaledOp == 0 || useUnscaledOp) {
    3400             :     MaskBits = 9;
    3401             :     IsSigned = true;
    3402             :     Scale = 1;
    3403             :   } else {
    3404             :     MaskBits = 12;
    3405             :     IsSigned = false;
    3406        6861 :     Offset /= Scale;
    3407             :   }
    3408             : 
    3409             :   // Attempt to fold address computation.
    3410        7242 :   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
    3411        7242 :   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
    3412        7242 :   if (Offset >= MinOff && Offset <= MaxOff) {
    3413        7063 :     if (EmittableOffset)
    3414        3057 :       *EmittableOffset = Offset;
    3415        7063 :     Offset = 0;
    3416             :   } else {
    3417         179 :     int NewOff = Offset < 0 ? MinOff : MaxOff;
    3418         179 :     if (EmittableOffset)
    3419           1 :       *EmittableOffset = NewOff;
    3420         179 :     Offset = (Offset - NewOff) * Scale;
    3421             :   }
    3422        7242 :   if (OutUseUnscaledOp)
    3423        3058 :     *OutUseUnscaledOp = useUnscaledOp;
    3424        7242 :   if (OutUnscaledOp)
    3425        3058 :     *OutUnscaledOp = UnscaledOp;
    3426        7242 :   return AArch64FrameOffsetCanUpdate |
    3427        7242 :          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
    3428             : }
    3429             : 
    3430        3259 : bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
    3431             :                                     unsigned FrameReg, int &Offset,
    3432             :                                     const AArch64InstrInfo *TII) {
    3433        3259 :   unsigned Opcode = MI.getOpcode();
    3434        3259 :   unsigned ImmIdx = FrameRegIdx + 1;
    3435             : 
    3436        3259 :   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    3437         372 :     Offset += MI.getOperand(ImmIdx).getImm();
    3438         372 :     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
    3439             :                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
    3440             :                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    3441         186 :     MI.eraseFromParent();
    3442         186 :     Offset = 0;
    3443         186 :     return true;
    3444             :   }
    3445             : 
    3446             :   int NewOffset;
    3447             :   unsigned UnscaledOp;
    3448             :   bool UseUnscaledOp;
    3449             :   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
    3450        3073 :                                          &UnscaledOp, &NewOffset);
    3451        3073 :   if (Status & AArch64FrameOffsetCanUpdate) {
    3452        3058 :     if (Status & AArch64FrameOffsetIsLegal)
    3453             :       // Replace the FrameIndex with FrameReg.
    3454        6114 :       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    3455        3058 :     if (UseUnscaledOp)
    3456         175 :       MI.setDesc(TII->get(UnscaledOp));
    3457             : 
    3458        6116 :     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    3459        3058 :     return Offset == 0;
    3460             :   }
    3461             : 
    3462             :   return false;
    3463             : }
    3464             : 
    3465           0 : void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
    3466             :   NopInst.setOpcode(AArch64::HINT);
    3467           0 :   NopInst.addOperand(MCOperand::createImm(0));
    3468           0 : }
    3469             : 
    3470             : // AArch64 supports MachineCombiner.
    3471       13317 : bool AArch64InstrInfo::useMachineCombiner() const { return true; }
    3472             : 
    3473             : // True when Opc sets flag
    3474             : static bool isCombineInstrSettingFlag(unsigned Opc) {
    3475        2651 :   switch (Opc) {
    3476             :   case AArch64::ADDSWrr:
    3477             :   case AArch64::ADDSWri:
    3478             :   case AArch64::ADDSXrr:
    3479             :   case AArch64::ADDSXri:
    3480             :   case AArch64::SUBSWrr:
    3481             :   case AArch64::SUBSXrr:
    3482             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3483             :   case AArch64::SUBSWri:
    3484             :   case AArch64::SUBSXri:
    3485             :     return true;
    3486             :   default:
    3487             :     break;
    3488             :   }
    3489             :   return false;
    3490             : }
    3491             : 
    3492             : // 32b Opcodes that can be combined with a MUL
    3493             : static bool isCombineInstrCandidate32(unsigned Opc) {
    3494      112539 :   switch (Opc) {
    3495             :   case AArch64::ADDWrr:
    3496             :   case AArch64::ADDWri:
    3497             :   case AArch64::SUBWrr:
    3498             :   case AArch64::ADDSWrr:
    3499             :   case AArch64::ADDSWri:
    3500             :   case AArch64::SUBSWrr:
    3501             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3502             :   case AArch64::SUBWri:
    3503             :   case AArch64::SUBSWri:
    3504             :     return true;
    3505             :   default:
    3506             :     break;
    3507             :   }
    3508             :   return false;
    3509             : }
    3510             : 
    3511             : // 64b Opcodes that can be combined with a MUL
    3512             : static bool isCombineInstrCandidate64(unsigned Opc) {
    3513      111191 :   switch (Opc) {
    3514             :   case AArch64::ADDXrr:
    3515             :   case AArch64::ADDXri:
    3516             :   case AArch64::SUBXrr:
    3517             :   case AArch64::ADDSXrr:
    3518             :   case AArch64::ADDSXri:
    3519             :   case AArch64::SUBSXrr:
    3520             :   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
    3521             :   case AArch64::SUBXri:
    3522             :   case AArch64::SUBSXri:
    3523             :     return true;
    3524             :   default:
    3525             :     break;
    3526             :   }
    3527             :   return false;
    3528             : }
    3529             : 
    3530             : // FP Opcodes that can be combined with a FMUL
    3531      112349 : static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
    3532      224698 :   switch (Inst.getOpcode()) {
    3533             :   default:
    3534      111771 :     break;
    3535         578 :   case AArch64::FADDSrr:
    3536             :   case AArch64::FADDDrr:
    3537             :   case AArch64::FADDv2f32:
    3538             :   case AArch64::FADDv2f64:
    3539             :   case AArch64::FADDv4f32:
    3540             :   case AArch64::FSUBSrr:
    3541             :   case AArch64::FSUBDrr:
    3542             :   case AArch64::FSUBv2f32:
    3543             :   case AArch64::FSUBv2f64:
    3544             :   case AArch64::FSUBv4f32:
    3545         578 :     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
    3546        1005 :     return (Options.UnsafeFPMath ||
    3547        1005 :             Options.AllowFPOpFusion == FPOpFusion::Fast);
    3548             :   }
    3549      111771 :   return false;
    3550             : }
    3551             : 
    3552             : // Opcodes that can be combined with a MUL
    3553      112539 : static bool isCombineInstrCandidate(unsigned Opc) {
    3554      112539 :   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
    3555             : }
    3556             : 
    3557             : //
    3558             : // Utility routine that checks if \param MO is defined by an
    3559             : // \param CombineOpc instruction in the basic block \param MBB
    3560        3576 : static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
    3561             :                        unsigned CombineOpc, unsigned ZeroReg = 0,
    3562             :                        bool CheckZeroReg = false) {
    3563        3576 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    3564             :   MachineInstr *MI = nullptr;
    3565             : 
    3566        7020 :   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    3567        3444 :     MI = MRI.getUniqueVRegDef(MO.getReg());
    3568             :   // And it needs to be in the trace (otherwise, it won't have a depth).
    3569        6684 :   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    3570             :     return false;
    3571             :   // Must only used by the user we combine with.
    3572         280 :   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    3573             :     return false;
    3574             : 
    3575         266 :   if (CheckZeroReg) {
    3576             :     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
    3577             :            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
    3578             :            MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
    3579             :     // The third input reg must be zero.
    3580         198 :     if (MI->getOperand(3).getReg() != ZeroReg)
    3581             :       return false;
    3582             :   }
    3583             : 
    3584             :   return true;
    3585             : }
    3586             : 
    3587             : //
    3588             : // Is \param MO defined by an integer multiply and can be combined?
    3589             : static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3590             :                               unsigned MulOpc, unsigned ZeroReg) {
    3591        2857 :   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
    3592             : }
    3593             : 
    3594             : //
    3595             : // Is \param MO defined by a floating-point multiply and can be combined?
    3596             : static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
    3597             :                                unsigned MulOpc) {
    3598         719 :   return canCombine(MBB, MO, MulOpc);
    3599             : }
    3600             : 
    3601             : // TODO: There are many more machine instruction opcodes to match:
    3602             : //       1. Other data types (integer, vectors)
    3603             : //       2. Other math / logic operations (xor, or)
    3604             : //       3. Other forms of the same operation (intrinsics and other variants)
    3605      112294 : bool AArch64InstrInfo::isAssociativeAndCommutative(
    3606             :     const MachineInstr &Inst) const {
    3607      224588 :   switch (Inst.getOpcode()) {
    3608         885 :   case AArch64::FADDDrr:
    3609             :   case AArch64::FADDSrr:
    3610             :   case AArch64::FADDv2f32:
    3611             :   case AArch64::FADDv2f64:
    3612             :   case AArch64::FADDv4f32:
    3613             :   case AArch64::FMULDrr:
    3614             :   case AArch64::FMULSrr:
    3615             :   case AArch64::FMULX32:
    3616             :   case AArch64::FMULX64:
    3617             :   case AArch64::FMULXv2f32:
    3618             :   case AArch64::FMULXv2f64:
    3619             :   case AArch64::FMULXv4f32:
    3620             :   case AArch64::FMULv2f32:
    3621             :   case AArch64::FMULv2f64:
    3622             :   case AArch64::FMULv4f32:
    3623         885 :     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
    3624             :   default:
    3625             :     return false;
    3626             :   }
    3627             : }
    3628             : 
    3629             : /// Find instructions that can be turned into madd.
    3630      112539 : static bool getMaddPatterns(MachineInstr &Root,
    3631             :                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3632      112539 :   unsigned Opc = Root.getOpcode();
    3633      112539 :   MachineBasicBlock &MBB = *Root.getParent();
    3634             :   bool Found = false;
    3635             : 
    3636      112539 :   if (!isCombineInstrCandidate(Opc))
    3637             :     return false;
    3638             :   if (isCombineInstrSettingFlag(Opc)) {
    3639        1358 :     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    3640             :     // When NZCV is live bail out.
    3641        1358 :     if (Cmp_NZCV == -1)
    3642             :       return false;
    3643         606 :     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
    3644             :     // When opcode can't change bail out.
    3645             :     // CHECKME: do we miss any cases for opcode conversion?
    3646         606 :     if (NewOpc == Opc)
    3647             :       return false;
    3648             :     Opc = NewOpc;
    3649             :   }
    3650             : 
    3651        1899 :   switch (Opc) {
    3652             :   default:
    3653             :     break;
    3654         287 :   case AArch64::ADDWrr:
    3655             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3656             :            "ADDWrr does not have register operands");
    3657         574 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3658             :                           AArch64::WZR)) {
    3659           1 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
    3660             :       Found = true;
    3661             :     }
    3662         574 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3663             :                           AArch64::WZR)) {
    3664           2 :       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
    3665             :       Found = true;
    3666             :     }
    3667             :     break;
    3668         247 :   case AArch64::ADDXrr:
    3669         494 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3670             :                           AArch64::XZR)) {
    3671           7 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
    3672             :       Found = true;
    3673             :     }
    3674         494 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3675             :                           AArch64::XZR)) {
    3676          39 :       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
    3677             :       Found = true;
    3678             :     }
    3679             :     break;
    3680         281 :   case AArch64::SUBWrr:
    3681         562 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3682             :                           AArch64::WZR)) {
    3683           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
    3684             :       Found = true;
    3685             :     }
    3686         562 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
    3687             :                           AArch64::WZR)) {
    3688         122 :       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
    3689             :       Found = true;
    3690             :     }
    3691             :     break;
    3692         143 :   case AArch64::SUBXrr:
    3693         286 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3694             :                           AArch64::XZR)) {
    3695           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
    3696             :       Found = true;
    3697             :     }
    3698         286 :     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
    3699             :                           AArch64::XZR)) {
    3700          15 :       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
    3701             :       Found = true;
    3702             :     }
    3703             :     break;
    3704         191 :   case AArch64::ADDWri:
    3705         382 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3706             :                           AArch64::WZR)) {
    3707           2 :       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
    3708             :       Found = true;
    3709             :     }
    3710             :     break;
    3711         546 :   case AArch64::ADDXri:
    3712        1092 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3713             :                           AArch64::XZR)) {
    3714           1 :       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
    3715             :       Found = true;
    3716             :     }
    3717             :     break;
    3718          75 :   case AArch64::SUBWri:
    3719         150 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
    3720             :                           AArch64::WZR)) {
    3721           0 :       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
    3722             :       Found = true;
    3723             :     }
    3724             :     break;
    3725         129 :   case AArch64::SUBXri:
    3726         258 :     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
    3727             :                           AArch64::XZR)) {
    3728           1 :       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
    3729             :       Found = true;
    3730             :     }
    3731             :     break;
    3732             :   }
    3733             :   return Found;
    3734             : }
    3735             : /// Floating-Point Support
    3736             : 
    3737             : /// Find instructions that can be turned into madd.
    3738      112349 : static bool getFMAPatterns(MachineInstr &Root,
    3739             :                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
    3740             : 
    3741      112349 :   if (!isCombineInstrCandidateFP(Root))
    3742             :     return false;
    3743             : 
    3744         183 :   MachineBasicBlock &MBB = *Root.getParent();
    3745             :   bool Found = false;
    3746             : 
    3747         366 :   switch (Root.getOpcode()) {
    3748             :   default:
    3749             :     assert(false && "Unsupported FP instruction in combiner\n");
    3750             :     break;
    3751          55 :   case AArch64::FADDSrr:
    3752             :     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
    3753             :            "FADDWrr does not have register operands");
    3754         110 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3755           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
    3756             :       Found = true;
    3757         108 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3758             :                                   AArch64::FMULv1i32_indexed)) {
    3759           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
    3760             :       Found = true;
    3761             :     }
    3762         110 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3763           0 :       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
    3764             :       Found = true;
    3765         110 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3766             :                                   AArch64::FMULv1i32_indexed)) {
    3767           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
    3768             :       Found = true;
    3769             :     }
    3770             :     break;
    3771          30 :   case AArch64::FADDDrr:
    3772          60 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3773           1 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
    3774             :       Found = true;
    3775          58 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3776             :                                   AArch64::FMULv1i64_indexed)) {
    3777           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
    3778             :       Found = true;
    3779             :     }
    3780          60 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3781           2 :       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
    3782             :       Found = true;
    3783          56 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3784             :                                   AArch64::FMULv1i64_indexed)) {
    3785           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
    3786             :       Found = true;
    3787             :     }
    3788             :     break;
    3789           5 :   case AArch64::FADDv2f32:
    3790          10 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3791             :                            AArch64::FMULv2i32_indexed)) {
    3792           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
    3793             :       Found = true;
    3794           8 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3795             :                                   AArch64::FMULv2f32)) {
    3796           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
    3797             :       Found = true;
    3798             :     }
    3799          10 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3800             :                            AArch64::FMULv2i32_indexed)) {
    3801           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
    3802             :       Found = true;
    3803          10 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3804             :                                   AArch64::FMULv2f32)) {
    3805           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
    3806             :       Found = true;
    3807             :     }
    3808             :     break;
    3809          10 :   case AArch64::FADDv2f64:
    3810          20 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3811             :                            AArch64::FMULv2i64_indexed)) {
    3812           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
    3813             :       Found = true;
    3814          18 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3815             :                                   AArch64::FMULv2f64)) {
    3816           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
    3817             :       Found = true;
    3818             :     }
    3819          20 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3820             :                            AArch64::FMULv2i64_indexed)) {
    3821           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
    3822             :       Found = true;
    3823          20 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3824             :                                   AArch64::FMULv2f64)) {
    3825           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
    3826             :       Found = true;
    3827             :     }
    3828             :     break;
    3829          31 :   case AArch64::FADDv4f32:
    3830          62 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3831             :                            AArch64::FMULv4i32_indexed)) {
    3832           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
    3833             :       Found = true;
    3834          60 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3835             :                                   AArch64::FMULv4f32)) {
    3836           1 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
    3837             :       Found = true;
    3838             :     }
    3839          62 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3840             :                            AArch64::FMULv4i32_indexed)) {
    3841           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
    3842             :       Found = true;
    3843          62 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3844             :                                   AArch64::FMULv4f32)) {
    3845           0 :       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
    3846             :       Found = true;
    3847             :     }
    3848             :     break;
    3849             : 
    3850           6 :   case AArch64::FSUBSrr:
    3851          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
    3852           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
    3853             :       Found = true;
    3854             :     }
    3855          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
    3856           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
    3857             :       Found = true;
    3858          12 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3859             :                                   AArch64::FMULv1i32_indexed)) {
    3860           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
    3861             :       Found = true;
    3862             :     }
    3863          12 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
    3864           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
    3865             :       Found = true;
    3866             :     }
    3867             :     break;
    3868           4 :   case AArch64::FSUBDrr:
    3869           8 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
    3870           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
    3871             :       Found = true;
    3872             :     }
    3873           8 :     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
    3874           0 :       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
    3875             :       Found = true;
    3876           8 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3877             :                                   AArch64::FMULv1i64_indexed)) {
    3878           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
    3879             :       Found = true;
    3880             :     }
    3881           8 :     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
    3882           2 :       Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
    3883             :       Found = true;
    3884             :     }
    3885             :     break;
    3886          14 :   case AArch64::FSUBv2f32:
    3887          28 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3888             :                            AArch64::FMULv2i32_indexed)) {
    3889           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
    3890             :       Found = true;
    3891          24 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3892             :                                   AArch64::FMULv2f32)) {
    3893           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
    3894             :       Found = true;
    3895             :     }
    3896          28 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3897             :                            AArch64::FMULv2i32_indexed)) {
    3898           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
    3899             :       Found = true;
    3900          28 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3901             :                                   AArch64::FMULv2f32)) {
    3902           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
    3903             :       Found = true;
    3904             :     }
    3905             :     break;
    3906          16 :   case AArch64::FSUBv2f64:
    3907          32 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3908             :                            AArch64::FMULv2i64_indexed)) {
    3909           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
    3910             :       Found = true;
    3911          28 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3912             :                                   AArch64::FMULv2f64)) {
    3913           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
    3914             :       Found = true;
    3915             :     }
    3916          32 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3917             :                            AArch64::FMULv2i64_indexed)) {
    3918           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
    3919             :       Found = true;
    3920          32 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3921             :                                   AArch64::FMULv2f64)) {
    3922           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
    3923             :       Found = true;
    3924             :     }
    3925             :     break;
    3926          12 :   case AArch64::FSUBv4f32:
    3927          24 :     if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3928             :                            AArch64::FMULv4i32_indexed)) {
    3929           2 :       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
    3930             :       Found = true;
    3931          20 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
    3932             :                                   AArch64::FMULv4f32)) {
    3933           6 :       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
    3934             :       Found = true;
    3935             :     }
    3936          24 :     if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3937             :                            AArch64::FMULv4i32_indexed)) {
    3938           0 :       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
    3939             :       Found = true;
    3940          24 :     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
    3941             :                                   AArch64::FMULv4f32)) {
    3942           8 :       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
    3943             :       Found = true;
    3944             :     }
    3945             :     break;
    3946             :   }
    3947             :   return Found;
    3948             : }
    3949             : 
    3950             : /// Return true when a code sequence can improve throughput. It
    3951             : /// should be called only for instructions in loops.
    3952             : /// \param Pattern - combiner pattern
    3953          39 : bool AArch64InstrInfo::isThroughputPattern(
    3954             :     MachineCombinerPattern Pattern) const {
    3955             :   switch (Pattern) {
    3956             :   default:
    3957             :     break;
    3958             :   case MachineCombinerPattern::FMULADDS_OP1:
    3959             :   case MachineCombinerPattern::FMULADDS_OP2:
    3960             :   case MachineCombinerPattern::FMULSUBS_OP1:
    3961             :   case MachineCombinerPattern::FMULSUBS_OP2:
    3962             :   case MachineCombinerPattern::FMULADDD_OP1:
    3963             :   case MachineCombinerPattern::FMULADDD_OP2:
    3964             :   case MachineCombinerPattern::FMULSUBD_OP1:
    3965             :   case MachineCombinerPattern::FMULSUBD_OP2:
    3966             :   case MachineCombinerPattern::FNMULSUBS_OP1:
    3967             :   case MachineCombinerPattern::FNMULSUBD_OP1:
    3968             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    3969             :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    3970             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    3971             :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    3972             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    3973             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    3974             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    3975             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    3976             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    3977             :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    3978             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    3979             :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    3980             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    3981             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    3982             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    3983             :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    3984             :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    3985             :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    3986             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    3987             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    3988             :   case MachineCombinerPattern::FMLSv2f32_OP2:
    3989             :   case MachineCombinerPattern::FMLSv2f64_OP2:
    3990             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    3991             :   case MachineCombinerPattern::FMLSv4f32_OP2:
    3992             :     return true;
    3993             :   } // end switch (Pattern)
    3994          15 :   return false;
    3995             : }
    3996             : /// Return true when there is potentially a faster code sequence for an
    3997             : /// instruction chain ending in \p Root. All potential patterns are listed in
    3998             : /// the \p Pattern vector. Pattern should be sorted in priority order since the
    3999             : /// pattern evaluator stops checking as soon as it finds a faster sequence.
    4000             : 
    4001      112539 : bool AArch64InstrInfo::getMachineCombinerPatterns(
    4002             :     MachineInstr &Root,
    4003             :     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
    4004             :   // Integer patterns
    4005      112539 :   if (getMaddPatterns(Root, Patterns))
    4006             :     return true;
    4007             :   // Floating point patterns
    4008      112349 :   if (getFMAPatterns(Root, Patterns))
    4009             :     return true;
    4010             : 
    4011      112294 :   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
    4012             : }
    4013             : 
    4014             : enum class FMAInstKind { Default, Indexed, Accumulator };
    4015             : /// genFusedMultiply - Generate fused multiply instructions.
    4016             : /// This function supports both integer and floating point instructions.
    4017             : /// A typical example:
    4018             : ///  F|MUL I=A,B,0
    4019             : ///  F|ADD R,I,C
    4020             : ///  ==> F|MADD R,A,B,C
    4021             : /// \param MF Containing MachineFunction
    4022             : /// \param MRI Register information
    4023             : /// \param TII Target information
    4024             : /// \param Root is the F|ADD instruction
    4025             : /// \param [out] InsInstrs is a vector of machine instructions and will
    4026             : /// contain the generated madd instruction
    4027             : /// \param IdxMulOpd is index of operand in Root that is the result of
    4028             : /// the F|MUL. In the example above IdxMulOpd is 1.
    4029             : /// \param MaddOpc the opcode fo the f|madd instruction
    4030             : /// \param RC Register class of operands
    4031             : /// \param kind of fma instruction (addressing mode) to be generated
    4032             : /// \param ReplacedAddend is the result register from the instruction
    4033             : /// replacing the non-combined operand, if any.
    4034             : static MachineInstr *
    4035         279 : genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
    4036             :                  const TargetInstrInfo *TII, MachineInstr &Root,
    4037             :                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
    4038             :                  unsigned MaddOpc, const TargetRegisterClass *RC,
    4039             :                  FMAInstKind kind = FMAInstKind::Default,
    4040             :                  const unsigned *ReplacedAddend = nullptr) {
    4041             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    4042             : 
    4043         279 :   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
    4044         558 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    4045         279 :   unsigned ResultReg = Root.getOperand(0).getReg();
    4046         279 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    4047             :   bool Src0IsKill = MUL->getOperand(1).isKill();
    4048         279 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    4049             :   bool Src1IsKill = MUL->getOperand(2).isKill();
    4050             : 
    4051             :   unsigned SrcReg2;
    4052             :   bool Src2IsKill;
    4053         279 :   if (ReplacedAddend) {
    4054             :     // If we just generated a new addend, we must be it's only use.
    4055          36 :     SrcReg2 = *ReplacedAddend;
    4056             :     Src2IsKill = true;
    4057             :   } else {
    4058         243 :     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
    4059             :     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
    4060             :   }
    4061             : 
    4062         279 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    4063         279 :     MRI.constrainRegClass(ResultReg, RC);
    4064         279 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    4065         279 :     MRI.constrainRegClass(SrcReg0, RC);
    4066         279 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    4067         279 :     MRI.constrainRegClass(SrcReg1, RC);
    4068         279 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    4069         279 :     MRI.constrainRegClass(SrcReg2, RC);
    4070             : 
    4071             :   MachineInstrBuilder MIB;
    4072         279 :   if (kind == FMAInstKind::Default)
    4073         585 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4074         195 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4075         195 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4076         195 :               .addReg(SrcReg2, getKillRegState(Src2IsKill));
    4077          84 :   else if (kind == FMAInstKind::Indexed)
    4078          45 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4079          15 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    4080          15 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4081          15 :               .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4082          15 :               .addImm(MUL->getOperand(3).getImm());
    4083          69 :   else if (kind == FMAInstKind::Accumulator)
    4084         207 :     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4085          69 :               .addReg(SrcReg2, getKillRegState(Src2IsKill))
    4086          69 :               .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4087          69 :               .addReg(SrcReg1, getKillRegState(Src1IsKill));
    4088             :   else
    4089             :     assert(false && "Invalid FMA instruction kind \n");
    4090             :   // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
    4091         279 :   InsInstrs.push_back(MIB);
    4092         279 :   return MUL;
    4093             : }
    4094             : 
    4095             : /// genMaddR - Generate madd instruction and combine mul and add using
    4096             : /// an extra virtual register
    4097             : /// Example - an ADD intermediate needs to be stored in a register:
    4098             : ///   MUL I=A,B,0
    4099             : ///   ADD R,I,Imm
    4100             : ///   ==> ORR  V, ZR, Imm
    4101             : ///   ==> MADD R,A,B,V
    4102             : /// \param MF Containing MachineFunction
    4103             : /// \param MRI Register information
    4104             : /// \param TII Target information
    4105             : /// \param Root is the ADD instruction
    4106             : /// \param [out] InsInstrs is a vector of machine instructions and will
    4107             : /// contain the generated madd instruction
    4108             : /// \param IdxMulOpd is index of operand in Root that is the result of
    4109             : /// the MUL. In the example above IdxMulOpd is 1.
    4110             : /// \param MaddOpc the opcode fo the madd instruction
    4111             : /// \param VR is a virtual register that holds the value of an ADD operand
    4112             : /// (V in the example above).
    4113             : /// \param RC Register class of operands
    4114           3 : static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
    4115             :                               const TargetInstrInfo *TII, MachineInstr &Root,
    4116             :                               SmallVectorImpl<MachineInstr *> &InsInstrs,
    4117             :                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
    4118             :                               const TargetRegisterClass *RC) {
    4119             :   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
    4120             : 
    4121           6 :   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
    4122           3 :   unsigned ResultReg = Root.getOperand(0).getReg();
    4123           3 :   unsigned SrcReg0 = MUL->getOperand(1).getReg();
    4124             :   bool Src0IsKill = MUL->getOperand(1).isKill();
    4125           3 :   unsigned SrcReg1 = MUL->getOperand(2).getReg();
    4126             :   bool Src1IsKill = MUL->getOperand(2).isKill();
    4127             : 
    4128           3 :   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    4129           3 :     MRI.constrainRegClass(ResultReg, RC);
    4130           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    4131           3 :     MRI.constrainRegClass(SrcReg0, RC);
    4132           3 :   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    4133           3 :     MRI.constrainRegClass(SrcReg1, RC);
    4134           3 :   if (TargetRegisterInfo::isVirtualRegister(VR))
    4135           3 :     MRI.constrainRegClass(VR, RC);
    4136             : 
    4137             :   MachineInstrBuilder MIB =
    4138           9 :       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
    4139           3 :           .addReg(SrcReg0, getKillRegState(Src0IsKill))
    4140           3 :           .addReg(SrcReg1, getKillRegState(Src1IsKill))
    4141           3 :           .addReg(VR);
    4142             :   // Insert the MADD
    4143           3 :   InsInstrs.push_back(MIB);
    4144           3 :   return MUL;
    4145             : }
    4146             : 
    4147             : /// When getMachineCombinerPatterns() finds potential patterns,
    4148             : /// this function generates the instructions that could replace the
    4149             : /// original code sequence
    4150         465 : void AArch64InstrInfo::genAlternativeCodeSequence(
    4151             :     MachineInstr &Root, MachineCombinerPattern Pattern,
    4152             :     SmallVectorImpl<MachineInstr *> &InsInstrs,
    4153             :     SmallVectorImpl<MachineInstr *> &DelInstrs,
    4154             :     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
    4155         465 :   MachineBasicBlock &MBB = *Root.getParent();
    4156         465 :   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    4157             :   MachineFunction &MF = *MBB.getParent();
    4158         465 :   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    4159             : 
    4160             :   MachineInstr *MUL;
    4161             :   const TargetRegisterClass *RC;
    4162             :   unsigned Opc;
    4163         465 :   switch (Pattern) {
    4164         182 :   default:
    4165             :     // Reassociate instructions.
    4166         182 :     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
    4167             :                                                 DelInstrs, InstrIdxForVirtReg);
    4168         182 :     return;
    4169           8 :   case MachineCombinerPattern::MULADDW_OP1:
    4170             :   case MachineCombinerPattern::MULADDX_OP1:
    4171             :     // MUL I=A,B,0
    4172             :     // ADD R,I,C
    4173             :     // ==> MADD R,A,B,C
    4174             :     // --- Create(MADD);
    4175           8 :     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
    4176             :       Opc = AArch64::MADDWrrr;
    4177             :       RC = &AArch64::GPR32RegClass;
    4178             :     } else {
    4179             :       Opc = AArch64::MADDXrrr;
    4180             :       RC = &AArch64::GPR64RegClass;
    4181             :     }
    4182           8 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4183           8 :     break;
    4184          42 :   case MachineCombinerPattern::MULADDW_OP2:
    4185             :   case MachineCombinerPattern::MULADDX_OP2:
    4186             :     // MUL I=A,B,0
    4187             :     // ADD R,C,I
    4188             :     // ==> MADD R,A,B,C
    4189             :     // --- Create(MADD);
    4190          42 :     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
    4191             :       Opc = AArch64::MADDWrrr;
    4192             :       RC = &AArch64::GPR32RegClass;
    4193             :     } else {
    4194             :       Opc = AArch64::MADDXrrr;
    4195             :       RC = &AArch64::GPR64RegClass;
    4196             :     }
    4197          42 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4198          42 :     break;
    4199           3 :   case MachineCombinerPattern::MULADDWI_OP1:
    4200             :   case MachineCombinerPattern::MULADDXI_OP1: {
    4201             :     // MUL I=A,B,0
    4202             :     // ADD R,I,Imm
    4203             :     // ==> ORR  V, ZR, Imm
    4204             :     // ==> MADD R,A,B,V
    4205             :     // --- Create(MADD);
    4206             :     const TargetRegisterClass *OrrRC;
    4207             :     unsigned BitSize, OrrOpc, ZeroReg;
    4208           3 :     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
    4209             :       OrrOpc = AArch64::ORRWri;
    4210             :       OrrRC = &AArch64::GPR32spRegClass;
    4211             :       BitSize = 32;
    4212             :       ZeroReg = AArch64::WZR;
    4213             :       Opc = AArch64::MADDWrrr;
    4214             :       RC = &AArch64::GPR32RegClass;
    4215             :     } else {
    4216             :       OrrOpc = AArch64::ORRXri;
    4217             :       OrrRC = &AArch64::GPR64spRegClass;
    4218             :       BitSize = 64;
    4219             :       ZeroReg = AArch64::XZR;
    4220             :       Opc = AArch64::MADDXrrr;
    4221             :       RC = &AArch64::GPR64RegClass;
    4222             :     }
    4223           3 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    4224           3 :     uint64_t Imm = Root.getOperand(2).getImm();
    4225             : 
    4226           3 :     if (Root.getOperand(3).isImm()) {
    4227           3 :       unsigned Val = Root.getOperand(3).getImm();
    4228           3 :       Imm = Imm << Val;
    4229             :     }
    4230           3 :     uint64_t UImm = SignExtend64(Imm, BitSize);
    4231             :     uint64_t Encoding;
    4232           3 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    4233             :       MachineInstrBuilder MIB1 =
    4234           6 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    4235           2 :               .addReg(ZeroReg)
    4236           4 :               .addImm(Encoding);
    4237           2 :       InsInstrs.push_back(MIB1);
    4238           2 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4239           2 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4240             :     }
    4241             :     break;
    4242             :   }
    4243           0 :   case MachineCombinerPattern::MULSUBW_OP1:
    4244             :   case MachineCombinerPattern::MULSUBX_OP1: {
    4245             :     // MUL I=A,B,0
    4246             :     // SUB R,I, C
    4247             :     // ==> SUB  V, 0, C
    4248             :     // ==> MADD R,A,B,V // = -C + A*B
    4249             :     // --- Create(MADD);
    4250             :     const TargetRegisterClass *SubRC;
    4251             :     unsigned SubOpc, ZeroReg;
    4252           0 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
    4253             :       SubOpc = AArch64::SUBWrr;
    4254             :       SubRC = &AArch64::GPR32spRegClass;
    4255             :       ZeroReg = AArch64::WZR;
    4256             :       Opc = AArch64::MADDWrrr;
    4257             :       RC = &AArch64::GPR32RegClass;
    4258             :     } else {
    4259             :       SubOpc = AArch64::SUBXrr;
    4260             :       SubRC = &AArch64::GPR64spRegClass;
    4261             :       ZeroReg = AArch64::XZR;
    4262             :       Opc = AArch64::MADDXrrr;
    4263             :       RC = &AArch64::GPR64RegClass;
    4264             :     }
    4265           0 :     unsigned NewVR = MRI.createVirtualRegister(SubRC);
    4266             :     // SUB NewVR, 0, C
    4267             :     MachineInstrBuilder MIB1 =
    4268           0 :         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
    4269           0 :             .addReg(ZeroReg)
    4270           0 :             .add(Root.getOperand(2));
    4271           0 :     InsInstrs.push_back(MIB1);
    4272           0 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4273           0 :     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4274             :     break;
    4275             :   }
    4276         137 :   case MachineCombinerPattern::MULSUBW_OP2:
    4277             :   case MachineCombinerPattern::MULSUBX_OP2:
    4278             :     // MUL I=A,B,0
    4279             :     // SUB R,C,I
    4280             :     // ==> MSUB R,A,B,C (computes C - A*B)
    4281             :     // --- Create(MSUB);
    4282         137 :     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
    4283             :       Opc = AArch64::MSUBWrrr;
    4284             :       RC = &AArch64::GPR32RegClass;
    4285             :     } else {
    4286             :       Opc = AArch64::MSUBXrrr;
    4287             :       RC = &AArch64::GPR64RegClass;
    4288             :     }
    4289         137 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4290         137 :     break;
    4291           1 :   case MachineCombinerPattern::MULSUBWI_OP1:
    4292             :   case MachineCombinerPattern::MULSUBXI_OP1: {
    4293             :     // MUL I=A,B,0
    4294             :     // SUB R,I, Imm
    4295             :     // ==> ORR  V, ZR, -Imm
    4296             :     // ==> MADD R,A,B,V // = -Imm + A*B
    4297             :     // --- Create(MADD);
    4298             :     const TargetRegisterClass *OrrRC;
    4299             :     unsigned BitSize, OrrOpc, ZeroReg;
    4300           1 :     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
    4301             :       OrrOpc = AArch64::ORRWri;
    4302             :       OrrRC = &AArch64::GPR32spRegClass;
    4303             :       BitSize = 32;
    4304             :       ZeroReg = AArch64::WZR;
    4305             :       Opc = AArch64::MADDWrrr;
    4306             :       RC = &AArch64::GPR32RegClass;
    4307             :     } else {
    4308             :       OrrOpc = AArch64::ORRXri;
    4309             :       OrrRC = &AArch64::GPR64spRegClass;
    4310             :       BitSize = 64;
    4311             :       ZeroReg = AArch64::XZR;
    4312             :       Opc = AArch64::MADDXrrr;
    4313             :       RC = &AArch64::GPR64RegClass;
    4314             :     }
    4315           1 :     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    4316           1 :     uint64_t Imm = Root.getOperand(2).getImm();
    4317           1 :     if (Root.getOperand(3).isImm()) {
    4318           1 :       unsigned Val = Root.getOperand(3).getImm();
    4319           1 :       Imm = Imm << Val;
    4320             :     }
    4321           2 :     uint64_t UImm = SignExtend64(-Imm, BitSize);
    4322             :     uint64_t Encoding;
    4323           1 :     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    4324             :       MachineInstrBuilder MIB1 =
    4325           3 :           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
    4326           1 :               .addReg(ZeroReg)
    4327           2 :               .addImm(Encoding);
    4328           1 :       InsInstrs.push_back(MIB1);
    4329           1 :       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4330           1 :       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    4331             :     }
    4332             :     break;
    4333             :   }
    4334             :   // Floating Point Support
    4335           2 :   case MachineCombinerPattern::FMULADDS_OP1:
    4336             :   case MachineCombinerPattern::FMULADDD_OP1:
    4337             :     // MUL I=A,B,0
    4338             :     // ADD R,I,C
    4339             :     // ==> MADD R,A,B,C
    4340             :     // --- Create(MADD);
    4341           2 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
    4342             :       Opc = AArch64::FMADDSrrr;
    4343             :       RC = &AArch64::FPR32RegClass;
    4344             :     } else {
    4345             :       Opc = AArch64::FMADDDrrr;
    4346             :       RC = &AArch64::FPR64RegClass;
    4347             :     }
    4348           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4349           2 :     break;
    4350           2 :   case MachineCombinerPattern::FMULADDS_OP2:
    4351             :   case MachineCombinerPattern::FMULADDD_OP2:
    4352             :     // FMUL I=A,B,0
    4353             :     // FADD R,C,I
    4354             :     // ==> FMADD R,A,B,C
    4355             :     // --- Create(FMADD);
    4356           2 :     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
    4357             :       Opc = AArch64::FMADDSrrr;
    4358             :       RC = &AArch64::FPR32RegClass;
    4359             :     } else {
    4360             :       Opc = AArch64::FMADDDrrr;
    4361             :       RC = &AArch64::FPR64RegClass;
    4362             :     }
    4363           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4364           2 :     break;
    4365             : 
    4366           1 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    4367             :     Opc = AArch64::FMLAv1i32_indexed;
    4368             :     RC = &AArch64::FPR32RegClass;
    4369           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4370             :                            FMAInstKind::Indexed);
    4371           1 :     break;
    4372           0 :   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    4373             :     Opc = AArch64::FMLAv1i32_indexed;
    4374             :     RC = &AArch64::FPR32RegClass;
    4375           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4376             :                            FMAInstKind::Indexed);
    4377           0 :     break;
    4378             : 
    4379           1 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    4380             :     Opc = AArch64::FMLAv1i64_indexed;
    4381             :     RC = &AArch64::FPR64RegClass;
    4382           1 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4383             :                            FMAInstKind::Indexed);
    4384           1 :     break;
    4385           0 :   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    4386             :     Opc = AArch64::FMLAv1i64_indexed;
    4387             :     RC = &AArch64::FPR64RegClass;
    4388           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4389             :                            FMAInstKind::Indexed);
    4390           0 :     break;
    4391             : 
    4392           2 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
    4393             :   case MachineCombinerPattern::FMLAv2f32_OP1:
    4394             :     RC = &AArch64::FPR64RegClass;
    4395           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
    4396             :       Opc = AArch64::FMLAv2i32_indexed;
    4397           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4398             :                              FMAInstKind::Indexed);
    4399             :     } else {
    4400             :       Opc = AArch64::FMLAv2f32;
    4401           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4402             :                              FMAInstKind::Accumulator);
    4403             :     }
    4404             :     break;
    4405           0 :   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
    4406             :   case MachineCombinerPattern::FMLAv2f32_OP2:
    4407             :     RC = &AArch64::FPR64RegClass;
    4408           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
    4409             :       Opc = AArch64::FMLAv2i32_indexed;
    4410           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4411             :                              FMAInstKind::Indexed);
    4412             :     } else {
    4413             :       Opc = AArch64::FMLAv2f32;
    4414           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4415             :                              FMAInstKind::Accumulator);
    4416             :     }
    4417             :     break;
    4418             : 
    4419           2 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
    4420             :   case MachineCombinerPattern::FMLAv2f64_OP1:
    4421             :     RC = &AArch64::FPR128RegClass;
    4422           2 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
    4423             :       Opc = AArch64::FMLAv2i64_indexed;
    4424           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4425             :                              FMAInstKind::Indexed);
    4426             :     } else {
    4427             :       Opc = AArch64::FMLAv2f64;
    4428           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4429             :                              FMAInstKind::Accumulator);
    4430             :     }
    4431             :     break;
    4432           0 :   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
    4433             :   case MachineCombinerPattern::FMLAv2f64_OP2:
    4434             :     RC = &AArch64::FPR128RegClass;
    4435           0 :     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
    4436             :       Opc = AArch64::FMLAv2i64_indexed;
    4437           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4438             :                              FMAInstKind::Indexed);
    4439             :     } else {
    4440             :       Opc = AArch64::FMLAv2f64;
    4441           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4442             :                              FMAInstKind::Accumulator);
    4443             :     }
    4444             :     break;
    4445             : 
    4446           2 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
    4447             :   case MachineCombinerPattern::FMLAv4f32_OP1:
    4448             :     RC = &AArch64::FPR128RegClass;
    4449           2 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
    4450             :       Opc = AArch64::FMLAv4i32_indexed;
    4451           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4452             :                              FMAInstKind::Indexed);
    4453             :     } else {
    4454             :       Opc = AArch64::FMLAv4f32;
    4455           1 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4456             :                              FMAInstKind::Accumulator);
    4457             :     }
    4458             :     break;
    4459             : 
    4460           0 :   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
    4461             :   case MachineCombinerPattern::FMLAv4f32_OP2:
    4462             :     RC = &AArch64::FPR128RegClass;
    4463           0 :     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
    4464             :       Opc = AArch64::FMLAv4i32_indexed;
    4465           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4466             :                              FMAInstKind::Indexed);
    4467             :     } else {
    4468             :       Opc = AArch64::FMLAv4f32;
    4469           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4470             :                              FMAInstKind::Accumulator);
    4471             :     }
    4472             :     break;
    4473             : 
    4474           0 :   case MachineCombinerPattern::FMULSUBS_OP1:
    4475             :   case MachineCombinerPattern::FMULSUBD_OP1: {
    4476             :     // FMUL I=A,B,0
    4477             :     // FSUB R,I,C
    4478             :     // ==> FNMSUB R,A,B,C // = -C + A*B
    4479             :     // --- Create(FNMSUB);
    4480           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
    4481             :       Opc = AArch64::FNMSUBSrrr;
    4482             :       RC = &AArch64::FPR32RegClass;
    4483             :     } else {
    4484             :       Opc = AArch64::FNMSUBDrrr;
    4485             :       RC = &AArch64::FPR64RegClass;
    4486             :     }
    4487           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4488           0 :     break;
    4489             :   }
    4490             : 
    4491           4 :   case MachineCombinerPattern::FNMULSUBS_OP1:
    4492             :   case MachineCombinerPattern::FNMULSUBD_OP1: {
    4493             :     // FNMUL I=A,B,0
    4494             :     // FSUB R,I,C
    4495             :     // ==> FNMADD R,A,B,C // = -A*B - C
    4496             :     // --- Create(FNMADD);
    4497           4 :     if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
    4498             :       Opc = AArch64::FNMADDSrrr;
    4499             :       RC = &AArch64::FPR32RegClass;
    4500             :     } else {
    4501             :       Opc = AArch64::FNMADDDrrr;
    4502             :       RC = &AArch64::FPR64RegClass;
    4503             :     }
    4504           4 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    4505           4 :     break;
    4506             :   }
    4507             : 
    4508           0 :   case MachineCombinerPattern::FMULSUBS_OP2:
    4509             :   case MachineCombinerPattern::FMULSUBD_OP2: {
    4510             :     // FMUL I=A,B,0
    4511             :     // FSUB R,C,I
    4512             :     // ==> FMSUB R,A,B,C (computes C - A*B)
    4513             :     // --- Create(FMSUB);
    4514           0 :     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
    4515             :       Opc = AArch64::FMSUBSrrr;
    4516             :       RC = &AArch64::FPR32RegClass;
    4517             :     } else {
    4518             :       Opc = AArch64::FMSUBDrrr;
    4519             :       RC = &AArch64::FPR64RegClass;
    4520             :     }
    4521           0 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    4522           0 :     break;
    4523             :   }
    4524             : 
    4525           2 :   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    4526             :     Opc = AArch64::FMLSv1i32_indexed;
    4527             :     RC = &AArch64::FPR32RegClass;
    4528           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4529             :                            FMAInstKind::Indexed);
    4530           2 :     break;
    4531             : 
    4532           2 :   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    4533             :     Opc = AArch64::FMLSv1i64_indexed;
    4534             :     RC = &AArch64::FPR64RegClass;
    4535           2 :     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4536             :                            FMAInstKind::Indexed);
    4537           2 :     break;
    4538             : 
    4539          12 :   case MachineCombinerPattern::FMLSv2f32_OP2:
    4540             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    4541             :     RC = &AArch64::FPR64RegClass;
    4542          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
    4543             :       Opc = AArch64::FMLSv2i32_indexed;
    4544           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4545             :                              FMAInstKind::Indexed);
    4546             :     } else {
    4547             :       Opc = AArch64::FMLSv2f32;
    4548          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4549             :                              FMAInstKind::Accumulator);
    4550             :     }
    4551             :     break;
    4552             : 
    4553          12 :   case MachineCombinerPattern::FMLSv2f64_OP2:
    4554             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    4555             :     RC = &AArch64::FPR128RegClass;
    4556          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
    4557             :       Opc = AArch64::FMLSv2i64_indexed;
    4558           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4559             :                              FMAInstKind::Indexed);
    4560             :     } else {
    4561             :       Opc = AArch64::FMLSv2f64;
    4562          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4563             :                              FMAInstKind::Accumulator);
    4564             :     }
    4565             :     break;
    4566             : 
    4567          12 :   case MachineCombinerPattern::FMLSv4f32_OP2:
    4568             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    4569             :     RC = &AArch64::FPR128RegClass;
    4570          12 :     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
    4571             :       Opc = AArch64::FMLSv4i32_indexed;
    4572           2 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4573             :                              FMAInstKind::Indexed);
    4574             :     } else {
    4575             :       Opc = AArch64::FMLSv4f32;
    4576          10 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
    4577             :                              FMAInstKind::Accumulator);
    4578             :     }
    4579             :     break;
    4580          12 :   case MachineCombinerPattern::FMLSv2f32_OP1:
    4581             :   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
    4582             :     RC = &AArch64::FPR64RegClass;
    4583          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4584             :     MachineInstrBuilder MIB1 =
    4585          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
    4586          12 :             .add(Root.getOperand(2));
    4587          12 :     InsInstrs.push_back(MIB1);
    4588          24 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4589          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
    4590             :       Opc = AArch64::FMLAv2i32_indexed;
    4591           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4592             :                              FMAInstKind::Indexed, &NewVR);
    4593             :     } else {
    4594             :       Opc = AArch64::FMLAv2f32;
    4595          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4596             :                              FMAInstKind::Accumulator, &NewVR);
    4597             :     }
    4598             :     break;
    4599             :   }
    4600          12 :   case MachineCombinerPattern::FMLSv4f32_OP1:
    4601             :   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
    4602             :     RC = &AArch64::FPR128RegClass;
    4603          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4604             :     MachineInstrBuilder MIB1 =
    4605          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
    4606          12 :             .add(Root.getOperand(2));
    4607          12 :     InsInstrs.push_back(MIB1);
    4608          24 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4609          12 :     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
    4610             :       Opc = AArch64::FMLAv4i32_indexed;
    4611           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4612             :                              FMAInstKind::Indexed, &NewVR);
    4613             :     } else {
    4614             :       Opc = AArch64::FMLAv4f32;
    4615          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4616             :                              FMAInstKind::Accumulator, &NewVR);
    4617             :     }
    4618             :     break;
    4619             :   }
    4620          12 :   case MachineCombinerPattern::FMLSv2f64_OP1:
    4621             :   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
    4622             :     RC = &AArch64::FPR128RegClass;
    4623          12 :     unsigned NewVR = MRI.createVirtualRegister(RC);
    4624             :     MachineInstrBuilder MIB1 =
    4625          24 :         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
    4626          12 :             .add(Root.getOperand(2));
    4627          12 :     InsInstrs.push_back(MIB1);
    4628          24 :     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    4629          12 :     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
    4630             :       Opc = AArch64::FMLAv2i64_indexed;
    4631           0 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4632             :                              FMAInstKind::Indexed, &NewVR);
    4633             :     } else {
    4634             :       Opc = AArch64::FMLAv2f64;
    4635          12 :       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
    4636             :                              FMAInstKind::Accumulator, &NewVR);
    4637             :     }
    4638             :     break;
    4639             :   }
    4640             :   } // end switch (Pattern)
    4641             :   // Record MUL and ADD/SUB for deletion
    4642         283 :   DelInstrs.push_back(MUL);
    4643         283 :   DelInstrs.push_back(&Root);
    4644             : }
    4645             : 
    4646             : /// Replace csincr-branch sequence by simple conditional branch
    4647             : ///
    4648             : /// Examples:
    4649             : /// 1. \code
    4650             : ///   csinc  w9, wzr, wzr, <condition code>
    4651             : ///   tbnz   w9, #0, 0x44
    4652             : ///    \endcode
    4653             : /// to
    4654             : ///    \code
    4655             : ///   b.<inverted condition code>
    4656             : ///    \endcode
    4657             : ///
    4658             : /// 2. \code
    4659             : ///   csinc w9, wzr, wzr, <condition code>
    4660             : ///   tbz   w9, #0, 0x44
    4661             : ///    \endcode
    4662             : /// to
    4663             : ///    \code
    4664             : ///   b.<condition code>
    4665             : ///    \endcode
    4666             : ///
    4667             : /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
    4668             : /// compare's constant operand is power of 2.
    4669             : ///
    4670             : /// Examples:
    4671             : ///    \code
    4672             : ///   and  w8, w8, #0x400
    4673             : ///   cbnz w8, L1
    4674             : ///    \endcode
    4675             : /// to
    4676             : ///    \code
    4677             : ///   tbnz w8, #10, L1
    4678             : ///    \endcode
    4679             : ///
    4680             : /// \param  MI Conditional Branch
    4681             : /// \return True when the simple conditional branch is generated
    4682             : ///
    4683         949 : bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
    4684             :   bool IsNegativeBranch = false;
    4685             :   bool IsTestAndBranch = false;
    4686             :   unsigned TargetBBInMI = 0;
    4687        1898 :   switch (MI.getOpcode()) {
    4688           0 :   default:
    4689           0 :     llvm_unreachable("Unknown branch instruction?");
    4690             :   case AArch64::Bcc:
    4691             :     return false;
    4692             :   case AArch64::CBZW:
    4693             :   case AArch64::CBZX:
    4694             :     TargetBBInMI = 1;
    4695             :     break;
    4696         184 :   case AArch64::CBNZW:
    4697             :   case AArch64::CBNZX:
    4698             :     TargetBBInMI = 1;
    4699             :     IsNegativeBranch = true;
    4700         184 :     break;
    4701          80 :   case AArch64::TBZW:
    4702             :   case AArch64::TBZX:
    4703             :     TargetBBInMI = 2;
    4704             :     IsTestAndBranch = true;
    4705          80 :     break;
    4706          30 :   case AArch64::TBNZW:
    4707             :   case AArch64::TBNZX:
    4708             :     TargetBBInMI = 2;
    4709             :     IsNegativeBranch = true;
    4710             :     IsTestAndBranch = true;
    4711          30 :     break;
    4712             :   }
    4713             :   // So we increment a zero register and test for bits other
    4714             :   // than bit 0? Conservatively bail out in case the verifier
    4715             :   // missed this case.
    4716         386 :   if (IsTestAndBranch && MI.getOperand(1).getImm())
    4717             :     return false;
    4718             : 
    4719             :   // Find Definition.
    4720             :   assert(MI.getParent() && "Incomplete machine instruciton\n");
    4721         347 :   MachineBasicBlock *MBB = MI.getParent();
    4722         347 :   MachineFunction *MF = MBB->getParent();
    4723         347 :   MachineRegisterInfo *MRI = &MF->getRegInfo();
    4724         347 :   unsigned VReg = MI.getOperand(0).getReg();
    4725         347 :   if (!TargetRegisterInfo::isVirtualRegister(VReg))
    4726             :     return false;
    4727             : 
    4728         347 :   MachineInstr *DefMI = MRI->getVRegDef(VReg);
    4729             : 
    4730             :   // Look through COPY instructions to find definition.
    4731         437 :   while (DefMI->isCopy()) {
    4732         199 :     unsigned CopyVReg = DefMI->getOperand(1).getReg();
    4733         199 :     if (!MRI->hasOneNonDBGUse(CopyVReg))
    4734             :       return false;
    4735         142 :     if (!MRI->hasOneDef(CopyVReg))
    4736             :       return false;
    4737          45 :     DefMI = MRI->getVRegDef(CopyVReg);
    4738             :   }
    4739             : 
    4740         193 :   switch (DefMI->getOpcode()) {
    4741             :   default:
    4742             :     return false;
    4743             :   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
    4744           7 :   case AArch64::ANDWri:
    4745             :   case AArch64::ANDXri: {
    4746           7 :     if (IsTestAndBranch)
    4747             :       return false;
    4748           7 :     if (DefMI->getParent() != MBB)
    4749             :       return false;
    4750           6 :     if (!MRI->hasOneNonDBGUse(VReg))
    4751             :       return false;
    4752             : 
    4753           5 :     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    4754          10 :     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
    4755          10 :         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    4756             :     if (!isPowerOf2_64(Mask))
    4757             :       return false;
    4758             : 
    4759             :     MachineOperand &MO = DefMI->getOperand(1);
    4760           5 :     unsigned NewReg = MO.getReg();
    4761           5 :     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
    4762             :       return false;
    4763             : 
    4764             :     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
    4765             : 
    4766             :     MachineBasicBlock &RefToMBB = *MBB;
    4767           5 :     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    4768             :     DebugLoc DL = MI.getDebugLoc();
    4769             :     unsigned Imm = Log2_64(Mask);
    4770             :     unsigned Opc = (Imm < 32)
    4771           5 :                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
    4772             :                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    4773          15 :     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
    4774           5 :                               .addReg(NewReg)
    4775           5 :                               .addImm(Imm)
    4776           5 :                               .addMBB(TBB);
    4777             :     // Register lives on to the CBZ now.
    4778             :     MO.setIsKill(false);
    4779             : 
    4780             :     // For immediate smaller than 32, we need to use the 32-bit
    4781             :     // variant (W) in all cases. Indeed the 64-bit variant does not
    4782             :     // allow to encode them.
    4783             :     // Therefore, if the input register is 64-bit, we need to take the
    4784             :     // 32-bit sub-part.
    4785           5 :     if (!Is32Bit && Imm < 32)
    4786           3 :       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    4787           5 :     MI.eraseFromParent();
    4788             :     return true;
    4789             :   }
    4790             :   // Look for CSINC
    4791           1 :   case AArch64::CSINCWr:
    4792             :   case AArch64::CSINCXr: {
    4793           2 :     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
    4794           1 :           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
    4795           0 :         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
    4796           0 :           DefMI->getOperand(2).getReg() == AArch64::XZR))
    4797             :       return false;
    4798             : 
    4799           1 :     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    4800             :       return false;
    4801             : 
    4802           1 :     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    4803             :     // Convert only when the condition code is not modified between
    4804             :     // the CSINC and the branch. The CC may be used by other
    4805             :     // instructions in between.
    4806           1 :     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
    4807             :       return false;
    4808             :     MachineBasicBlock &RefToMBB = *MBB;
    4809           2 :     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    4810             :     DebugLoc DL = MI.getDebugLoc();
    4811           1 :     if (IsNegativeBranch)
    4812             :       CC = AArch64CC::getInvertedCondCode(CC);
    4813           2 :     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    4814           1 :     MI.eraseFromParent();
    4815             :     return true;
    4816             :   }
    4817             :   }
    4818             : }
    4819             : 
    4820             : std::pair<unsigned, unsigned>
    4821        2397 : AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
    4822             :   const unsigned Mask = AArch64II::MO_FRAGMENT;
    4823        2397 :   return std::make_pair(TF & Mask, TF & ~Mask);
    4824             : }
    4825             : 
    4826             : ArrayRef<std::pair<unsigned, const char *>>
    4827        2404 : AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
    4828             :   using namespace AArch64II;
    4829             : 
    4830             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4831             :       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
    4832             :       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
    4833             :       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
    4834             :       {MO_HI12, "aarch64-hi12"}};
    4835        2404 :   return makeArrayRef(TargetFlags);
    4836             : }
    4837             : 
    4838             : ArrayRef<std::pair<unsigned, const char *>>
    4839        1209 : AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
    4840             :   using namespace AArch64II;
    4841             : 
    4842             :   static const std::pair<unsigned, const char *> TargetFlags[] = {
    4843             :       {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
    4844        1209 :   return makeArrayRef(TargetFlags);
    4845             : }
    4846             : 
    4847             : ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
    4848          42 : AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
    4849             :   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
    4850             :       {{MOSuppressPair, "aarch64-suppress-pair"},
    4851             :        {MOStridedAccess, "aarch64-strided-access"}};
    4852          42 :   return makeArrayRef(TargetFlags);
    4853             : }
    4854             : 
    4855             :   /// Constants defining how certain sequences should be outlined.
    4856             :   /// This encompasses how an outlined function should be called, and what kind of
    4857             :   /// frame should be emitted for that outlined function.
    4858             :   ///
    4859             :   /// \p MachineOutlinerDefault implies that the function should be called with
    4860             :   /// a save and restore of LR to the stack.
    4861             :   ///
    4862             :   /// That is,
    4863             :   ///
    4864             :   /// I1     Save LR                    OUTLINED_FUNCTION:
    4865             :   /// I2 --> BL OUTLINED_FUNCTION       I1
    4866             :   /// I3     Restore LR                 I2
    4867             :   ///                                   I3
    4868             :   ///                                   RET
    4869             :   ///
    4870             :   /// * Call construction overhead: 3 (save + BL + restore)
    4871             :   /// * Frame construction overhead: 1 (ret)
    4872             :   /// * Requires stack fixups? Yes
    4873             :   ///
    4874             :   /// \p MachineOutlinerTailCall implies that the function is being created from
    4875             :   /// a sequence of instructions ending in a return.
    4876             :   ///
    4877             :   /// That is,
    4878             :   ///
    4879             :   /// I1                             OUTLINED_FUNCTION:
    4880             :   /// I2 --> B OUTLINED_FUNCTION     I1
    4881             :   /// RET                            I2
    4882             :   ///                                RET
    4883             :   ///
    4884             :   /// * Call construction overhead: 1 (B)
    4885             :   /// * Frame construction overhead: 0 (Return included in sequence)
    4886             :   /// * Requires stack fixups? No
    4887             :   ///
    4888             :   /// \p MachineOutlinerNoLRSave implies that the function should be called using
    4889             :   /// a BL instruction, but doesn't require LR to be saved and restored. This
    4890             :   /// happens when LR is known to be dead.
    4891             :   ///
    4892             :   /// That is,
    4893             :   ///
    4894             :   /// I1                                OUTLINED_FUNCTION:
    4895             :   /// I2 --> BL OUTLINED_FUNCTION       I1
    4896             :   /// I3                                I2
    4897             :   ///                                   I3
    4898             :   ///                                   RET
    4899             :   ///
    4900             :   /// * Call construction overhead: 1 (BL)
    4901             :   /// * Frame construction overhead: 1 (RET)
    4902             :   /// * Requires stack fixups? No
    4903             :   ///
    4904             :   /// \p MachineOutlinerThunk implies that the function is being created from
    4905             :   /// a sequence of instructions ending in a call. The outlined function is
    4906             :   /// called with a BL instruction, and the outlined function tail-calls the
    4907             :   /// original call destination.
    4908             :   ///
    4909             :   /// That is,
    4910             :   ///
    4911             :   /// I1                                OUTLINED_FUNCTION:
    4912             :   /// I2 --> BL OUTLINED_FUNCTION       I1
    4913             :   /// BL f                              I2
    4914             :   ///                                   B f
    4915             :   /// * Call construction overhead: 1 (BL)
    4916             :   /// * Frame construction overhead: 0
    4917             :   /// * Requires stack fixups? No
    4918             :   ///
    4919             : enum MachineOutlinerClass {
    4920             :   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
    4921             :   MachineOutlinerTailCall, /// Only emit a branch.
    4922             :   MachineOutlinerNoLRSave, /// Emit a call and return.
    4923             :   MachineOutlinerThunk,    /// Emit a call and tail-call.
    4924             : };
    4925             : 
    4926             : enum MachineOutlinerMBBFlags {
    4927             :   LRUnavailableSomewhere = 0x2,
    4928             :   HasCalls = 0x4
    4929             : };
    4930             : 
    4931         103 : bool AArch64InstrInfo::canOutlineWithoutLRSave(
    4932             :     MachineBasicBlock::iterator &CallInsertionPt) const {
    4933             :   // Was LR saved in the function containing this basic block?
    4934         103 :   MachineBasicBlock &MBB = *(CallInsertionPt->getParent());
    4935         103 :   LiveRegUnits LRU(getRegisterInfo());
    4936         103 :   LRU.addLiveOuts(MBB);
    4937             : 
    4938             :   // Get liveness information from the end of the block to the end of the
    4939             :   // prospective outlined region.
    4940         103 :   std::for_each(MBB.rbegin(),
    4941             :                 (MachineBasicBlock::reverse_iterator)CallInsertionPt,
    4942         673 :                 [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); });
    4943             : 
    4944             :   // If the link register is available at this point, then we can safely outline
    4945             :   // the region without saving/restoring LR. Otherwise, we must emit a save and
    4946             :   // restore.
    4947         206 :   return LRU.available(AArch64::LR);
    4948             : }
    4949             : 
    4950             : outliner::TargetCostInfo
    4951         104 : AArch64InstrInfo::getOutlininingCandidateInfo(
    4952             :     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
    4953             :   unsigned SequenceSize = std::accumulate(
    4954         104 :       RepeatedSequenceLocs[0].front(),
    4955         104 :       std::next(RepeatedSequenceLocs[0].back()),
    4956             :       0, [this](unsigned Sum, const MachineInstr &MI) {
    4957         600 :         return Sum + getInstSizeInBytes(MI);
    4958         704 :       });
    4959             :   unsigned CallID = MachineOutlinerDefault;
    4960             :   unsigned FrameID = MachineOutlinerDefault;
    4961             :   unsigned NumBytesForCall = 12;
    4962             :   unsigned NumBytesToCreateFrame = 4;
    4963             : 
    4964             :   auto DoesntNeedLRSave = 
    4965         103 :     [this](outliner::Candidate &I) {return canOutlineWithoutLRSave(I.back());};
    4966             : 
    4967         208 :   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
    4968             : 
    4969             :   // If the last instruction in any candidate is a terminator, then we should
    4970             :   // tail call all of the candidates.
    4971         104 :   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    4972             :     CallID = MachineOutlinerTailCall;
    4973             :     FrameID = MachineOutlinerTailCall;
    4974             :     NumBytesForCall = 4;
    4975             :     NumBytesToCreateFrame = 0;
    4976             :   }
    4977             : 
    4978         100 :   else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
    4979             :     // FIXME: Do we need to check if the code after this uses the value of LR?
    4980             :     CallID = MachineOutlinerThunk;
    4981             :     FrameID = MachineOutlinerThunk;
    4982             :     NumBytesForCall = 4;
    4983             :     NumBytesToCreateFrame = 0;
    4984             :   }
    4985             : 
    4986          86 :   else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
    4987             :                        DoesntNeedLRSave)) {
    4988             :     CallID = MachineOutlinerNoLRSave;
    4989             :     FrameID = MachineOutlinerNoLRSave;
    4990             :     NumBytesForCall = 4;
    4991             :     NumBytesToCreateFrame = 4;
    4992             :   }
    4993             : 
    4994             :   // Check if the range contains a call. These require a save + restore of the
    4995             :   // link register.
    4996         104 :   if (std::any_of(RepeatedSequenceLocs[0].front(),
    4997         104 :                   RepeatedSequenceLocs[0].back(),
    4998             :                   [](const MachineInstr &MI) { return MI.isCall(); }))
    4999           7 :     NumBytesToCreateFrame += 8; // Save + restore the link register.
    5000             : 
    5001             :   // Handle the last instruction separately. If this is a tail call, then the
    5002             :   // last instruction is a call. We don't want to save + restore in this case.
    5003             :   // However, it could be possible that the last instruction is a call without
    5004             :   // it being valid to tail call this sequence. We should consider this as well.
    5005         194 :   else if (FrameID != MachineOutlinerThunk &&
    5006         273 :            FrameID != MachineOutlinerTailCall &&
    5007          79 :            RepeatedSequenceLocs[0].back()->isCall())
    5008           0 :     NumBytesToCreateFrame += 8;
    5009             : 
    5010             :   return outliner::TargetCostInfo(SequenceSize, NumBytesForCall,
    5011         104 :                              NumBytesToCreateFrame, CallID, FrameID);
    5012             : }
    5013             : 
    5014          37 : bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
    5015             :     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
    5016          37 :   const Function &F = MF.getFunction();
    5017             : 
    5018             :   // Can F be deduplicated by the linker? If it can, don't outline from it.
    5019          70 :   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    5020             :     return false;
    5021             : 
    5022             :   // Don't outline from functions with section markings; the program could
    5023             :   // expect that all the code is in the named section.
    5024             :   // FIXME: Allow outlining from multiple functions with the same section
    5025             :   // marking.
    5026          35 :   if (F.hasSection())
    5027             :     return false;
    5028             : 
    5029             :   // Outlining from functions with redzones is unsafe since the outliner may
    5030             :   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
    5031             :   // outline from it.
    5032          32 :   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    5033          64 :   if (!AFI || AFI->hasRedZone().getValueOr(true))
    5034             :     return false;
    5035             : 
    5036             :   // It's safe to outline from MF.
    5037          31 :   return true;
    5038             : }
    5039             : 
    5040             : unsigned
    5041          36 : AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
    5042             :   unsigned Flags = 0x0;
    5043             :   // Check if there's a call inside this MachineBasicBlock. If there is, then
    5044             :   // set a flag.
    5045          36 :   if (std::any_of(MBB.begin(), MBB.end(),
    5046             :                   [](MachineInstr &MI) { return MI.isCall(); }))
    5047             :     Flags |= MachineOutlinerMBBFlags::HasCalls;
    5048             : 
    5049             :   // Check if LR is available through all of the MBB. If it's not, then set
    5050             :   // a flag.
    5051          36 :   LiveRegUnits LRU(getRegisterInfo());
    5052          36 :   LRU.addLiveOuts(MBB);
    5053             : 
    5054          36 :   std::for_each(MBB.rbegin(),
    5055             :                 MBB.rend(),
    5056         477 :                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
    5057             : 
    5058          36 :   if (!LRU.available(AArch64::LR)) 
    5059          34 :       Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
    5060             : 
    5061          36 :   return Flags;
    5062             : }
    5063             : 
    5064             : outliner::InstrType
    5065         441 : AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
    5066             :                                    unsigned Flags) const {
    5067             :   MachineInstr &MI = *MIT;
    5068         441 :   MachineBasicBlock *MBB = MI.getParent();
    5069         441 :   MachineFunction *MF = MBB->getParent();
    5070         441 :   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
    5071             : 
    5072             :   // Don't outline LOHs.
    5073         441 :   if (FuncInfo->getLOHRelated().count(&MI))
    5074             :     return outliner::InstrType::Illegal;
    5075             : 
    5076             :   // Don't allow debug values to impact outlining type.
    5077             :   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
    5078             :     return outliner::InstrType::Invisible;
    5079             : 
    5080             :   // At this point, KILL instructions don't really tell us much so we can go
    5081             :   // ahead and skip over them.
    5082         441 :   if (MI.isKill())
    5083             :     return outliner::InstrType::Invisible;
    5084             :   
    5085             :   // Is this a terminator for a basic block?
    5086         440 :   if (MI.isTerminator()) {
    5087             : 
    5088             :     // Is this the end of a function?
    5089          62 :     if (MI.getParent()->succ_empty())
    5090             :       return outliner::InstrType::Legal;
    5091             :     
    5092             :     // It's not, so don't outline it.
    5093           0 :     return outliner::InstrType::Illegal;
    5094             :   }
    5095             : 
    5096             :   // Make sure none of the operands are un-outlinable.
    5097        2719 :   for (const MachineOperand &MOP : MI.operands()) {
    5098        1217 :     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
    5099             :         MOP.isTargetIndex())
    5100             :       return outliner::InstrType::Illegal;
    5101             : 
    5102             :     // If it uses LR or W30 explicitly, then don't touch it.
    5103        2625 :     if (MOP.isReg() && !MOP.isImplicit() &&
    5104        1385 :         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
    5105             :       return outliner::InstrType::Illegal;
    5106             :   }
    5107             : 
    5108             :   // Special cases for instructions that can always be outlined, but will fail
    5109             :   // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
    5110             :   // be outlined because they don't require a *specific* value to be in LR.
    5111         694 :   if (MI.getOpcode() == AArch64::ADRP)
    5112             :     return outliner::InstrType::Legal;
    5113             : 
    5114             :   // If MI is a call we might be able to outline it. We don't want to outline
    5115             :   // any calls that rely on the position of items on the stack. When we outline
    5116             :   // something containing a call, we have to emit a save and restore of LR in
    5117             :   // the outlined function. Currently, this always happens by saving LR to the
    5118             :   // stack. Thus, if we outline, say, half the parameters for a function call
    5119             :   // plus the call, then we'll break the callee's expectations for the layout
    5120             :   // of the stack.
    5121             :   //
    5122             :   // FIXME: Allow calls to functions which construct a stack frame, as long
    5123             :   // as they don't access arguments on the stack.
    5124             :   // FIXME: Figure out some way to analyze functions defined in other modules.
    5125             :   // We should be able to compute the memory usage based on the IR calling
    5126             :   // convention, even if we can't see the definition.
    5127         334 :   if (MI.isCall()) {
    5128             :     // Get the function associated with the call. Look at each operand and find
    5129             :     // the one that represents the callee and get its name.
    5130             :     const Function *Callee = nullptr;
    5131          14 :     for (const MachineOperand &MOP : MI.operands()) {
    5132          14 :       if (MOP.isGlobal()) {
    5133          14 :         Callee = dyn_cast<Function>(MOP.getGlobal());
    5134             :         break;
    5135             :       }
    5136             :     }
    5137             : 
    5138             :     // Never outline calls to mcount.  There isn't any rule that would require
    5139             :     // this, but the Linux kernel's "ftrace" feature depends on it.
    5140          14 :     if (Callee && Callee->getName() == "\01_mcount")
    5141             :       return outliner::InstrType::Illegal;
    5142             : 
    5143             :     // If we don't know anything about the callee, assume it depends on the
    5144             :     // stack layout of the caller. In that case, it's only legal to outline
    5145             :     // as a tail-call.  Whitelist the call instructions we know about so we
    5146             :     // don't get unexpected results with call pseudo-instructions.
    5147             :     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    5148          28 :     if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
    5149             :       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
    5150             : 
    5151          14 :     if (!Callee)
    5152             :       return UnknownCallOutlineType;
    5153             : 
    5154             :     // We have a function we have information about. Check it if it's something
    5155             :     // can safely outline.
    5156          14 :     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
    5157             : 
    5158             :     // We don't know what's going on with the callee at all. Don't touch it.
    5159          14 :     if (!CalleeMF)
    5160             :       return UnknownCallOutlineType;
    5161             : 
    5162             :     // Check if we know anything about the callee saves on the function. If we
    5163             :     // don't, then don't touch it, since that implies that we haven't
    5164             :     // computed anything about its stack frame yet.
    5165          14 :     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    5166          22 :     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
    5167             :         MFI.getNumObjects() > 0)
    5168             :       return UnknownCallOutlineType;
    5169             : 
    5170             :     // At this point, we can say that CalleeMF ought to not pass anything on the
    5171             :     // stack. Therefore, we can outline it.
    5172             :     return outliner::InstrType::Legal;
    5173             :   }
    5174             : 
    5175             :   // Don't outline positions.
    5176             :   if (MI.isPosition())
    5177             :     return outliner::InstrType::Illegal;
    5178             : 
    5179             :   // Don't touch the link register or W30.
    5180         640 :   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
    5181             :       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
    5182             :     return outliner::InstrType::Illegal;
    5183             : 
    5184             :   // Does this use the stack?
    5185         921 :   if (MI.modifiesRegister(AArch64::SP, &RI) ||
    5186             :       MI.readsRegister(AArch64::SP, &RI)) {
    5187             :     // True if there is no chance that any outlined candidate from this range
    5188             :     // could require stack fixups. That is, both
    5189             :     // * LR is available in the range (No save/restore around call)
    5190             :     // * The range doesn't include calls (No save/restore in outlined frame)
    5191             :     // are true.
    5192             :     // FIXME: This is very restrictive; the flags check the whole block,
    5193             :     // not just the bit we will try to outline.
    5194             :     bool MightNeedStackFixUp =
    5195         139 :         (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
    5196             :                   MachineOutlinerMBBFlags::HasCalls));
    5197             : 
    5198             :     // If this instruction is in a range where it *never* needs to be fixed
    5199             :     // up, then we can *always* outline it. This is true even if it's not
    5200             :     // possible to fix that instruction up.
    5201             :     //
    5202             :     // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
    5203             :     // use SP. Suppose that I1 sits within a range that definitely doesn't
    5204             :     // need stack fixups, while I2 sits in a range that does.
    5205             :     //
    5206             :     // First, I1 can be outlined as long as we *never* fix up the stack in
    5207             :     // any sequence containing it. I1 is already a safe instruction in the
    5208             :     // original program, so as long as we don't modify it we're good to go.
    5209             :     // So this leaves us with showing that outlining I2 won't break our
    5210             :     // program.
    5211             :     //
    5212             :     // Suppose I1 and I2 belong to equivalent candidate sequences. When we
    5213             :     // look at I2, we need to see if it can be fixed up. Suppose I2, (and
    5214             :     // thus I1) cannot be fixed up. Then I2 will be assigned an unique
    5215             :     // integer label; thus, I2 cannot belong to any candidate sequence (a
    5216             :     // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
    5217             :     // as well, so we're good. Thus, I1 is always safe to outline.
    5218             :     //
    5219             :     // This gives us two things: first off, it buys us some more instructions
    5220             :     // for our search space by deeming stack instructions illegal only when
    5221             :     // they can't be fixed up AND we might have to fix them up. Second off,
    5222             :     // This allows us to catch tricky instructions like, say,
    5223             :     // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
    5224             :     // be paired with later SUBXris, which might *not* end up being outlined.
    5225             :     // If we mess with the stack to save something, then an ADDXri messes with
    5226             :     // it *after*, then we aren't going to restore the right something from
    5227             :     // the stack if we don't outline the corresponding SUBXri first. ADDXris and
    5228             :     // SUBXris are extremely common in prologue/epilogue code, so supporting
    5229             :     // them in the outliner can be a pretty big win!
    5230         139 :     if (!MightNeedStackFixUp)
    5231             :       return outliner::InstrType::Legal;
    5232             : 
    5233             :     // Any modification of SP will break our code to save/restore LR.
    5234             :     // FIXME: We could handle some instructions which add a constant offset to
    5235             :     // SP, with a bit more work.
    5236         137 :     if (MI.modifiesRegister(AArch64::SP, &RI))
    5237             :       return outliner::InstrType::Illegal;
    5238             : 
    5239             :     // At this point, we have a stack instruction that we might need to fix
    5240             :     // up. We'll handle it if it's a load or store.
    5241          98 :     if (MI.mayLoadOrStore()) {
    5242             :       unsigned Base;  // Filled with the base regiser of MI.
    5243             :       int64_t Offset; // Filled with the offset of MI.
    5244             :       unsigned DummyWidth;
    5245             : 
    5246             :       // Does it allow us to offset the base register and is the base SP?
    5247         186 :       if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
    5248          93 :           Base != AArch64::SP)
    5249             :         return outliner::InstrType::Illegal;
    5250             : 
    5251             :       // Find the minimum/maximum offset for this instruction and check if
    5252             :       // fixing it up would be in range.
    5253             :       int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
    5254             :       unsigned Scale;               // The scale to multiply the offsets by.
    5255         186 :       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
    5256             : 
    5257             :       // TODO: We should really test what happens if an instruction overflows.
    5258             :       // This is tricky to test with IR tests, but when the outliner is moved
    5259             :       // to a MIR test, it really ought to be checked.
    5260          93 :       Offset += 16; // Update the offset to what it would be if we outlined.
    5261          93 :       if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
    5262             :         return outliner::InstrType::Illegal;
    5263             : 
    5264             :       // It's in range, so we can outline it.
    5265          93 :       return outliner::InstrType::Legal;
    5266             :     }
    5267             : 
    5268             :     // FIXME: Add handling for instructions like "add x0, sp, #8".
    5269             : 
    5270             :     // We can't fix it up, so don't outline it.
    5271             :     return outliner::InstrType::Illegal;
    5272             :   }
    5273             : 
    5274             :   return outliner::InstrType::Legal;
    5275             : }
    5276             : 
    5277           9 : void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
    5278         114 :   for (MachineInstr &MI : MBB) {
    5279             :     unsigned Base, Width;
    5280             :     int64_t Offset;
    5281             : 
    5282             :     // Is this a load or store with an immediate offset with SP as the base?
    5283         192 :     if (!MI.mayLoadOrStore() ||
    5284         134 :         !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
    5285          38 :         Base != AArch64::SP)
    5286          58 :       continue;
    5287             : 
    5288             :     // It is, so we have to fix it up.
    5289             :     unsigned Scale;
    5290             :     int64_t Dummy1, Dummy2;
    5291             : 
    5292          38 :     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
    5293             :     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
    5294          76 :     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
    5295             :     assert(Scale != 0 && "Unexpected opcode!");
    5296             : 
    5297             :     // We've pushed the return address to the stack, so add 16 to the offset.
    5298             :     // This is safe, since we already checked if it would overflow when we
    5299             :     // checked if this instruction was legal to outline.
    5300          38 :     int64_t NewImm = (Offset + 16) / Scale;
    5301             :     StackOffsetOperand.setImm(NewImm);
    5302             :   }
    5303           9 : }
    5304             : 
    5305          14 : void AArch64InstrInfo::insertOutlinerEpilogue(
    5306             :     MachineBasicBlock &MBB, MachineFunction &MF,
    5307             :     const outliner::TargetCostInfo &TCI) const {
    5308             :   // For thunk outlining, rewrite the last instruction from a call to a
    5309             :   // tail-call.
    5310          14 :   if (TCI.FrameConstructionID == MachineOutlinerThunk) {
    5311             :     MachineInstr *Call = &*--MBB.instr_end();
    5312             :     unsigned TailOpcode;
    5313           6 :     if (Call->getOpcode() == AArch64::BL) {
    5314             :       TailOpcode = AArch64::TCRETURNdi;
    5315             :     } else {
    5316             :       assert(Call->getOpcode() == AArch64::BLR);
    5317             :       TailOpcode = AArch64::TCRETURNri;
    5318             :     }
    5319           9 :     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
    5320           3 :                             .add(Call->getOperand(0))
    5321             :                             .addImm(0);
    5322             :     MBB.insert(MBB.end(), TC);
    5323           3 :     Call->eraseFromParent();
    5324             :   }
    5325             : 
    5326             :   // Is there a call in the outlined range?
    5327         107 :   auto IsNonTailCall = [](MachineInstr &MI) {
    5328         113 :     return MI.isCall() && !MI.isReturn();
    5329         107 :   };
    5330          14 :   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
    5331             :     // Fix up the instructions in the range, since we're going to modify the
    5332             :     // stack.
    5333             :     assert(TCI.FrameConstructionID != MachineOutlinerDefault &&
    5334             :            "Can only fix up stack references once");
    5335           2 :     fixupPostOutline(MBB);
    5336             : 
    5337             :     // LR has to be a live in so that we can save it.
    5338             :     MBB.addLiveIn(AArch64::LR);
    5339             : 
    5340             :     MachineBasicBlock::iterator It = MBB.begin();
    5341             :     MachineBasicBlock::iterator Et = MBB.end();
    5342             : 
    5343           2 :     if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
    5344             :         TCI.FrameConstructionID == MachineOutlinerThunk)
    5345             :       Et = std::prev(MBB.end());
    5346             : 
    5347             :     // Insert a save before the outlined region
    5348          10 :     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
    5349           2 :                                 .addReg(AArch64::SP, RegState::Define)
    5350           2 :                                 .addReg(AArch64::LR)
    5351           2 :                                 .addReg(AArch64::SP)
    5352           2 :                                 .addImm(-16);
    5353             :     It = MBB.insert(It, STRXpre);
    5354             : 
    5355           2 :     const TargetSubtargetInfo &STI = MF.getSubtarget();
    5356           2 :     const MCRegisterInfo *MRI = STI.getRegisterInfo();
    5357           2 :     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
    5358             : 
    5359             :     // Add a CFI saying the stack was moved 16 B down.
    5360             :     int64_t StackPosEntry =
    5361           2 :         MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
    5362           8 :     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
    5363             :         .addCFIIndex(StackPosEntry)
    5364             :         .setMIFlags(MachineInstr::FrameSetup);
    5365             : 
    5366             :     // Add a CFI saying that the LR that we want to find is now 16 B higher than
    5367             :     // before.
    5368             :     int64_t LRPosEntry =
    5369           2 :         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
    5370           8 :     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
    5371             :         .addCFIIndex(LRPosEntry)
    5372             :         .setMIFlags(MachineInstr::FrameSetup);
    5373             : 
    5374             :     // Insert a restore before the terminator for the function.
    5375          10 :     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
    5376           2 :                                  .addReg(AArch64::SP, RegState::Define)
    5377           2 :                                  .addReg(AArch64::LR, RegState::Define)
    5378           2 :                                  .addReg(AArch64::SP)
    5379           2 :                                  .addImm(16);
    5380             :     Et = MBB.insert(Et, LDRXpost);
    5381             :   }
    5382             : 
    5383             :   // If this is a tail call outlined function, then there's already a return.
    5384          14 :   if (TCI.FrameConstructionID == MachineOutlinerTailCall ||
    5385             :       TCI.FrameConstructionID == MachineOutlinerThunk)
    5386             :     return;
    5387             : 
    5388             :   // It's not a tail call, so we have to insert the return ourselves.
    5389          50 :   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
    5390          10 :                           .addReg(AArch64::LR, RegState::Undef);
    5391             :   MBB.insert(MBB.end(), ret);
    5392             : 
    5393             :   // Did we have to modify the stack by saving the link register?
    5394          10 :   if (TCI.FrameConstructionID == MachineOutlinerNoLRSave)
    5395             :     return;
    5396             : 
    5397             :   // We modified the stack.
    5398             :   // Walk over the basic block and fix up all the stack accesses.
    5399           7 :   fixupPostOutline(MBB);
    5400             : }
    5401             : 
    5402          14 : void AArch64InstrInfo::insertOutlinerPrologue(
    5403             :     MachineBasicBlock &MBB, MachineFunction &MF,
    5404          14 :     const outliner::TargetCostInfo &TCI) const {}
    5405             : 
    5406          32 : MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
    5407             :     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    5408             :     MachineFunction &MF, const outliner::TargetCostInfo &TCI) const {
    5409             : 
    5410             :   // Are we tail calling?
    5411          32 :   if (TCI.CallConstructionID == MachineOutlinerTailCall) {
    5412             :     // If yes, then we can just branch to the label.
    5413           8 :     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
    5414           2 :                             .addGlobalAddress(M.getNamedValue(MF.getName()))
    5415             :                             .addImm(0));
    5416           2 :     return It;
    5417             :   }
    5418             : 
    5419             :   // Are we saving the link register?
    5420          30 :   if (TCI.CallConstructionID == MachineOutlinerNoLRSave ||
    5421             :       TCI.CallConstructionID == MachineOutlinerThunk) {
    5422             :     // No, so just insert the call.
    5423          56 :     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
    5424          14 :                             .addGlobalAddress(M.getNamedValue(MF.getName())));
    5425          14 :     return It;
    5426             :   }
    5427             : 
    5428             :   // We want to return the spot where we inserted the call.
    5429             :   MachineBasicBlock::iterator CallPt;
    5430             : 
    5431             :   // We have a default call. Save the link register.
    5432          80 :   MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
    5433          16 :                               .addReg(AArch64::SP, RegState::Define)
    5434          16 :                               .addReg(AArch64::LR)
    5435          16 :                               .addReg(AArch64::SP)
    5436          16 :                               .addImm(-16);
    5437          16 :   It = MBB.insert(It, STRXpre);
    5438             :   It++;
    5439             : 
    5440             :   // Insert the call.
    5441          64 :   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
    5442          16 :                           .addGlobalAddress(M.getNamedValue(MF.getName())));
    5443          16 :   CallPt = It;
    5444             :   It++;
    5445             : 
    5446             :   // Restore the link register.
    5447          80 :   MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
    5448          16 :                                .addReg(AArch64::SP, RegState::Define)
    5449          16 :                                .addReg(AArch64::LR, RegState::Define)
    5450          16 :                                .addReg(AArch64::SP)
    5451          16 :                                .addImm(16);
    5452          16 :   It = MBB.insert(It, LDRXpost);
    5453             : 
    5454          16 :   return CallPt;
    5455      303507 : }

Generated by: LCOV version 1.13