LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64FastISel.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 1859 1980 93.9 %
Date: 2018-02-20 03:34:22 Functions: 76 78 97.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines the AArch64-specific support for the FastISel class. Some
      11             : // of the target-specific code is generated by tablegen in the file
      12             : // AArch64GenFastISel.inc, which is #included here.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64CallingConvention.h"
      18             : #include "AArch64RegisterInfo.h"
      19             : #include "AArch64Subtarget.h"
      20             : #include "MCTargetDesc/AArch64AddressingModes.h"
      21             : #include "Utils/AArch64BaseInfo.h"
      22             : #include "llvm/ADT/APFloat.h"
      23             : #include "llvm/ADT/APInt.h"
      24             : #include "llvm/ADT/DenseMap.h"
      25             : #include "llvm/ADT/SmallVector.h"
      26             : #include "llvm/Analysis/BranchProbabilityInfo.h"
      27             : #include "llvm/CodeGen/CallingConvLower.h"
      28             : #include "llvm/CodeGen/FastISel.h"
      29             : #include "llvm/CodeGen/FunctionLoweringInfo.h"
      30             : #include "llvm/CodeGen/ISDOpcodes.h"
      31             : #include "llvm/CodeGen/MachineBasicBlock.h"
      32             : #include "llvm/CodeGen/MachineConstantPool.h"
      33             : #include "llvm/CodeGen/MachineFrameInfo.h"
      34             : #include "llvm/CodeGen/MachineInstr.h"
      35             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      36             : #include "llvm/CodeGen/MachineMemOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/MachineValueType.h"
      39             : #include "llvm/CodeGen/RuntimeLibcalls.h"
      40             : #include "llvm/CodeGen/ValueTypes.h"
      41             : #include "llvm/IR/Argument.h"
      42             : #include "llvm/IR/Attributes.h"
      43             : #include "llvm/IR/BasicBlock.h"
      44             : #include "llvm/IR/CallingConv.h"
      45             : #include "llvm/IR/Constant.h"
      46             : #include "llvm/IR/Constants.h"
      47             : #include "llvm/IR/DataLayout.h"
      48             : #include "llvm/IR/DerivedTypes.h"
      49             : #include "llvm/IR/Function.h"
      50             : #include "llvm/IR/GetElementPtrTypeIterator.h"
      51             : #include "llvm/IR/GlobalValue.h"
      52             : #include "llvm/IR/InstrTypes.h"
      53             : #include "llvm/IR/Instruction.h"
      54             : #include "llvm/IR/Instructions.h"
      55             : #include "llvm/IR/IntrinsicInst.h"
      56             : #include "llvm/IR/Intrinsics.h"
      57             : #include "llvm/IR/Operator.h"
      58             : #include "llvm/IR/Type.h"
      59             : #include "llvm/IR/User.h"
      60             : #include "llvm/IR/Value.h"
      61             : #include "llvm/MC/MCInstrDesc.h"
      62             : #include "llvm/MC/MCRegisterInfo.h"
      63             : #include "llvm/MC/MCSymbol.h"
      64             : #include "llvm/Support/AtomicOrdering.h"
      65             : #include "llvm/Support/Casting.h"
      66             : #include "llvm/Support/CodeGen.h"
      67             : #include "llvm/Support/Compiler.h"
      68             : #include "llvm/Support/ErrorHandling.h"
      69             : #include "llvm/Support/MathExtras.h"
      70             : #include <algorithm>
      71             : #include <cassert>
      72             : #include <cstdint>
      73             : #include <iterator>
      74             : #include <utility>
      75             : 
      76             : using namespace llvm;
      77             : 
      78             : namespace {
      79             : 
      80        1192 : class AArch64FastISel final : public FastISel {
      81             :   class Address {
      82             :   public:
      83             :     using BaseKind = enum {
      84             :       RegBase,
      85             :       FrameIndexBase
      86             :     };
      87             : 
      88             :   private:
      89             :     BaseKind Kind = RegBase;
      90             :     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
      91             :     union {
      92             :       unsigned Reg;
      93             :       int FI;
      94             :     } Base;
      95             :     unsigned OffsetReg = 0;
      96             :     unsigned Shift = 0;
      97             :     int64_t Offset = 0;
      98             :     const GlobalValue *GV = nullptr;
      99             : 
     100             :   public:
     101         842 :     Address() { Base.Reg = 0; }
     102             : 
     103         132 :     void setKind(BaseKind K) { Kind = K; }
     104             :     BaseKind getKind() const { return Kind; }
     105          84 :     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
     106             :     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
     107             :     bool isRegBase() const { return Kind == RegBase; }
     108             :     bool isFIBase() const { return Kind == FrameIndexBase; }
     109             : 
     110             :     void setReg(unsigned Reg) {
     111             :       assert(isRegBase() && "Invalid base register access!");
     112        1030 :       Base.Reg = Reg;
     113             :     }
     114             : 
     115             :     unsigned getReg() const {
     116             :       assert(isRegBase() && "Invalid base register access!");
     117             :       return Base.Reg;
     118             :     }
     119             : 
     120             :     void setOffsetReg(unsigned Reg) {
     121         507 :       OffsetReg = Reg;
     122             :     }
     123             : 
     124             :     unsigned getOffsetReg() const {
     125             :       return OffsetReg;
     126             :     }
     127             : 
     128             :     void setFI(unsigned FI) {
     129             :       assert(isFIBase() && "Invalid base frame index  access!");
     130         129 :       Base.FI = FI;
     131             :     }
     132             : 
     133             :     unsigned getFI() const {
     134             :       assert(isFIBase() && "Invalid base frame index access!");
     135         134 :       return Base.FI;
     136             :     }
     137             : 
     138          99 :     void setOffset(int64_t O) { Offset = O; }
     139             :     int64_t getOffset() { return Offset; }
     140          64 :     void setShift(unsigned S) { Shift = S; }
     141             :     unsigned getShift() { return Shift; }
     142             : 
     143          87 :     void setGlobalValue(const GlobalValue *G) { GV = G; }
     144             :     const GlobalValue *getGlobalValue() { return GV; }
     145             :   };
     146             : 
     147             :   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
     148             :   /// make the right decision when generating code for different targets.
     149             :   const AArch64Subtarget *Subtarget;
     150             :   LLVMContext *Context;
     151             : 
     152             :   bool fastLowerArguments() override;
     153             :   bool fastLowerCall(CallLoweringInfo &CLI) override;
     154             :   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
     155             : 
     156             : private:
     157             :   // Selection routines.
     158             :   bool selectAddSub(const Instruction *I);
     159             :   bool selectLogicalOp(const Instruction *I);
     160             :   bool selectLoad(const Instruction *I);
     161             :   bool selectStore(const Instruction *I);
     162             :   bool selectBranch(const Instruction *I);
     163             :   bool selectIndirectBr(const Instruction *I);
     164             :   bool selectCmp(const Instruction *I);
     165             :   bool selectSelect(const Instruction *I);
     166             :   bool selectFPExt(const Instruction *I);
     167             :   bool selectFPTrunc(const Instruction *I);
     168             :   bool selectFPToInt(const Instruction *I, bool Signed);
     169             :   bool selectIntToFP(const Instruction *I, bool Signed);
     170             :   bool selectRem(const Instruction *I, unsigned ISDOpcode);
     171             :   bool selectRet(const Instruction *I);
     172             :   bool selectTrunc(const Instruction *I);
     173             :   bool selectIntExt(const Instruction *I);
     174             :   bool selectMul(const Instruction *I);
     175             :   bool selectShift(const Instruction *I);
     176             :   bool selectBitCast(const Instruction *I);
     177             :   bool selectFRem(const Instruction *I);
     178             :   bool selectSDiv(const Instruction *I);
     179             :   bool selectGetElementPtr(const Instruction *I);
     180             :   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
     181             : 
     182             :   // Utility helper routines.
     183             :   bool isTypeLegal(Type *Ty, MVT &VT);
     184             :   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
     185             :   bool isValueAvailable(const Value *V) const;
     186             :   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
     187             :   bool computeCallAddress(const Value *V, Address &Addr);
     188             :   bool simplifyAddress(Address &Addr, MVT VT);
     189             :   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
     190             :                             MachineMemOperand::Flags Flags,
     191             :                             unsigned ScaleFactor, MachineMemOperand *MMO);
     192             :   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
     193             :   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
     194             :                           unsigned Alignment);
     195             :   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
     196             :                          const Value *Cond);
     197             :   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
     198             :   bool optimizeSelect(const SelectInst *SI);
     199             :   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
     200             : 
     201             :   // Emit helper routines.
     202             :   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
     203             :                       const Value *RHS, bool SetFlags = false,
     204             :                       bool WantResult = true,  bool IsZExt = false);
     205             :   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
     206             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     207             :                          bool SetFlags = false, bool WantResult = true);
     208             :   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
     209             :                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
     210             :                          bool WantResult = true);
     211             :   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
     212             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     213             :                          AArch64_AM::ShiftExtendType ShiftType,
     214             :                          uint64_t ShiftImm, bool SetFlags = false,
     215             :                          bool WantResult = true);
     216             :   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
     217             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     218             :                           AArch64_AM::ShiftExtendType ExtType,
     219             :                           uint64_t ShiftImm, bool SetFlags = false,
     220             :                          bool WantResult = true);
     221             : 
     222             :   // Emit functions.
     223             :   bool emitCompareAndBranch(const BranchInst *BI);
     224             :   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
     225             :   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
     226             :   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
     227             :   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
     228             :   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
     229             :                     MachineMemOperand *MMO = nullptr);
     230             :   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
     231             :                  MachineMemOperand *MMO = nullptr);
     232             :   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
     233             :                         MachineMemOperand *MMO = nullptr);
     234             :   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
     235             :   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
     236             :   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
     237             :                    bool SetFlags = false, bool WantResult = true,
     238             :                    bool IsZExt = false);
     239             :   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
     240             :   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
     241             :                    bool SetFlags = false, bool WantResult = true,
     242             :                    bool IsZExt = false);
     243             :   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
     244             :                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
     245             :   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
     246             :                        unsigned RHSReg, bool RHSIsKill,
     247             :                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
     248             :                        bool WantResult = true);
     249             :   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
     250             :                          const Value *RHS);
     251             :   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
     252             :                             bool LHSIsKill, uint64_t Imm);
     253             :   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
     254             :                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     255             :                             uint64_t ShiftImm);
     256             :   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
     257             :   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     258             :                       unsigned Op1, bool Op1IsKill);
     259             :   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     260             :                         unsigned Op1, bool Op1IsKill);
     261             :   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     262             :                         unsigned Op1, bool Op1IsKill);
     263             :   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     264             :                       unsigned Op1Reg, bool Op1IsKill);
     265             :   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     266             :                       uint64_t Imm, bool IsZExt = true);
     267             :   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     268             :                       unsigned Op1Reg, bool Op1IsKill);
     269             :   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     270             :                       uint64_t Imm, bool IsZExt = true);
     271             :   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     272             :                       unsigned Op1Reg, bool Op1IsKill);
     273             :   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     274             :                       uint64_t Imm, bool IsZExt = false);
     275             : 
     276             :   unsigned materializeInt(const ConstantInt *CI, MVT VT);
     277             :   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
     278             :   unsigned materializeGV(const GlobalValue *GV);
     279             : 
     280             :   // Call handling routines.
     281             : private:
     282             :   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
     283             :   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
     284             :                        unsigned &NumBytes);
     285             :   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
     286             : 
     287             : public:
     288             :   // Backend specific FastISel code.
     289             :   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
     290             :   unsigned fastMaterializeConstant(const Constant *C) override;
     291             :   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
     292             : 
     293        1193 :   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
     294             :                            const TargetLibraryInfo *LibInfo)
     295        1193 :       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
     296        1193 :     Subtarget =
     297        1193 :         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
     298        1193 :     Context = &FuncInfo.Fn->getContext();
     299        1193 :   }
     300             : 
     301             :   bool fastSelectInstruction(const Instruction *I) override;
     302             : 
     303             : #include "AArch64GenFastISel.inc"
     304             : };
     305             : 
     306             : } // end anonymous namespace
     307             : 
     308             : #include "AArch64GenCallingConv.inc"
     309             : 
     310             : /// \brief Check if the sign-/zero-extend will be a noop.
     311          82 : static bool isIntExtFree(const Instruction *I) {
     312             :   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
     313             :          "Unexpected integer extend instruction.");
     314             :   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
     315             :          "Unexpected value type.");
     316             :   bool IsZExt = isa<ZExtInst>(I);
     317             : 
     318          82 :   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
     319           0 :     if (LI->hasOneUse())
     320             :       return true;
     321             : 
     322             :   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
     323          80 :     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
     324             :       return true;
     325             : 
     326             :   return false;
     327             : }
     328             : 
     329             : /// \brief Determine the implicit scale factor that is applied by a memory
     330             : /// operation for a given value type.
     331             : static unsigned getImplicitScaleFactor(MVT VT) {
     332             :   switch (VT.SimpleTy) {
     333             :   default:
     334             :     return 0;    // invalid
     335             :   case MVT::i1:  // fall-through
     336             :   case MVT::i8:
     337             :     return 1;
     338             :   case MVT::i16:
     339             :     return 2;
     340             :   case MVT::i32: // fall-through
     341             :   case MVT::f32:
     342             :     return 4;
     343             :   case MVT::i64: // fall-through
     344             :   case MVT::f64:
     345             :     return 8;
     346             :   }
     347             : }
     348             : 
     349             : CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
     350         188 :   if (CC == CallingConv::WebKit_JS)
     351             :     return CC_AArch64_WebKit_JS;
     352         181 :   if (CC == CallingConv::GHC)
     353             :     return CC_AArch64_GHC;
     354         181 :   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
     355             : }
     356             : 
     357          17 : unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
     358             :   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
     359             :          "Alloca should always return a pointer.");
     360             : 
     361             :   // Don't handle dynamic allocas.
     362          17 :   if (!FuncInfo.StaticAllocaMap.count(AI))
     363             :     return 0;
     364             : 
     365             :   DenseMap<const AllocaInst *, int>::iterator SI =
     366          17 :       FuncInfo.StaticAllocaMap.find(AI);
     367             : 
     368          34 :   if (SI != FuncInfo.StaticAllocaMap.end()) {
     369          17 :     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
     370          34 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
     371          34 :             ResultReg)
     372          17 :         .addFrameIndex(SI->second)
     373             :         .addImm(0)
     374             :         .addImm(0);
     375          17 :     return ResultReg;
     376             :   }
     377             : 
     378             :   return 0;
     379             : }
     380             : 
     381         350 : unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
     382         350 :   if (VT > MVT::i64)
     383             :     return 0;
     384             : 
     385         350 :   if (!CI->isZero())
     386         231 :     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
     387             : 
     388             :   // Create a copy from the zero register to materialize a "0" value.
     389         119 :   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
     390             :                                                    : &AArch64::GPR32RegClass;
     391         119 :   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
     392         119 :   unsigned ResultReg = createResultReg(RC);
     393         357 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
     394         357 :           ResultReg).addReg(ZeroReg, getKillRegState(true));
     395         119 :   return ResultReg;
     396             : }
     397             : 
     398          19 : unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
     399             :   // Positive zero (+0.0) has to be materialized with a fmov from the zero
     400             :   // register, because the immediate version of fmov cannot encode zero.
     401          19 :   if (CFP->isNullValue())
     402           2 :     return fastMaterializeFloatZero(CFP);
     403             : 
     404          17 :   if (VT != MVT::f32 && VT != MVT::f64)
     405             :     return 0;
     406             : 
     407             :   const APFloat Val = CFP->getValueAPF();
     408             :   bool Is64Bit = (VT == MVT::f64);
     409             :   // This checks to see if we can use FMOV instructions to materialize
     410             :   // a constant, otherwise we have to materialize via the constant pool.
     411          34 :   if (TLI.isFPImmLegal(Val, VT)) {
     412             :     int Imm =
     413          10 :         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
     414             :     assert((Imm != -1) && "Cannot encode floating-point constant.");
     415          10 :     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
     416          10 :     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
     417             :   }
     418             : 
     419             :   // For the MachO large code model materialize the FP constant in code.
     420          14 :   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
     421           4 :     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
     422           4 :     const TargetRegisterClass *RC = Is64Bit ?
     423             :         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
     424             : 
     425           4 :     unsigned TmpReg = createResultReg(RC);
     426          12 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
     427          12 :         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
     428             : 
     429           4 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
     430           8 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
     431           4 :             TII.get(TargetOpcode::COPY), ResultReg)
     432           4 :         .addReg(TmpReg, getKillRegState(true));
     433             : 
     434           4 :     return ResultReg;
     435             :   }
     436             : 
     437             :   // Materialize via constant pool.  MachineConstantPool wants an explicit
     438             :   // alignment.
     439           3 :   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
     440           3 :   if (Align == 0)
     441           0 :     Align = DL.getTypeAllocSize(CFP->getType());
     442             : 
     443           3 :   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
     444           3 :   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
     445           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     446           6 :           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
     447             : 
     448           3 :   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
     449           3 :   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
     450           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
     451           3 :       .addReg(ADRPReg)
     452             :       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     453           3 :   return ResultReg;
     454             : }
     455             : 
     456          98 : unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
     457             :   // We can't handle thread-local variables quickly yet.
     458          98 :   if (GV->isThreadLocal())
     459             :     return 0;
     460             : 
     461             :   // MachO still uses GOT for large code-model accesses, but ELF requires
     462             :   // movz/movk sequences, which FastISel doesn't handle yet.
     463         106 :   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
     464             :     return 0;
     465             : 
     466          88 :   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
     467             : 
     468         176 :   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
     469          88 :   if (!DestEVT.isSimple())
     470             :     return 0;
     471             : 
     472          88 :   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
     473             :   unsigned ResultReg;
     474             : 
     475          88 :   if (OpFlags & AArch64II::MO_GOT) {
     476             :     // ADRP + LDRX
     477         135 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     478          90 :             ADRPReg)
     479          45 :         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
     480             : 
     481          45 :     ResultReg = createResultReg(&AArch64::GPR64RegClass);
     482         135 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
     483          45 :             ResultReg)
     484          45 :         .addReg(ADRPReg)
     485             :         .addGlobalAddress(GV, 0,
     486          45 :                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
     487             :   } else {
     488             :     // ADRP + ADDX
     489         129 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     490          86 :             ADRPReg)
     491          43 :         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
     492             : 
     493          43 :     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
     494         129 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
     495          43 :             ResultReg)
     496          43 :         .addReg(ADRPReg)
     497             :         .addGlobalAddress(GV, 0,
     498          43 :                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
     499             :         .addImm(0);
     500             :   }
     501             :   return ResultReg;
     502             : }
     503             : 
     504         498 : unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
     505         498 :   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
     506             : 
     507             :   // Only handle simple types.
     508         498 :   if (!CEVT.isSimple())
     509             :     return 0;
     510         498 :   MVT VT = CEVT.getSimpleVT();
     511             : 
     512             :   if (const auto *CI = dyn_cast<ConstantInt>(C))
     513         344 :     return materializeInt(CI, VT);
     514             :   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
     515          19 :     return materializeFP(CFP, VT);
     516             :   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
     517          91 :     return materializeGV(GV);
     518             : 
     519             :   return 0;
     520             : }
     521             : 
     522           2 : unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
     523             :   assert(CFP->isNullValue() &&
     524             :          "Floating-point constant is not a positive zero.");
     525           2 :   MVT VT;
     526           2 :   if (!isTypeLegal(CFP->getType(), VT))
     527             :     return 0;
     528             : 
     529           2 :   if (VT != MVT::f32 && VT != MVT::f64)
     530             :     return 0;
     531             : 
     532             :   bool Is64Bit = (VT == MVT::f64);
     533           2 :   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
     534           2 :   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
     535           2 :   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
     536             : }
     537             : 
     538             : /// \brief Check if the multiply is by a power-of-2 constant.
     539         556 : static bool isMulPowOf2(const Value *I) {
     540             :   if (const auto *MI = dyn_cast<MulOperator>(I)) {
     541          23 :     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
     542           0 :       if (C->getValue().isPowerOf2())
     543             :         return true;
     544             :     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
     545          21 :       if (C->getValue().isPowerOf2())
     546             :         return true;
     547             :   }
     548             :   return false;
     549             : }
     550             : 
     551             : // Computes the address to get to an object.
     552        1126 : bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
     553             : {
     554             :   const User *U = nullptr;
     555             :   unsigned Opcode = Instruction::UserOp1;
     556             :   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
     557             :     // Don't walk into other basic blocks unless the object is an alloca from
     558             :     // another block, otherwise it may not have a virtual register assigned.
     559         965 :     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
     560         851 :         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     561             :       Opcode = I->getOpcode();
     562             :       U = I;
     563             :     }
     564             :   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
     565             :     Opcode = C->getOpcode();
     566             :     U = C;
     567             :   }
     568             : 
     569        1126 :   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
     570         764 :     if (Ty->getAddressSpace() > 255)
     571             :       // Fast instruction selection doesn't support the special
     572             :       // address spaces.
     573             :       return false;
     574             : 
     575        1116 :   switch (Opcode) {
     576             :   default:
     577             :     break;
     578             :   case Instruction::BitCast:
     579             :     // Look through bitcasts.
     580          17 :     return computeAddress(U->getOperand(0), Addr, Ty);
     581             : 
     582         136 :   case Instruction::IntToPtr:
     583             :     // Look past no-op inttoptrs.
     584         544 :     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
     585             :         TLI.getPointerTy(DL))
     586         136 :       return computeAddress(U->getOperand(0), Addr, Ty);
     587             :     break;
     588             : 
     589           6 :   case Instruction::PtrToInt:
     590             :     // Look past no-op ptrtoints.
     591          18 :     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
     592           6 :       return computeAddress(U->getOperand(0), Addr, Ty);
     593             :     break;
     594             : 
     595          42 :   case Instruction::GetElementPtr: {
     596          42 :     Address SavedAddr = Addr;
     597          42 :     uint64_t TmpOffset = Addr.getOffset();
     598             : 
     599             :     // Iterate through the GEP folding the constants into offsets where
     600             :     // we can.
     601         114 :     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
     602         114 :          GTI != E; ++GTI) {
     603             :       const Value *Op = GTI.getOperand();
     604          17 :       if (StructType *STy = GTI.getStructTypeOrNull()) {
     605          17 :         const StructLayout *SL = DL.getStructLayout(STy);
     606          17 :         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
     607          17 :         TmpOffset += SL->getElementOffset(Idx);
     608             :       } else {
     609          56 :         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
     610             :         while (true) {
     611             :           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
     612             :             // Constant-offset addressing.
     613          55 :             TmpOffset += CI->getSExtValue() * S;
     614          55 :             break;
     615             :           }
     616           1 :           if (canFoldAddIntoGEP(U, Op)) {
     617             :             // A compatible add with a constant operand. Fold the constant.
     618             :             ConstantInt *CI =
     619           0 :                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
     620           0 :             TmpOffset += CI->getSExtValue() * S;
     621             :             // Iterate on the other operand.
     622             :             Op = cast<AddOperator>(Op)->getOperand(0);
     623           0 :             continue;
     624             :           }
     625             :           // Unsupported
     626           1 :           goto unsupported_gep;
     627           0 :         }
     628             :       }
     629             :     }
     630             : 
     631             :     // Try to grab the base operand now.
     632          41 :     Addr.setOffset(TmpOffset);
     633          41 :     if (computeAddress(U->getOperand(0), Addr, Ty))
     634          40 :       return true;
     635             : 
     636             :     // We failed, restore everything and try the other options.
     637           1 :     Addr = SavedAddr;
     638             : 
     639           2 :   unsupported_gep:
     640           2 :     break;
     641             :   }
     642             :   case Instruction::Alloca: {
     643             :     const AllocaInst *AI = cast<AllocaInst>(Obj);
     644             :     DenseMap<const AllocaInst *, int>::iterator SI =
     645         129 :         FuncInfo.StaticAllocaMap.find(AI);
     646         258 :     if (SI != FuncInfo.StaticAllocaMap.end()) {
     647             :       Addr.setKind(Address::FrameIndexBase);
     648         129 :       Addr.setFI(SI->second);
     649         129 :       return true;
     650             :     }
     651           0 :     break;
     652             :   }
     653             :   case Instruction::Add: {
     654             :     // Adds of constants are common and easy enough.
     655             :     const Value *LHS = U->getOperand(0);
     656             :     const Value *RHS = U->getOperand(1);
     657             : 
     658         117 :     if (isa<ConstantInt>(LHS))
     659             :       std::swap(LHS, RHS);
     660             : 
     661             :     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
     662          58 :       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
     663         146 :       return computeAddress(LHS, Addr, Ty);
     664             :     }
     665             : 
     666          88 :     Address Backup = Addr;
     667          88 :     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
     668             :       return true;
     669           0 :     Addr = Backup;
     670             : 
     671           0 :     break;
     672             :   }
     673             :   case Instruction::Sub: {
     674             :     // Subs of constants are common and easy enough.
     675             :     const Value *LHS = U->getOperand(0);
     676             :     const Value *RHS = U->getOperand(1);
     677             : 
     678             :     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
     679          42 :       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
     680          21 :       return computeAddress(LHS, Addr, Ty);
     681             :     }
     682             :     break;
     683             :   }
     684          26 :   case Instruction::Shl: {
     685          26 :     if (Addr.getOffsetReg())
     686             :       break;
     687             : 
     688             :     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
     689             :     if (!CI)
     690             :       break;
     691             : 
     692          23 :     unsigned Val = CI->getZExtValue();
     693          23 :     if (Val < 1 || Val > 3)
     694             :       break;
     695             : 
     696             :     uint64_t NumBytes = 0;
     697          23 :     if (Ty && Ty->isSized()) {
     698          23 :       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
     699          23 :       NumBytes = NumBits / 8;
     700             :       if (!isPowerOf2_64(NumBits))
     701             :         NumBytes = 0;
     702             :     }
     703             : 
     704          23 :     if (NumBytes != (1ULL << Val))
     705             :       break;
     706             : 
     707             :     Addr.setShift(Val);
     708             :     Addr.setExtendType(AArch64_AM::LSL);
     709             : 
     710             :     const Value *Src = U->getOperand(0);
     711             :     if (const auto *I = dyn_cast<Instruction>(Src)) {
     712          34 :       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     713             :         // Fold the zext or sext when it won't become a noop.
     714             :         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
     715           9 :           if (!isIntExtFree(ZE) &&
     716           4 :               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     717             :             Addr.setExtendType(AArch64_AM::UXTW);
     718             :             Src = ZE->getOperand(0);
     719             :           }
     720             :         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
     721          15 :           if (!isIntExtFree(SE) &&
     722           7 :               SE->getOperand(0)->getType()->isIntegerTy(32)) {
     723             :             Addr.setExtendType(AArch64_AM::SXTW);
     724             :             Src = SE->getOperand(0);
     725             :           }
     726             :         }
     727             :       }
     728             :     }
     729             : 
     730             :     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
     731           5 :       if (AI->getOpcode() == Instruction::And) {
     732             :         const Value *LHS = AI->getOperand(0);
     733             :         const Value *RHS = AI->getOperand(1);
     734             : 
     735             :         if (const auto *C = dyn_cast<ConstantInt>(LHS))
     736           0 :           if (C->getValue() == 0xffffffff)
     737             :             std::swap(LHS, RHS);
     738             : 
     739             :         if (const auto *C = dyn_cast<ConstantInt>(RHS))
     740           3 :           if (C->getValue() == 0xffffffff) {
     741             :             Addr.setExtendType(AArch64_AM::UXTW);
     742           3 :             unsigned Reg = getRegForValue(LHS);
     743           3 :             if (!Reg)
     744             :               return false;
     745           3 :             bool RegIsKill = hasTrivialKill(LHS);
     746           6 :             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
     747             :                                              AArch64::sub_32);
     748             :             Addr.setOffsetReg(Reg);
     749           3 :             return true;
     750             :           }
     751             :       }
     752             : 
     753          20 :     unsigned Reg = getRegForValue(Src);
     754          20 :     if (!Reg)
     755             :       return false;
     756             :     Addr.setOffsetReg(Reg);
     757          20 :     return true;
     758             :   }
     759          13 :   case Instruction::Mul: {
     760          13 :     if (Addr.getOffsetReg())
     761             :       break;
     762             : 
     763          13 :     if (!isMulPowOf2(U))
     764             :       break;
     765             : 
     766             :     const Value *LHS = U->getOperand(0);
     767             :     const Value *RHS = U->getOperand(1);
     768             : 
     769             :     // Canonicalize power-of-2 value to the RHS.
     770             :     if (const auto *C = dyn_cast<ConstantInt>(LHS))
     771           0 :       if (C->getValue().isPowerOf2())
     772             :         std::swap(LHS, RHS);
     773             : 
     774             :     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
     775             :     const auto *C = cast<ConstantInt>(RHS);
     776             :     unsigned Val = C->getValue().logBase2();
     777          13 :     if (Val < 1 || Val > 3)
     778             :       break;
     779             : 
     780             :     uint64_t NumBytes = 0;
     781          13 :     if (Ty && Ty->isSized()) {
     782          13 :       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
     783          13 :       NumBytes = NumBits / 8;
     784             :       if (!isPowerOf2_64(NumBits))
     785             :         NumBytes = 0;
     786             :     }
     787             : 
     788          13 :     if (NumBytes != (1ULL << Val))
     789             :       break;
     790             : 
     791             :     Addr.setShift(Val);
     792             :     Addr.setExtendType(AArch64_AM::LSL);
     793             : 
     794             :     const Value *Src = LHS;
     795             :     if (const auto *I = dyn_cast<Instruction>(Src)) {
     796          18 :       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     797             :         // Fold the zext or sext when it won't become a noop.
     798             :         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
     799           7 :           if (!isIntExtFree(ZE) &&
     800           3 :               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     801             :             Addr.setExtendType(AArch64_AM::UXTW);
     802             :             Src = ZE->getOperand(0);
     803             :           }
     804             :         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
     805           7 :           if (!isIntExtFree(SE) &&
     806           3 :               SE->getOperand(0)->getType()->isIntegerTy(32)) {
     807             :             Addr.setExtendType(AArch64_AM::SXTW);
     808             :             Src = SE->getOperand(0);
     809             :           }
     810             :         }
     811             :       }
     812             :     }
     813             : 
     814          13 :     unsigned Reg = getRegForValue(Src);
     815          13 :     if (!Reg)
     816             :       return false;
     817             :     Addr.setOffsetReg(Reg);
     818          13 :     return true;
     819             :   }
     820           3 :   case Instruction::And: {
     821           3 :     if (Addr.getOffsetReg())
     822             :       break;
     823             : 
     824           3 :     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
     825             :       break;
     826             : 
     827             :     const Value *LHS = U->getOperand(0);
     828             :     const Value *RHS = U->getOperand(1);
     829             : 
     830             :     if (const auto *C = dyn_cast<ConstantInt>(LHS))
     831           0 :       if (C->getValue() == 0xffffffff)
     832             :         std::swap(LHS, RHS);
     833             : 
     834             :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
     835           1 :       if (C->getValue() == 0xffffffff) {
     836             :         Addr.setShift(0);
     837             :         Addr.setExtendType(AArch64_AM::LSL);
     838             :         Addr.setExtendType(AArch64_AM::UXTW);
     839             : 
     840           1 :         unsigned Reg = getRegForValue(LHS);
     841           1 :         if (!Reg)
     842             :           return false;
     843           1 :         bool RegIsKill = hasTrivialKill(LHS);
     844           2 :         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
     845             :                                          AArch64::sub_32);
     846             :         Addr.setOffsetReg(Reg);
     847           1 :         return true;
     848             :       }
     849             :     break;
     850             :   }
     851          20 :   case Instruction::SExt:
     852             :   case Instruction::ZExt: {
     853          20 :     if (!Addr.getReg() || Addr.getOffsetReg())
     854             :       break;
     855             : 
     856             :     const Value *Src = nullptr;
     857             :     // Fold the zext or sext when it won't become a noop.
     858             :     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
     859           0 :       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     860             :         Addr.setExtendType(AArch64_AM::UXTW);
     861             :         Src = ZE->getOperand(0);
     862             :       }
     863             :     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
     864          40 :       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
     865             :         Addr.setExtendType(AArch64_AM::SXTW);
     866             :         Src = SE->getOperand(0);
     867             :       }
     868             :     }
     869             : 
     870          20 :     if (!Src)
     871             :       break;
     872             : 
     873             :     Addr.setShift(0);
     874          20 :     unsigned Reg = getRegForValue(Src);
     875          20 :     if (!Reg)
     876             :       return false;
     877             :     Addr.setOffsetReg(Reg);
     878          20 :     return true;
     879             :   }
     880             :   } // end switch
     881             : 
     882         593 :   if (Addr.isRegBase() && !Addr.getReg()) {
     883         559 :     unsigned Reg = getRegForValue(Obj);
     884         559 :     if (!Reg)
     885             :       return false;
     886             :     Addr.setReg(Reg);
     887         551 :     return true;
     888             :   }
     889             : 
     890          34 :   if (!Addr.getOffsetReg()) {
     891          34 :     unsigned Reg = getRegForValue(Obj);
     892          34 :     if (!Reg)
     893             :       return false;
     894             :     Addr.setOffsetReg(Reg);
     895          34 :     return true;
     896             :   }
     897             : 
     898             :   return false;
     899             : }
     900             : 
     901         107 : bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
     902             :   const User *U = nullptr;
     903             :   unsigned Opcode = Instruction::UserOp1;
     904             :   bool InMBB = true;
     905             : 
     906             :   if (const auto *I = dyn_cast<Instruction>(V)) {
     907             :     Opcode = I->getOpcode();
     908             :     U = I;
     909          15 :     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
     910             :   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
     911             :     Opcode = C->getOpcode();
     912             :     U = C;
     913             :   }
     914             : 
     915         122 :   switch (Opcode) {
     916             :   default: break;
     917           2 :   case Instruction::BitCast:
     918             :     // Look past bitcasts if its operand is in the same BB.
     919           2 :     if (InMBB)
     920           2 :       return computeCallAddress(U->getOperand(0), Addr);
     921             :     break;
     922          13 :   case Instruction::IntToPtr:
     923             :     // Look past no-op inttoptrs if its operand is in the same BB.
     924          13 :     if (InMBB &&
     925          39 :         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
     926             :             TLI.getPointerTy(DL))
     927          13 :       return computeCallAddress(U->getOperand(0), Addr);
     928             :     break;
     929           0 :   case Instruction::PtrToInt:
     930             :     // Look past no-op ptrtoints if its operand is in the same BB.
     931           0 :     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
     932           0 :       return computeCallAddress(U->getOperand(0), Addr);
     933             :     break;
     934             :   }
     935             : 
     936             :   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
     937             :     Addr.setGlobalValue(GV);
     938          87 :     return true;
     939             :   }
     940             : 
     941             :   // If all else fails, try to materialize the value in a register.
     942          20 :   if (!Addr.getGlobalValue()) {
     943          20 :     Addr.setReg(getRegForValue(V));
     944          20 :     return Addr.getReg() != 0;
     945             :   }
     946             : 
     947             :   return false;
     948             : }
     949             : 
     950        3557 : bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
     951        3557 :   EVT evt = TLI.getValueType(DL, Ty, true);
     952             : 
     953             :   // Only handle simple types.
     954        3547 :   if (evt == MVT::Other || !evt.isSimple())
     955             :     return false;
     956        3546 :   VT = evt.getSimpleVT();
     957             : 
     958             :   // This is a legal type, but it's not something we handle in fast-isel.
     959        3546 :   if (VT == MVT::f128)
     960             :     return false;
     961             : 
     962             :   // Handle all other legal types, i.e. a register that will directly hold this
     963             :   // value.
     964        3520 :   return TLI.isTypeLegal(VT);
     965             : }
     966             : 
     967             : /// \brief Determine if the value type is supported by FastISel.
     968             : ///
     969             : /// FastISel for AArch64 can handle more value types than are legal. This adds
     970             : /// simple value type such as i1, i8, and i16.
     971        1833 : bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
     972        1833 :   if (Ty->isVectorTy() && !IsVectorAllowed)
     973             :     return false;
     974             : 
     975        1827 :   if (isTypeLegal(Ty, VT))
     976             :     return true;
     977             : 
     978             :   // If this is a type than can be sign or zero-extended to a basic operation
     979             :   // go ahead and accept it now.
     980         483 :   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
     981             :     return true;
     982             : 
     983             :   return false;
     984             : }
     985             : 
     986        1281 : bool AArch64FastISel::isValueAvailable(const Value *V) const {
     987        1281 :   if (!isa<Instruction>(V))
     988             :     return true;
     989             : 
     990             :   const auto *I = cast<Instruction>(V);
     991        1552 :   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
     992             : }
     993             : 
     994         769 : bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
     995             :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
     996         545 :   if (!ScaleFactor)
     997             :     return false;
     998             : 
     999             :   bool ImmediateOffsetNeedsLowering = false;
    1000             :   bool RegisterOffsetNeedsLowering = false;
    1001         545 :   int64_t Offset = Addr.getOffset();
    1002         581 :   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    1003             :     ImmediateOffsetNeedsLowering = true;
    1004         646 :   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
    1005         105 :            !isUInt<12>(Offset / ScaleFactor))
    1006             :     ImmediateOffsetNeedsLowering = true;
    1007             : 
    1008             :   // Cannot encode an offset register and an immediate offset in the same
    1009             :   // instruction. Fold the immediate offset into the load/store instruction and
    1010             :   // emit an additional add to take care of the offset register.
    1011         535 :   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    1012             :     RegisterOffsetNeedsLowering = true;
    1013             : 
    1014             :   // Cannot encode zero register as base.
    1015         545 :   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    1016             :     RegisterOffsetNeedsLowering = true;
    1017             : 
    1018             :   // If this is a stack pointer and the offset needs to be simplified then put
    1019             :   // the alloca address into a register, set the base type back to register and
    1020             :   // continue. This should almost never happen.
    1021         545 :   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
    1022             :   {
    1023           3 :     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    1024           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    1025           6 :             ResultReg)
    1026           3 :       .addFrameIndex(Addr.getFI())
    1027             :       .addImm(0)
    1028             :       .addImm(0);
    1029             :     Addr.setKind(Address::RegBase);
    1030             :     Addr.setReg(ResultReg);
    1031             :   }
    1032             : 
    1033         545 :   if (RegisterOffsetNeedsLowering) {
    1034             :     unsigned ResultReg = 0;
    1035           7 :     if (Addr.getReg()) {
    1036           4 :       if (Addr.getExtendType() == AArch64_AM::SXTW ||
    1037             :           Addr.getExtendType() == AArch64_AM::UXTW   )
    1038           2 :         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    1039             :                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    1040             :                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
    1041           1 :                                   Addr.getShift());
    1042             :       else
    1043           6 :         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    1044             :                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    1045             :                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
    1046           3 :                                   Addr.getShift());
    1047             :     } else {
    1048           3 :       if (Addr.getExtendType() == AArch64_AM::UXTW)
    1049           0 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
    1050           0 :                                /*Op0IsKill=*/false, Addr.getShift(),
    1051             :                                /*IsZExt=*/true);
    1052           3 :       else if (Addr.getExtendType() == AArch64_AM::SXTW)
    1053           2 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
    1054           1 :                                /*Op0IsKill=*/false, Addr.getShift(),
    1055             :                                /*IsZExt=*/false);
    1056             :       else
    1057           4 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
    1058           2 :                                /*Op0IsKill=*/false, Addr.getShift());
    1059             :     }
    1060           7 :     if (!ResultReg)
    1061             :       return false;
    1062             : 
    1063             :     Addr.setReg(ResultReg);
    1064             :     Addr.setOffsetReg(0);
    1065             :     Addr.setShift(0);
    1066             :     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
    1067             :   }
    1068             : 
    1069             :   // Since the offset is too large for the load/store instruction get the
    1070             :   // reg+offset into a register.
    1071         545 :   if (ImmediateOffsetNeedsLowering) {
    1072             :     unsigned ResultReg;
    1073          10 :     if (Addr.getReg())
    1074             :       // Try to fold the immediate into the add instruction.
    1075          10 :       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    1076             :     else
    1077           0 :       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
    1078             : 
    1079          10 :     if (!ResultReg)
    1080             :       return false;
    1081             :     Addr.setReg(ResultReg);
    1082             :     Addr.setOffset(0);
    1083             :   }
    1084             :   return true;
    1085             : }
    1086             : 
    1087         543 : void AArch64FastISel::addLoadStoreOperands(Address &Addr,
    1088             :                                            const MachineInstrBuilder &MIB,
    1089             :                                            MachineMemOperand::Flags Flags,
    1090             :                                            unsigned ScaleFactor,
    1091             :                                            MachineMemOperand *MMO) {
    1092         543 :   int64_t Offset = Addr.getOffset() / ScaleFactor;
    1093             :   // Frame base works a bit differently. Handle it separately.
    1094         543 :   if (Addr.isFIBase()) {
    1095         134 :     int FI = Addr.getFI();
    1096             :     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
    1097             :     // and alignment should be based on the VT.
    1098         402 :     MMO = FuncInfo.MF->getMachineMemOperand(
    1099         134 :         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
    1100         134 :         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    1101             :     // Now add the rest of the operands.
    1102             :     MIB.addFrameIndex(FI).addImm(Offset);
    1103             :   } else {
    1104             :     assert(Addr.isRegBase() && "Unexpected address kind.");
    1105         409 :     const MCInstrDesc &II = MIB->getDesc();
    1106         409 :     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    1107         818 :     Addr.setReg(
    1108         409 :       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    1109         409 :     Addr.setOffsetReg(
    1110         818 :       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    1111         409 :     if (Addr.getOffsetReg()) {
    1112             :       assert(Addr.getOffset() == 0 && "Unexpected offset");
    1113          84 :       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
    1114             :                       Addr.getExtendType() == AArch64_AM::SXTX;
    1115          84 :       MIB.addReg(Addr.getReg());
    1116          84 :       MIB.addReg(Addr.getOffsetReg());
    1117          84 :       MIB.addImm(IsSigned);
    1118          84 :       MIB.addImm(Addr.getShift() != 0);
    1119             :     } else
    1120         325 :       MIB.addReg(Addr.getReg()).addImm(Offset);
    1121             :   }
    1122             : 
    1123         543 :   if (MMO)
    1124             :     MIB.addMemOperand(MMO);
    1125         543 : }
    1126             : 
    1127         286 : unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
    1128             :                                      const Value *RHS, bool SetFlags,
    1129             :                                      bool WantResult,  bool IsZExt) {
    1130             :   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
    1131             :   bool NeedExtend = false;
    1132         286 :   switch (RetVT.SimpleTy) {
    1133             :   default:
    1134             :     return 0;
    1135           2 :   case MVT::i1:
    1136             :     NeedExtend = true;
    1137           2 :     break;
    1138           3 :   case MVT::i8:
    1139             :     NeedExtend = true;
    1140           3 :     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    1141             :     break;
    1142           5 :   case MVT::i16:
    1143             :     NeedExtend = true;
    1144           5 :     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    1145             :     break;
    1146             :   case MVT::i32:  // fall-through
    1147             :   case MVT::i64:
    1148             :     break;
    1149             :   }
    1150         286 :   MVT SrcVT = RetVT;
    1151         572 :   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
    1152             : 
    1153             :   // Canonicalize immediates to the RHS first.
    1154         496 :   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    1155             :     std::swap(LHS, RHS);
    1156             : 
    1157             :   // Canonicalize mul by power of 2 to the RHS.
    1158         496 :   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    1159         175 :     if (isMulPowOf2(LHS))
    1160             :       std::swap(LHS, RHS);
    1161             : 
    1162             :   // Canonicalize shift immediate to the RHS.
    1163         496 :   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    1164             :     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
    1165          79 :       if (isa<ConstantInt>(SI->getOperand(1)))
    1166           4 :         if (SI->getOpcode() == Instruction::Shl  ||
    1167           7 :             SI->getOpcode() == Instruction::LShr ||
    1168             :             SI->getOpcode() == Instruction::AShr   )
    1169             :           std::swap(LHS, RHS);
    1170             : 
    1171         286 :   unsigned LHSReg = getRegForValue(LHS);
    1172         286 :   if (!LHSReg)
    1173             :     return 0;
    1174         286 :   bool LHSIsKill = hasTrivialKill(LHS);
    1175             : 
    1176         286 :   if (NeedExtend)
    1177          10 :     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
    1178             : 
    1179             :   unsigned ResultReg = 0;
    1180             :   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    1181          69 :     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    1182          35 :     if (C->isNegative())
    1183           6 :       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
    1184             :                                 SetFlags, WantResult);
    1185             :     else
    1186          29 :       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
    1187             :                                 WantResult);
    1188             :   } else if (const auto *C = dyn_cast<Constant>(RHS))
    1189           2 :     if (C->isNullValue())
    1190           2 :       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
    1191             :                                 WantResult);
    1192             : 
    1193          37 :   if (ResultReg)
    1194             :     return ResultReg;
    1195             : 
    1196             :   // Only extend the RHS within the instruction if there is a valid extend type.
    1197         264 :   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
    1198           6 :       isValueAvailable(RHS)) {
    1199             :     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
    1200             :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
    1201           0 :         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
    1202           0 :           unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1203           0 :           if (!RHSReg)
    1204             :             return 0;
    1205           0 :           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1206           0 :           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1207             :                                RHSIsKill, ExtendType, C->getZExtValue(),
    1208           0 :                                SetFlags, WantResult);
    1209             :         }
    1210           6 :     unsigned RHSReg = getRegForValue(RHS);
    1211           6 :     if (!RHSReg)
    1212             :       return 0;
    1213           6 :     bool RHSIsKill = hasTrivialKill(RHS);
    1214           6 :     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1215           6 :                          ExtendType, 0, SetFlags, WantResult);
    1216             :   }
    1217             : 
    1218             :   // Check if the mul can be folded into the instruction.
    1219         246 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1220         208 :     if (isMulPowOf2(RHS)) {
    1221           0 :       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    1222             :       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    1223             : 
    1224             :       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
    1225           0 :         if (C->getValue().isPowerOf2())
    1226             :           std::swap(MulLHS, MulRHS);
    1227             : 
    1228             :       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    1229           0 :       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    1230           0 :       unsigned RHSReg = getRegForValue(MulLHS);
    1231           0 :       if (!RHSReg)
    1232             :         return 0;
    1233           0 :       bool RHSIsKill = hasTrivialKill(MulLHS);
    1234           0 :       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1235             :                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
    1236             :                                 WantResult);
    1237           0 :       if (ResultReg)
    1238             :         return ResultReg;
    1239             :     }
    1240             :   }
    1241             : 
    1242             :   // Check if the shift can be folded into the instruction.
    1243         246 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1244             :     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
    1245             :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
    1246             :         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
    1247             :         switch (SI->getOpcode()) {
    1248             :         default: break;
    1249             :         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
    1250             :         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
    1251             :         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
    1252             :         }
    1253             :         uint64_t ShiftVal = C->getZExtValue();
    1254          12 :         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
    1255          12 :           unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1256          12 :           if (!RHSReg)
    1257             :             return 0;
    1258          12 :           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1259          12 :           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1260             :                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
    1261             :                                     WantResult);
    1262          12 :           if (ResultReg)
    1263             :             return ResultReg;
    1264             :         }
    1265             :       }
    1266             :     }
    1267             :   }
    1268             : 
    1269         236 :   unsigned RHSReg = getRegForValue(RHS);
    1270         236 :   if (!RHSReg)
    1271             :     return 0;
    1272         236 :   bool RHSIsKill = hasTrivialKill(RHS);
    1273             : 
    1274         236 :   if (NeedExtend)
    1275           1 :     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
    1276             : 
    1277         236 :   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1278         236 :                        SetFlags, WantResult);
    1279             : }
    1280             : 
    1281         245 : unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1282             :                                         bool LHSIsKill, unsigned RHSReg,
    1283             :                                         bool RHSIsKill, bool SetFlags,
    1284             :                                         bool WantResult) {
    1285             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1286             : 
    1287         245 :   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
    1288         243 :       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    1289             :     return 0;
    1290             : 
    1291         243 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1292             :     return 0;
    1293             : 
    1294             :   static const unsigned OpcTable[2][2][2] = {
    1295             :     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
    1296             :       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    1297             :     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
    1298             :       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
    1299             :   };
    1300             :   bool Is64Bit = RetVT == MVT::i64;
    1301         243 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1302             :   const TargetRegisterClass *RC =
    1303         243 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1304             :   unsigned ResultReg;
    1305         243 :   if (WantResult)
    1306         200 :     ResultReg = createResultReg(RC);
    1307             :   else
    1308          43 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1309             : 
    1310         243 :   const MCInstrDesc &II = TII.get(Opc);
    1311         486 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1312         486 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1313         486 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1314         243 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1315         243 :       .addReg(RHSReg, getKillRegState(RHSIsKill));
    1316         243 :   return ResultReg;
    1317             : }
    1318             : 
    1319          82 : unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1320             :                                         bool LHSIsKill, uint64_t Imm,
    1321             :                                         bool SetFlags, bool WantResult) {
    1322             :   assert(LHSReg && "Invalid register number.");
    1323             : 
    1324          82 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1325             :     return 0;
    1326             : 
    1327             :   unsigned ShiftImm;
    1328          82 :   if (isUInt<12>(Imm))
    1329             :     ShiftImm = 0;
    1330          12 :   else if ((Imm & 0xfff000) == Imm) {
    1331             :     ShiftImm = 12;
    1332           4 :     Imm >>= 12;
    1333             :   } else
    1334             :     return 0;
    1335             : 
    1336             :   static const unsigned OpcTable[2][2][2] = {
    1337             :     { { AArch64::SUBWri,  AArch64::SUBXri  },
    1338             :       { AArch64::ADDWri,  AArch64::ADDXri  }  },
    1339             :     { { AArch64::SUBSWri, AArch64::SUBSXri },
    1340             :       { AArch64::ADDSWri, AArch64::ADDSXri }  }
    1341             :   };
    1342             :   bool Is64Bit = RetVT == MVT::i64;
    1343          74 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1344             :   const TargetRegisterClass *RC;
    1345          74 :   if (SetFlags)
    1346          32 :     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1347             :   else
    1348          42 :     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    1349             :   unsigned ResultReg;
    1350          74 :   if (WantResult)
    1351          47 :     ResultReg = createResultReg(RC);
    1352             :   else
    1353          27 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1354             : 
    1355          74 :   const MCInstrDesc &II = TII.get(Opc);
    1356         148 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1357         148 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1358          74 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1359          74 :       .addImm(Imm)
    1360          74 :       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
    1361          74 :   return ResultReg;
    1362             : }
    1363             : 
    1364          28 : unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1365             :                                         bool LHSIsKill, unsigned RHSReg,
    1366             :                                         bool RHSIsKill,
    1367             :                                         AArch64_AM::ShiftExtendType ShiftType,
    1368             :                                         uint64_t ShiftImm, bool SetFlags,
    1369             :                                         bool WantResult) {
    1370             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1371             :   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
    1372             :          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
    1373             : 
    1374          28 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1375             :     return 0;
    1376             : 
    1377             :   // Don't deal with undefined shifts.
    1378          28 :   if (ShiftImm >= RetVT.getSizeInBits())
    1379             :     return 0;
    1380             : 
    1381             :   static const unsigned OpcTable[2][2][2] = {
    1382             :     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
    1383             :       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    1384             :     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
    1385             :       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
    1386             :   };
    1387             :   bool Is64Bit = RetVT == MVT::i64;
    1388          26 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1389             :   const TargetRegisterClass *RC =
    1390          26 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1391             :   unsigned ResultReg;
    1392          26 :   if (WantResult)
    1393          17 :     ResultReg = createResultReg(RC);
    1394             :   else
    1395           9 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1396             : 
    1397          26 :   const MCInstrDesc &II = TII.get(Opc);
    1398          52 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1399          52 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1400          52 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1401          26 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1402          26 :       .addReg(RHSReg, getKillRegState(RHSIsKill))
    1403          52 :       .addImm(getShifterImm(ShiftType, ShiftImm));
    1404          26 :   return ResultReg;
    1405             : }
    1406             : 
    1407           7 : unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1408             :                                         bool LHSIsKill, unsigned RHSReg,
    1409             :                                         bool RHSIsKill,
    1410             :                                         AArch64_AM::ShiftExtendType ExtType,
    1411             :                                         uint64_t ShiftImm, bool SetFlags,
    1412             :                                         bool WantResult) {
    1413             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1414             :   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
    1415             :          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
    1416             : 
    1417           7 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1418             :     return 0;
    1419             : 
    1420           7 :   if (ShiftImm >= 4)
    1421             :     return 0;
    1422             : 
    1423             :   static const unsigned OpcTable[2][2][2] = {
    1424             :     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
    1425             :       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    1426             :     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
    1427             :       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
    1428             :   };
    1429             :   bool Is64Bit = RetVT == MVT::i64;
    1430           7 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1431             :   const TargetRegisterClass *RC = nullptr;
    1432           7 :   if (SetFlags)
    1433           5 :     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1434             :   else
    1435           2 :     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    1436             :   unsigned ResultReg;
    1437           7 :   if (WantResult)
    1438           2 :     ResultReg = createResultReg(RC);
    1439             :   else
    1440           5 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1441             : 
    1442           7 :   const MCInstrDesc &II = TII.get(Opc);
    1443          14 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1444          14 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1445          14 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1446           7 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1447           7 :       .addReg(RHSReg, getKillRegState(RHSIsKill))
    1448          14 :       .addImm(getArithExtendImm(ExtType, ShiftImm));
    1449           7 :   return ResultReg;
    1450             : }
    1451             : 
    1452         107 : bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
    1453         107 :   Type *Ty = LHS->getType();
    1454         107 :   EVT EVT = TLI.getValueType(DL, Ty, true);
    1455         107 :   if (!EVT.isSimple())
    1456             :     return false;
    1457             :   MVT VT = EVT.getSimpleVT();
    1458             : 
    1459         107 :   switch (VT.SimpleTy) {
    1460             :   default:
    1461             :     return false;
    1462          60 :   case MVT::i1:
    1463             :   case MVT::i8:
    1464             :   case MVT::i16:
    1465             :   case MVT::i32:
    1466             :   case MVT::i64:
    1467         120 :     return emitICmp(VT, LHS, RHS, IsZExt);
    1468          47 :   case MVT::f32:
    1469             :   case MVT::f64:
    1470          47 :     return emitFCmp(VT, LHS, RHS);
    1471             :   }
    1472             : }
    1473             : 
    1474             : bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
    1475             :                                bool IsZExt) {
    1476             :   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
    1477          60 :                  IsZExt) != 0;
    1478             : }
    1479             : 
    1480             : bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    1481             :                                   uint64_t Imm) {
    1482          11 :   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
    1483             :                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
    1484             : }
    1485             : 
    1486          47 : bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
    1487          47 :   if (RetVT != MVT::f32 && RetVT != MVT::f64)
    1488             :     return false;
    1489             : 
    1490             :   // Check to see if the 2nd operand is a constant that we can encode directly
    1491             :   // in the compare.
    1492             :   bool UseImm = false;
    1493             :   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    1494           6 :     if (CFP->isZero() && !CFP->isNegative())
    1495             :       UseImm = true;
    1496             : 
    1497          47 :   unsigned LHSReg = getRegForValue(LHS);
    1498          47 :   if (!LHSReg)
    1499             :     return false;
    1500          47 :   bool LHSIsKill = hasTrivialKill(LHS);
    1501             : 
    1502          47 :   if (UseImm) {
    1503           2 :     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    1504           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    1505           2 :         .addReg(LHSReg, getKillRegState(LHSIsKill));
    1506           2 :     return true;
    1507             :   }
    1508             : 
    1509          45 :   unsigned RHSReg = getRegForValue(RHS);
    1510          45 :   if (!RHSReg)
    1511             :     return false;
    1512          45 :   bool RHSIsKill = hasTrivialKill(RHS);
    1513             : 
    1514          45 :   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
    1515         135 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    1516          45 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1517          45 :       .addReg(RHSReg, getKillRegState(RHSIsKill));
    1518          45 :   return true;
    1519             : }
    1520             : 
    1521             : unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
    1522             :                                   bool SetFlags, bool WantResult, bool IsZExt) {
    1523             :   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
    1524         210 :                     IsZExt);
    1525             : }
    1526             : 
    1527             : /// \brief This method is a wrapper to simplify add emission.
    1528             : ///
    1529             : /// First try to emit an add with an immediate operand using emitAddSub_ri. If
    1530             : /// that fails, then try to materialize the immediate into a register and use
    1531             : /// emitAddSub_rr instead.
    1532          34 : unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
    1533             :                                       int64_t Imm) {
    1534             :   unsigned ResultReg;
    1535          34 :   if (Imm < 0)
    1536           2 :     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
    1537             :   else
    1538          32 :     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
    1539             : 
    1540          34 :   if (ResultReg)
    1541             :     return ResultReg;
    1542             : 
    1543           5 :   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
    1544           5 :   if (!CReg)
    1545             :     return 0;
    1546             : 
    1547           5 :   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
    1548           5 :   return ResultReg;
    1549             : }
    1550             : 
    1551             : unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
    1552             :                                   bool SetFlags, bool WantResult, bool IsZExt) {
    1553             :   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
    1554          76 :                     IsZExt);
    1555             : }
    1556             : 
    1557             : unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
    1558             :                                       bool LHSIsKill, unsigned RHSReg,
    1559             :                                       bool RHSIsKill, bool WantResult) {
    1560             :   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
    1561           4 :                        RHSIsKill, /*SetFlags=*/true, WantResult);
    1562             : }
    1563             : 
    1564             : unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
    1565             :                                       bool LHSIsKill, unsigned RHSReg,
    1566             :                                       bool RHSIsKill,
    1567             :                                       AArch64_AM::ShiftExtendType ShiftType,
    1568             :                                       uint64_t ShiftImm, bool WantResult) {
    1569             :   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
    1570             :                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
    1571           9 :                        WantResult);
    1572             : }
    1573             : 
    1574          89 : unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
    1575             :                                         const Value *LHS, const Value *RHS) {
    1576             :   // Canonicalize immediates to the RHS first.
    1577          89 :   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    1578             :     std::swap(LHS, RHS);
    1579             : 
    1580             :   // Canonicalize mul by power-of-2 to the RHS.
    1581          89 :   if (LHS->hasOneUse() && isValueAvailable(LHS))
    1582          89 :     if (isMulPowOf2(LHS))
    1583             :       std::swap(LHS, RHS);
    1584             : 
    1585             :   // Canonicalize shift immediate to the RHS.
    1586          89 :   if (LHS->hasOneUse() && isValueAvailable(LHS))
    1587             :     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
    1588           0 :       if (isa<ConstantInt>(SI->getOperand(1)))
    1589             :         std::swap(LHS, RHS);
    1590             : 
    1591          89 :   unsigned LHSReg = getRegForValue(LHS);
    1592          89 :   if (!LHSReg)
    1593             :     return 0;
    1594          89 :   bool LHSIsKill = hasTrivialKill(LHS);
    1595             : 
    1596             :   unsigned ResultReg = 0;
    1597             :   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    1598             :     uint64_t Imm = C->getZExtValue();
    1599          18 :     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
    1600             :   }
    1601          18 :   if (ResultReg)
    1602             :     return ResultReg;
    1603             : 
    1604             :   // Check if the mul can be folded into the instruction.
    1605          71 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1606          71 :     if (isMulPowOf2(RHS)) {
    1607           6 :       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    1608             :       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    1609             : 
    1610             :       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
    1611           0 :         if (C->getValue().isPowerOf2())
    1612             :           std::swap(MulLHS, MulRHS);
    1613             : 
    1614             :       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    1615           6 :       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    1616             : 
    1617           6 :       unsigned RHSReg = getRegForValue(MulLHS);
    1618           6 :       if (!RHSReg)
    1619             :         return 0;
    1620           6 :       bool RHSIsKill = hasTrivialKill(MulLHS);
    1621           6 :       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
    1622             :                                    RHSIsKill, ShiftVal);
    1623           6 :       if (ResultReg)
    1624             :         return ResultReg;
    1625             :     }
    1626             :   }
    1627             : 
    1628             :   // Check if the shift can be folded into the instruction.
    1629          65 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1630             :     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
    1631          24 :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
    1632             :         uint64_t ShiftVal = C->getZExtValue();
    1633          24 :         unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1634          24 :         if (!RHSReg)
    1635             :           return 0;
    1636          24 :         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1637          24 :         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
    1638             :                                      RHSIsKill, ShiftVal);
    1639          24 :         if (ResultReg)
    1640             :           return ResultReg;
    1641             :       }
    1642             :   }
    1643             : 
    1644          53 :   unsigned RHSReg = getRegForValue(RHS);
    1645          53 :   if (!RHSReg)
    1646             :     return 0;
    1647          53 :   bool RHSIsKill = hasTrivialKill(RHS);
    1648             : 
    1649         159 :   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
    1650          53 :   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    1651          53 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    1652          12 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1653             :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1654             :   }
    1655             :   return ResultReg;
    1656             : }
    1657             : 
    1658         203 : unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
    1659             :                                            unsigned LHSReg, bool LHSIsKill,
    1660             :                                            uint64_t Imm) {
    1661             :   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
    1662             :                 "ISD nodes are not consecutive!");
    1663             :   static const unsigned OpcTable[3][2] = {
    1664             :     { AArch64::ANDWri, AArch64::ANDXri },
    1665             :     { AArch64::ORRWri, AArch64::ORRXri },
    1666             :     { AArch64::EORWri, AArch64::EORXri }
    1667             :   };
    1668             :   const TargetRegisterClass *RC;
    1669             :   unsigned Opc;
    1670             :   unsigned RegSize;
    1671         203 :   switch (RetVT.SimpleTy) {
    1672             :   default:
    1673             :     return 0;
    1674         197 :   case MVT::i1:
    1675             :   case MVT::i8:
    1676             :   case MVT::i16:
    1677             :   case MVT::i32: {
    1678         197 :     unsigned Idx = ISDOpc - ISD::AND;
    1679         197 :     Opc = OpcTable[Idx][0];
    1680             :     RC = &AArch64::GPR32spRegClass;
    1681             :     RegSize = 32;
    1682         197 :     break;
    1683             :   }
    1684           6 :   case MVT::i64:
    1685           6 :     Opc = OpcTable[ISDOpc - ISD::AND][1];
    1686             :     RC = &AArch64::GPR64spRegClass;
    1687             :     RegSize = 64;
    1688           6 :     break;
    1689             :   }
    1690             : 
    1691         203 :   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    1692             :     return 0;
    1693             : 
    1694             :   unsigned ResultReg =
    1695         406 :       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
    1696         203 :                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
    1697         203 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    1698           4 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1699           4 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1700             :   }
    1701             :   return ResultReg;
    1702             : }
    1703             : 
    1704          30 : unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
    1705             :                                            unsigned LHSReg, bool LHSIsKill,
    1706             :                                            unsigned RHSReg, bool RHSIsKill,
    1707             :                                            uint64_t ShiftImm) {
    1708             :   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
    1709             :                 "ISD nodes are not consecutive!");
    1710             :   static const unsigned OpcTable[3][2] = {
    1711             :     { AArch64::ANDWrs, AArch64::ANDXrs },
    1712             :     { AArch64::ORRWrs, AArch64::ORRXrs },
    1713             :     { AArch64::EORWrs, AArch64::EORXrs }
    1714             :   };
    1715             : 
    1716             :   // Don't deal with undefined shifts.
    1717          30 :   if (ShiftImm >= RetVT.getSizeInBits())
    1718             :     return 0;
    1719             : 
    1720             :   const TargetRegisterClass *RC;
    1721             :   unsigned Opc;
    1722          18 :   switch (RetVT.SimpleTy) {
    1723             :   default:
    1724             :     return 0;
    1725          12 :   case MVT::i1:
    1726             :   case MVT::i8:
    1727             :   case MVT::i16:
    1728             :   case MVT::i32:
    1729          12 :     Opc = OpcTable[ISDOpc - ISD::AND][0];
    1730             :     RC = &AArch64::GPR32RegClass;
    1731          12 :     break;
    1732           6 :   case MVT::i64:
    1733           6 :     Opc = OpcTable[ISDOpc - ISD::AND][1];
    1734             :     RC = &AArch64::GPR64RegClass;
    1735           6 :     break;
    1736             :   }
    1737             :   unsigned ResultReg =
    1738          36 :       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1739          36 :                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
    1740          18 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    1741           6 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1742             :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1743             :   }
    1744             :   return ResultReg;
    1745             : }
    1746             : 
    1747           4 : unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    1748             :                                      uint64_t Imm) {
    1749         184 :   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
    1750             : }
    1751             : 
    1752         362 : unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
    1753             :                                    bool WantZExt, MachineMemOperand *MMO) {
    1754         724 :   if (!TLI.allowsMisalignedMemoryAccesses(VT))
    1755             :     return 0;
    1756             : 
    1757             :   // Simplify this down to something we can handle.
    1758         360 :   if (!simplifyAddress(Addr, VT))
    1759             :     return 0;
    1760             : 
    1761             :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
    1762         288 :   if (!ScaleFactor)
    1763           0 :     llvm_unreachable("Unexpected value type.");
    1764             : 
    1765             :   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
    1766             :   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
    1767             :   bool UseScaled = true;
    1768         288 :   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    1769             :     UseScaled = false;
    1770             :     ScaleFactor = 1;
    1771             :   }
    1772             : 
    1773             :   static const unsigned GPOpcTable[2][8][4] = {
    1774             :     // Sign-extend.
    1775             :     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
    1776             :         AArch64::LDURXi  },
    1777             :       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
    1778             :         AArch64::LDURXi  },
    1779             :       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
    1780             :         AArch64::LDRXui  },
    1781             :       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
    1782             :         AArch64::LDRXui  },
    1783             :       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
    1784             :         AArch64::LDRXroX },
    1785             :       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
    1786             :         AArch64::LDRXroX },
    1787             :       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
    1788             :         AArch64::LDRXroW },
    1789             :       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
    1790             :         AArch64::LDRXroW }
    1791             :     },
    1792             :     // Zero-extend.
    1793             :     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
    1794             :         AArch64::LDURXi  },
    1795             :       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
    1796             :         AArch64::LDURXi  },
    1797             :       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
    1798             :         AArch64::LDRXui  },
    1799             :       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
    1800             :         AArch64::LDRXui  },
    1801             :       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
    1802             :         AArch64::LDRXroX },
    1803             :       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
    1804             :         AArch64::LDRXroX },
    1805             :       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
    1806             :         AArch64::LDRXroW },
    1807             :       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
    1808             :         AArch64::LDRXroW }
    1809             :     }
    1810             :   };
    1811             : 
    1812             :   static const unsigned FPOpcTable[4][2] = {
    1813             :     { AArch64::LDURSi,  AArch64::LDURDi  },
    1814             :     { AArch64::LDRSui,  AArch64::LDRDui  },
    1815             :     { AArch64::LDRSroX, AArch64::LDRDroX },
    1816             :     { AArch64::LDRSroW, AArch64::LDRDroW }
    1817             :   };
    1818             : 
    1819             :   unsigned Opc;
    1820             :   const TargetRegisterClass *RC;
    1821         463 :   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
    1822         175 :                       Addr.getOffsetReg();
    1823         207 :   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
    1824         288 :   if (Addr.getExtendType() == AArch64_AM::UXTW ||
    1825             :       Addr.getExtendType() == AArch64_AM::SXTW)
    1826          39 :     Idx++;
    1827             : 
    1828             :   bool IsRet64Bit = RetVT == MVT::i64;
    1829         288 :   switch (VT.SimpleTy) {
    1830           0 :   default:
    1831           0 :     llvm_unreachable("Unexpected value type.");
    1832          53 :   case MVT::i1: // Intentional fall-through.
    1833             :   case MVT::i8:
    1834          53 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    1835          53 :     RC = (IsRet64Bit && !WantZExt) ?
    1836             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1837             :     break;
    1838          42 :   case MVT::i16:
    1839          42 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    1840          42 :     RC = (IsRet64Bit && !WantZExt) ?
    1841             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1842             :     break;
    1843          86 :   case MVT::i32:
    1844          86 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    1845          86 :     RC = (IsRet64Bit && !WantZExt) ?
    1846             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1847             :     break;
    1848          94 :   case MVT::i64:
    1849          94 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    1850             :     RC = &AArch64::GPR64RegClass;
    1851          94 :     break;
    1852           3 :   case MVT::f32:
    1853           3 :     Opc = FPOpcTable[Idx][0];
    1854             :     RC = &AArch64::FPR32RegClass;
    1855           3 :     break;
    1856          10 :   case MVT::f64:
    1857          10 :     Opc = FPOpcTable[Idx][1];
    1858             :     RC = &AArch64::FPR64RegClass;
    1859          10 :     break;
    1860             :   }
    1861             : 
    1862             :   // Create the base instruction, then add the operands.
    1863         288 :   unsigned ResultReg = createResultReg(RC);
    1864         288 :   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    1865         864 :                                     TII.get(Opc), ResultReg);
    1866         288 :   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
    1867             : 
    1868             :   // Loading an i1 requires special handling.
    1869         288 :   if (VT == MVT::i1) {
    1870             :     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    1871             :     assert(ANDReg && "Unexpected AND instruction emission failure.");
    1872             :     ResultReg = ANDReg;
    1873             :   }
    1874             : 
    1875             :   // For zero-extending loads to 64bit we emit a 32bit load and then convert
    1876             :   // the 32bit reg to a 64bit reg.
    1877         288 :   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    1878          20 :     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    1879          40 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    1880          20 :             TII.get(AArch64::SUBREG_TO_REG), Reg64)
    1881             :         .addImm(0)
    1882          20 :         .addReg(ResultReg, getKillRegState(true))
    1883             :         .addImm(AArch64::sub_32);
    1884             :     ResultReg = Reg64;
    1885             :   }
    1886             :   return ResultReg;
    1887             : }
    1888             : 
    1889         262 : bool AArch64FastISel::selectAddSub(const Instruction *I) {
    1890         262 :   MVT VT;
    1891         262 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    1892             :     return false;
    1893             : 
    1894         262 :   if (VT.isVector())
    1895         142 :     return selectOperator(I, I->getOpcode());
    1896             : 
    1897             :   unsigned ResultReg;
    1898         191 :   switch (I->getOpcode()) {
    1899           0 :   default:
    1900           0 :     llvm_unreachable("Unexpected instruction.");
    1901         188 :   case Instruction::Add:
    1902         188 :     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    1903         188 :     break;
    1904           3 :   case Instruction::Sub:
    1905           3 :     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    1906           3 :     break;
    1907             :   }
    1908         191 :   if (!ResultReg)
    1909             :     return false;
    1910             : 
    1911         191 :   updateValueMap(I, ResultReg);
    1912         191 :   return true;
    1913             : }
    1914             : 
    1915          89 : bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
    1916          89 :   MVT VT;
    1917          89 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    1918             :     return false;
    1919             : 
    1920          89 :   if (VT.isVector())
    1921           0 :     return selectOperator(I, I->getOpcode());
    1922             : 
    1923             :   unsigned ResultReg;
    1924          89 :   switch (I->getOpcode()) {
    1925           0 :   default:
    1926           0 :     llvm_unreachable("Unexpected instruction.");
    1927          50 :   case Instruction::And:
    1928         100 :     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    1929          50 :     break;
    1930          20 :   case Instruction::Or:
    1931          40 :     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    1932          20 :     break;
    1933          19 :   case Instruction::Xor:
    1934          38 :     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    1935          19 :     break;
    1936             :   }
    1937          89 :   if (!ResultReg)
    1938             :     return false;
    1939             : 
    1940          89 :   updateValueMap(I, ResultReg);
    1941          89 :   return true;
    1942             : }
    1943             : 
    1944         346 : bool AArch64FastISel::selectLoad(const Instruction *I) {
    1945         346 :   MVT VT;
    1946             :   // Verify we have a legal type before going any further.  Currently, we handle
    1947             :   // simple types that will directly fit in a register (i32/f32/i64/f64) or
    1948             :   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
    1949         687 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
    1950         341 :       cast<LoadInst>(I)->isAtomic())
    1951             :     return false;
    1952             : 
    1953         341 :   const Value *SV = I->getOperand(0);
    1954         341 :   if (TLI.supportSwiftError()) {
    1955             :     // Swifterror values can come from either a function parameter with
    1956             :     // swifterror attribute or an alloca with swifterror attribute.
    1957             :     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
    1958         103 :       if (Arg->hasSwiftErrorAttr())
    1959             :         return false;
    1960             :     }
    1961             : 
    1962             :     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
    1963          49 :       if (Alloca->isSwiftError())
    1964             :         return false;
    1965             :     }
    1966             :   }
    1967             : 
    1968             :   // See if we can handle this address.
    1969             :   Address Addr;
    1970         672 :   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    1971             :     return false;
    1972             : 
    1973             :   // Fold the following sign-/zero-extend into the load instruction.
    1974             :   bool WantZExt = true;
    1975         321 :   MVT RetVT = VT;
    1976             :   const Value *IntExtVal = nullptr;
    1977         321 :   if (I->hasOneUse()) {
    1978         227 :     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
    1979          37 :       if (isTypeSupported(ZE->getType(), RetVT))
    1980             :         IntExtVal = ZE;
    1981             :       else
    1982           0 :         RetVT = VT;
    1983             :     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
    1984          38 :       if (isTypeSupported(SE->getType(), RetVT))
    1985             :         IntExtVal = SE;
    1986             :       else
    1987           0 :         RetVT = VT;
    1988             :       WantZExt = false;
    1989             :     }
    1990             :   }
    1991             : 
    1992             :   unsigned ResultReg =
    1993         321 :       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
    1994         321 :   if (!ResultReg)
    1995             :     return false;
    1996             : 
    1997             :   // There are a few different cases we have to handle, because the load or the
    1998             :   // sign-/zero-extend might not be selected by FastISel if we fall-back to
    1999             :   // SelectionDAG. There is also an ordering issue when both instructions are in
    2000             :   // different basic blocks.
    2001             :   // 1.) The load instruction is selected by FastISel, but the integer extend
    2002             :   //     not. This usually happens when the integer extend is in a different
    2003             :   //     basic block and SelectionDAG took over for that basic block.
    2004             :   // 2.) The load instruction is selected before the integer extend. This only
    2005             :   //     happens when the integer extend is in a different basic block.
    2006             :   // 3.) The load instruction is selected by SelectionDAG and the integer extend
    2007             :   //     by FastISel. This happens if there are instructions between the load
    2008             :   //     and the integer extend that couldn't be selected by FastISel.
    2009         247 :   if (IntExtVal) {
    2010             :     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    2011             :     // could select it. Emit a copy to subreg if necessary. FastISel will remove
    2012             :     // it when it selects the integer extend.
    2013          75 :     unsigned Reg = lookUpRegForValue(IntExtVal);
    2014          75 :     auto *MI = MRI.getUniqueVRegDef(Reg);
    2015          75 :     if (!MI) {
    2016           2 :       if (RetVT == MVT::i64 && VT <= MVT::i32) {
    2017           1 :         if (WantZExt) {
    2018             :           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
    2019           2 :           std::prev(FuncInfo.InsertPt)->eraseFromParent();
    2020           2 :           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
    2021             :         } else
    2022           0 :           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
    2023             :                                                  /*IsKill=*/true,
    2024             :                                                  AArch64::sub_32);
    2025             :       }
    2026           1 :       updateValueMap(I, ResultReg);
    2027           1 :       return true;
    2028             :     }
    2029             : 
    2030             :     // The integer extend has already been emitted - delete all the instructions
    2031             :     // that have been emitted by the integer extend lowering code and use the
    2032             :     // result from the load instruction directly.
    2033         186 :     while (MI) {
    2034             :       Reg = 0;
    2035         188 :       for (auto &Opnd : MI->uses()) {
    2036         150 :         if (Opnd.isReg()) {
    2037         112 :           Reg = Opnd.getReg();
    2038         112 :           break;
    2039             :         }
    2040             :       }
    2041         112 :       MI->eraseFromParent();
    2042             :       MI = nullptr;
    2043         112 :       if (Reg)
    2044         112 :         MI = MRI.getUniqueVRegDef(Reg);
    2045             :     }
    2046          74 :     updateValueMap(IntExtVal, ResultReg);
    2047          74 :     return true;
    2048             :   }
    2049             : 
    2050         172 :   updateValueMap(I, ResultReg);
    2051         172 :   return true;
    2052             : }
    2053             : 
    2054          16 : bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
    2055             :                                        unsigned AddrReg,
    2056             :                                        MachineMemOperand *MMO) {
    2057             :   unsigned Opc;
    2058          16 :   switch (VT.SimpleTy) {
    2059             :   default: return false;
    2060             :   case MVT::i8:  Opc = AArch64::STLRB; break;
    2061           4 :   case MVT::i16: Opc = AArch64::STLRH; break;
    2062           4 :   case MVT::i32: Opc = AArch64::STLRW; break;
    2063           4 :   case MVT::i64: Opc = AArch64::STLRX; break;
    2064             :   }
    2065             : 
    2066          16 :   const MCInstrDesc &II = TII.get(Opc);
    2067          16 :   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
    2068          16 :   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
    2069          32 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    2070          16 :       .addReg(SrcReg)
    2071          16 :       .addReg(AddrReg)
    2072             :       .addMemOperand(MMO);
    2073          16 :   return true;
    2074             : }
    2075             : 
    2076         409 : bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
    2077             :                                 MachineMemOperand *MMO) {
    2078         818 :   if (!TLI.allowsMisalignedMemoryAccesses(VT))
    2079             :     return false;
    2080             : 
    2081             :   // Simplify this down to something we can handle.
    2082         409 :   if (!simplifyAddress(Addr, VT))
    2083             :     return false;
    2084             : 
    2085             :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
    2086         255 :   if (!ScaleFactor)
    2087           0 :     llvm_unreachable("Unexpected value type.");
    2088             : 
    2089             :   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
    2090             :   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
    2091             :   bool UseScaled = true;
    2092         255 :   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    2093             :     UseScaled = false;
    2094             :     ScaleFactor = 1;
    2095             :   }
    2096             : 
    2097             :   static const unsigned OpcTable[4][6] = {
    2098             :     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
    2099             :       AArch64::STURSi,   AArch64::STURDi },
    2100             :     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
    2101             :       AArch64::STRSui,   AArch64::STRDui },
    2102             :     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
    2103             :       AArch64::STRSroX,  AArch64::STRDroX },
    2104             :     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
    2105             :       AArch64::STRSroW,  AArch64::STRDroW }
    2106             :   };
    2107             : 
    2108             :   unsigned Opc;
    2109             :   bool VTIsi1 = false;
    2110         376 :   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
    2111         121 :                       Addr.getOffsetReg();
    2112         252 :   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
    2113         255 :   if (Addr.getExtendType() == AArch64_AM::UXTW ||
    2114             :       Addr.getExtendType() == AArch64_AM::SXTW)
    2115           0 :     Idx++;
    2116             : 
    2117         255 :   switch (VT.SimpleTy) {
    2118           0 :   default: llvm_unreachable("Unexpected value type.");
    2119           8 :   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
    2120          44 :   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
    2121          21 :   case MVT::i16: Opc = OpcTable[Idx][1]; break;
    2122          70 :   case MVT::i32: Opc = OpcTable[Idx][2]; break;
    2123         101 :   case MVT::i64: Opc = OpcTable[Idx][3]; break;
    2124           2 :   case MVT::f32: Opc = OpcTable[Idx][4]; break;
    2125          17 :   case MVT::f64: Opc = OpcTable[Idx][5]; break;
    2126             :   }
    2127             : 
    2128             :   // Storing an i1 requires special handling.
    2129         255 :   if (VTIsi1 && SrcReg != AArch64::WZR) {
    2130             :     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    2131             :     assert(ANDReg && "Unexpected AND instruction emission failure.");
    2132             :     SrcReg = ANDReg;
    2133             :   }
    2134             :   // Create the base instruction, then add the operands.
    2135         255 :   const MCInstrDesc &II = TII.get(Opc);
    2136         510 :   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
    2137             :   MachineInstrBuilder MIB =
    2138         255 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
    2139         255 :   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
    2140             : 
    2141         255 :   return true;
    2142             : }
    2143             : 
    2144         372 : bool AArch64FastISel::selectStore(const Instruction *I) {
    2145         372 :   MVT VT;
    2146         372 :   const Value *Op0 = I->getOperand(0);
    2147             :   // Verify we have a legal type before going any further.  Currently, we handle
    2148             :   // simple types that will directly fit in a register (i32/f32/i64/f64) or
    2149             :   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
    2150         372 :   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
    2151             :     return false;
    2152             : 
    2153             :   const Value *PtrV = I->getOperand(1);
    2154         356 :   if (TLI.supportSwiftError()) {
    2155             :     // Swifterror values can come from either a function parameter with
    2156             :     // swifterror attribute or an alloca with swifterror attribute.
    2157             :     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
    2158         228 :       if (Arg->hasSwiftErrorAttr())
    2159             :         return false;
    2160             :     }
    2161             : 
    2162             :     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
    2163          74 :       if (Alloca->isSwiftError())
    2164             :         return false;
    2165             :     }
    2166             :   }
    2167             : 
    2168             :   // Get the value to be stored into a register. Use the zero register directly
    2169             :   // when possible to avoid an unnecessary copy and a wasted register.
    2170             :   unsigned SrcReg = 0;
    2171             :   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
    2172          47 :     if (CI->isZero())
    2173          28 :       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    2174             :   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    2175           4 :     if (CF->isZero() && !CF->isNegative()) {
    2176           2 :       VT = MVT::getIntegerVT(VT.getSizeInBits());
    2177           2 :       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    2178             :     }
    2179             :   }
    2180             : 
    2181             :   if (!SrcReg)
    2182         324 :     SrcReg = getRegForValue(Op0);
    2183             : 
    2184         354 :   if (!SrcReg)
    2185             :     return false;
    2186             : 
    2187             :   auto *SI = cast<StoreInst>(I);
    2188             : 
    2189             :   // Try to emit a STLR for seq_cst/release.
    2190         354 :   if (SI->isAtomic()) {
    2191             :     AtomicOrdering Ord = SI->getOrdering();
    2192             :     // The non-atomic instructions are sufficient for relaxed stores.
    2193          24 :     if (isReleaseOrStronger(Ord)) {
    2194             :       // The STLR addressing mode only supports a base reg; pass that directly.
    2195          16 :       unsigned AddrReg = getRegForValue(PtrV);
    2196          16 :       return emitStoreRelease(VT, SrcReg, AddrReg,
    2197          16 :                               createMachineMemOperandFor(I));
    2198             :     }
    2199             :   }
    2200             : 
    2201             :   // See if we can handle this address.
    2202             :   Address Addr;
    2203         338 :   if (!computeAddress(PtrV, Addr, Op0->getType()))
    2204             :     return false;
    2205             : 
    2206         336 :   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
    2207             :     return false;
    2208         184 :   return true;
    2209             : }
    2210             : 
    2211             : static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
    2212             :   switch (Pred) {
    2213             :   case CmpInst::FCMP_ONE:
    2214             :   case CmpInst::FCMP_UEQ:
    2215             :   default:
    2216             :     // AL is our "false" for now. The other two need more compares.
    2217             :     return AArch64CC::AL;
    2218             :   case CmpInst::ICMP_EQ:
    2219             :   case CmpInst::FCMP_OEQ:
    2220             :     return AArch64CC::EQ;
    2221             :   case CmpInst::ICMP_SGT:
    2222             :   case CmpInst::FCMP_OGT:
    2223             :     return AArch64CC::GT;
    2224             :   case CmpInst::ICMP_SGE:
    2225             :   case CmpInst::FCMP_OGE:
    2226             :     return AArch64CC::GE;
    2227             :   case CmpInst::ICMP_UGT:
    2228             :   case CmpInst::FCMP_UGT:
    2229             :     return AArch64CC::HI;
    2230             :   case CmpInst::FCMP_OLT:
    2231             :     return AArch64CC::MI;
    2232             :   case CmpInst::ICMP_ULE:
    2233             :   case CmpInst::FCMP_OLE:
    2234             :     return AArch64CC::LS;
    2235             :   case CmpInst::FCMP_ORD:
    2236             :     return AArch64CC::VC;
    2237             :   case CmpInst::FCMP_UNO:
    2238             :     return AArch64CC::VS;
    2239             :   case CmpInst::FCMP_UGE:
    2240             :     return AArch64CC::PL;
    2241             :   case CmpInst::ICMP_SLT:
    2242             :   case CmpInst::FCMP_ULT:
    2243             :     return AArch64CC::LT;
    2244             :   case CmpInst::ICMP_SLE:
    2245             :   case CmpInst::FCMP_ULE:
    2246             :     return AArch64CC::LE;
    2247             :   case CmpInst::FCMP_UNE:
    2248             :   case CmpInst::ICMP_NE:
    2249             :     return AArch64CC::NE;
    2250             :   case CmpInst::ICMP_UGE:
    2251             :     return AArch64CC::HS;
    2252             :   case CmpInst::ICMP_ULT:
    2253             :     return AArch64CC::LO;
    2254             :   }
    2255             : }
    2256             : 
    2257             : /// \brief Try to emit a combined compare-and-branch instruction.
    2258          88 : bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
    2259             :   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
    2260             :   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
    2261          88 :   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2262             : 
    2263             :   const Value *LHS = CI->getOperand(0);
    2264             :   const Value *RHS = CI->getOperand(1);
    2265             : 
    2266          88 :   MVT VT;
    2267          88 :   if (!isTypeSupported(LHS->getType(), VT))
    2268             :     return false;
    2269             : 
    2270          88 :   unsigned BW = VT.getSizeInBits();
    2271          88 :   if (BW > 64)
    2272             :     return false;
    2273             : 
    2274         176 :   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2275         176 :   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    2276             : 
    2277             :   // Try to take advantage of fallthrough opportunities.
    2278          88 :   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2279             :     std::swap(TBB, FBB);
    2280          49 :     Predicate = CmpInst::getInversePredicate(Predicate);
    2281             :   }
    2282             : 
    2283             :   int TestBit = -1;
    2284             :   bool IsCmpNE;
    2285             :   switch (Predicate) {
    2286             :   default:
    2287             :     return false;
    2288          44 :   case CmpInst::ICMP_EQ:
    2289             :   case CmpInst::ICMP_NE:
    2290          44 :     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
    2291             :       std::swap(LHS, RHS);
    2292             : 
    2293          44 :     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
    2294             :       return false;
    2295             : 
    2296             :     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
    2297          13 :       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
    2298             :         const Value *AndLHS = AI->getOperand(0);
    2299             :         const Value *AndRHS = AI->getOperand(1);
    2300             : 
    2301             :         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
    2302           0 :           if (C->getValue().isPowerOf2())
    2303             :             std::swap(AndLHS, AndRHS);
    2304             : 
    2305             :         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
    2306          12 :           if (C->getValue().isPowerOf2()) {
    2307          12 :             TestBit = C->getValue().logBase2();
    2308             :             LHS = AndLHS;
    2309             :           }
    2310             :       }
    2311             : 
    2312          41 :     if (VT == MVT::i1)
    2313             :       TestBit = 0;
    2314             : 
    2315          41 :     IsCmpNE = Predicate == CmpInst::ICMP_NE;
    2316          41 :     break;
    2317          13 :   case CmpInst::ICMP_SLT:
    2318             :   case CmpInst::ICMP_SGE:
    2319          13 :     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
    2320             :       return false;
    2321             : 
    2322           6 :     TestBit = BW - 1;
    2323           6 :     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    2324           6 :     break;
    2325          10 :   case CmpInst::ICMP_SGT:
    2326             :   case CmpInst::ICMP_SLE:
    2327          10 :     if (!isa<ConstantInt>(RHS))
    2328             :       return false;
    2329             : 
    2330           8 :     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
    2331             :       return false;
    2332             : 
    2333           8 :     TestBit = BW - 1;
    2334           8 :     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    2335           8 :     break;
    2336             :   } // end switch
    2337             : 
    2338             :   static const unsigned OpcTable[2][2][2] = {
    2339             :     { {AArch64::CBZW,  AArch64::CBZX },
    2340             :       {AArch64::CBNZW, AArch64::CBNZX} },
    2341             :     { {AArch64::TBZW,  AArch64::TBZX },
    2342             :       {AArch64::TBNZW, AArch64::TBNZX} }
    2343             :   };
    2344             : 
    2345          55 :   bool IsBitTest = TestBit != -1;
    2346          55 :   bool Is64Bit = BW == 64;
    2347          55 :   if (TestBit < 32 && TestBit >= 0)
    2348             :     Is64Bit = false;
    2349             : 
    2350          55 :   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
    2351          55 :   const MCInstrDesc &II = TII.get(Opc);
    2352             : 
    2353          55 :   unsigned SrcReg = getRegForValue(LHS);
    2354          55 :   if (!SrcReg)
    2355             :     return false;
    2356          55 :   bool SrcIsKill = hasTrivialKill(LHS);
    2357             : 
    2358          55 :   if (BW == 64 && !Is64Bit)
    2359           4 :     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
    2360             :                                         AArch64::sub_32);
    2361             : 
    2362          55 :   if ((BW < 32) && !IsBitTest)
    2363           8 :     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
    2364             : 
    2365             :   // Emit the combined compare and branch instruction.
    2366         110 :   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
    2367             :   MachineInstrBuilder MIB =
    2368         165 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    2369          55 :           .addReg(SrcReg, getKillRegState(SrcIsKill));
    2370          55 :   if (IsBitTest)
    2371          27 :     MIB.addImm(TestBit);
    2372             :   MIB.addMBB(TBB);
    2373             : 
    2374          55 :   finishCondBranch(BI->getParent(), TBB, FBB);
    2375          55 :   return true;
    2376             : }
    2377             : 
    2378         272 : bool AArch64FastISel::selectBranch(const Instruction *I) {
    2379             :   const BranchInst *BI = cast<BranchInst>(I);
    2380         272 :   if (BI->isUnconditional()) {
    2381         300 :     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2382         300 :     fastEmitBranch(MSucc, BI->getDebugLoc());
    2383         150 :     return true;
    2384             :   }
    2385             : 
    2386         244 :   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2387         244 :   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    2388             : 
    2389             :   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    2390          89 :     if (CI->hasOneUse() && isValueAvailable(CI)) {
    2391             :       // Try to optimize or fold the cmp.
    2392          88 :       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2393          88 :       switch (Predicate) {
    2394             :       default:
    2395             :         break;
    2396           0 :       case CmpInst::FCMP_FALSE:
    2397           0 :         fastEmitBranch(FBB, DbgLoc);
    2398           0 :         return true;
    2399           0 :       case CmpInst::FCMP_TRUE:
    2400           0 :         fastEmitBranch(TBB, DbgLoc);
    2401           0 :         return true;
    2402             :       }
    2403             : 
    2404             :       // Try to emit a combined compare-and-branch first.
    2405          88 :       if (emitCompareAndBranch(BI))
    2406             :         return true;
    2407             : 
    2408             :       // Try to take advantage of fallthrough opportunities.
    2409          33 :       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2410             :         std::swap(TBB, FBB);
    2411           6 :         Predicate = CmpInst::getInversePredicate(Predicate);
    2412             :       }
    2413             : 
    2414             :       // Emit the cmp.
    2415          66 :       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    2416             :         return false;
    2417             : 
    2418             :       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
    2419             :       // instruction.
    2420             :       AArch64CC::CondCode CC = getCompareCC(Predicate);
    2421             :       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
    2422          33 :       switch (Predicate) {
    2423             :       default:
    2424             :         break;
    2425             :       case CmpInst::FCMP_UEQ:
    2426             :         ExtraCC = AArch64CC::EQ;
    2427             :         CC = AArch64CC::VS;
    2428             :         break;
    2429             :       case CmpInst::FCMP_ONE:
    2430             :         ExtraCC = AArch64CC::MI;
    2431             :         CC = AArch64CC::GT;
    2432             :         break;
    2433             :       }
    2434             :       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2435             : 
    2436             :       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
    2437             :       if (ExtraCC != AArch64CC::AL) {
    2438           4 :         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2439           2 :             .addImm(ExtraCC)
    2440             :             .addMBB(TBB);
    2441             :       }
    2442             : 
    2443             :       // Emit the branch.
    2444          66 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2445          33 :           .addImm(CC)
    2446             :           .addMBB(TBB);
    2447             : 
    2448          33 :       finishCondBranch(BI->getParent(), TBB, FBB);
    2449          33 :       return true;
    2450             :     }
    2451             :   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    2452             :     uint64_t Imm = CI->getZExtValue();
    2453           2 :     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    2454           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
    2455             :         .addMBB(Target);
    2456             : 
    2457             :     // Obtain the branch probability and add the target to the successor list.
    2458           2 :     if (FuncInfo.BPI) {
    2459             :       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
    2460           0 :           BI->getParent(), Target->getBasicBlock());
    2461           0 :       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
    2462             :     } else
    2463           2 :       FuncInfo.MBB->addSuccessorWithoutProb(Target);
    2464             :     return true;
    2465             :   } else {
    2466          31 :     AArch64CC::CondCode CC = AArch64CC::NE;
    2467          31 :     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
    2468             :       // Fake request the condition, otherwise the intrinsic might be completely
    2469             :       // optimized away.
    2470          28 :       unsigned CondReg = getRegForValue(BI->getCondition());
    2471          14 :       if (!CondReg)
    2472          14 :         return false;
    2473             : 
    2474             :       // Emit the branch.
    2475          28 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2476          14 :         .addImm(CC)
    2477             :         .addMBB(TBB);
    2478             : 
    2479          14 :       finishCondBranch(BI->getParent(), TBB, FBB);
    2480          14 :       return true;
    2481             :     }
    2482             :   }
    2483             : 
    2484          36 :   unsigned CondReg = getRegForValue(BI->getCondition());
    2485          18 :   if (CondReg == 0)
    2486             :     return false;
    2487          18 :   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
    2488             : 
    2489             :   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
    2490             :   unsigned Opcode = AArch64::TBNZW;
    2491          18 :   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2492             :     std::swap(TBB, FBB);
    2493             :     Opcode = AArch64::TBZW;
    2494             :   }
    2495             : 
    2496          18 :   const MCInstrDesc &II = TII.get(Opcode);
    2497             :   unsigned ConstrainedCondReg
    2498          36 :     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
    2499          36 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    2500          18 :       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
    2501             :       .addImm(0)
    2502             :       .addMBB(TBB);
    2503             : 
    2504          18 :   finishCondBranch(BI->getParent(), TBB, FBB);
    2505          18 :   return true;
    2506             : }
    2507             : 
    2508           0 : bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
    2509             :   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
    2510           0 :   unsigned AddrReg = getRegForValue(BI->getOperand(0));
    2511           0 :   if (AddrReg == 0)
    2512             :     return false;
    2513             : 
    2514             :   // Emit the indirect branch.
    2515           0 :   const MCInstrDesc &II = TII.get(AArch64::BR);
    2516           0 :   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
    2517           0 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
    2518             : 
    2519             :   // Make sure the CFG is up-to-date.
    2520           0 :   for (auto *Succ : BI->successors())
    2521           0 :     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
    2522             : 
    2523           0 :   return true;
    2524             : }
    2525             : 
    2526          57 : bool AArch64FastISel::selectCmp(const Instruction *I) {
    2527             :   const CmpInst *CI = cast<CmpInst>(I);
    2528             : 
    2529             :   // Vectors of i1 are weird: bail out.
    2530         114 :   if (CI->getType()->isVectorTy())
    2531             :     return false;
    2532             : 
    2533             :   // Try to optimize or fold the cmp.
    2534          51 :   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2535             :   unsigned ResultReg = 0;
    2536          51 :   switch (Predicate) {
    2537             :   default:
    2538             :     break;
    2539           1 :   case CmpInst::FCMP_FALSE:
    2540           1 :     ResultReg = createResultReg(&AArch64::GPR32RegClass);
    2541           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    2542           2 :             TII.get(TargetOpcode::COPY), ResultReg)
    2543           1 :         .addReg(AArch64::WZR, getKillRegState(true));
    2544           1 :     break;
    2545             :   case CmpInst::FCMP_TRUE:
    2546           1 :     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    2547           1 :     break;
    2548             :   }
    2549             : 
    2550           2 :   if (ResultReg) {
    2551           2 :     updateValueMap(I, ResultReg);
    2552           2 :     return true;
    2553             :   }
    2554             : 
    2555             :   // Emit the cmp.
    2556          98 :   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    2557             :     return false;
    2558             : 
    2559          49 :   ResultReg = createResultReg(&AArch64::GPR32RegClass);
    2560             : 
    2561             :   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
    2562             :   // condition codes are inverted, because they are used by CSINC.
    2563             :   static unsigned CondCodeTable[2][2] = {
    2564             :     { AArch64CC::NE, AArch64CC::VC },
    2565             :     { AArch64CC::PL, AArch64CC::LE }
    2566             :   };
    2567             :   unsigned *CondCodes = nullptr;
    2568          49 :   switch (Predicate) {
    2569             :   default:
    2570             :     break;
    2571             :   case CmpInst::FCMP_UEQ:
    2572             :     CondCodes = &CondCodeTable[0][0];
    2573             :     break;
    2574           1 :   case CmpInst::FCMP_ONE:
    2575             :     CondCodes = &CondCodeTable[1][0];
    2576           1 :     break;
    2577             :   }
    2578             : 
    2579             :   if (CondCodes) {
    2580           2 :     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    2581           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2582           4 :             TmpReg1)
    2583           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2584           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2585           2 :         .addImm(CondCodes[0]);
    2586           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2587           2 :             ResultReg)
    2588           2 :         .addReg(TmpReg1, getKillRegState(true))
    2589           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2590           2 :         .addImm(CondCodes[1]);
    2591             : 
    2592           2 :     updateValueMap(I, ResultReg);
    2593           2 :     return true;
    2594             :   }
    2595             : 
    2596             :   // Now set a register based on the comparison.
    2597             :   AArch64CC::CondCode CC = getCompareCC(Predicate);
    2598             :   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2599             :   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
    2600         141 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2601          94 :           ResultReg)
    2602          47 :       .addReg(AArch64::WZR, getKillRegState(true))
    2603          47 :       .addReg(AArch64::WZR, getKillRegState(true))
    2604          47 :       .addImm(invertedCC);
    2605             : 
    2606          47 :   updateValueMap(I, ResultReg);
    2607          47 :   return true;
    2608             : }
    2609             : 
    2610             : /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
    2611             : /// value.
    2612          53 : bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
    2613          53 :   if (!SI->getType()->isIntegerTy(1))
    2614             :     return false;
    2615             : 
    2616             :   const Value *Src1Val, *Src2Val;
    2617             :   unsigned Opc = 0;
    2618             :   bool NeedExtraOp = false;
    2619             :   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
    2620           3 :     if (CI->isOne()) {
    2621             :       Src1Val = SI->getCondition();
    2622             :       Src2Val = SI->getFalseValue();
    2623             :       Opc = AArch64::ORRWrr;
    2624             :     } else {
    2625             :       assert(CI->isZero());
    2626             :       Src1Val = SI->getFalseValue();
    2627             :       Src2Val = SI->getCondition();
    2628             :       Opc = AArch64::BICWrr;
    2629             :     }
    2630             :   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
    2631           2 :     if (CI->isOne()) {
    2632             :       Src1Val = SI->getCondition();
    2633             :       Src2Val = SI->getTrueValue();
    2634             :       Opc = AArch64::ORRWrr;
    2635             :       NeedExtraOp = true;
    2636             :     } else {
    2637             :       assert(CI->isZero());
    2638             :       Src1Val = SI->getCondition();
    2639             :       Src2Val = SI->getTrueValue();
    2640             :       Opc = AArch64::ANDWrr;
    2641             :     }
    2642             :   }
    2643             : 
    2644           6 :   if (!Opc)
    2645             :     return false;
    2646             : 
    2647           5 :   unsigned Src1Reg = getRegForValue(Src1Val);
    2648           5 :   if (!Src1Reg)
    2649             :     return false;
    2650           5 :   bool Src1IsKill = hasTrivialKill(Src1Val);
    2651             : 
    2652           5 :   unsigned Src2Reg = getRegForValue(Src2Val);
    2653           5 :   if (!Src2Reg)
    2654             :     return false;
    2655           5 :   bool Src2IsKill = hasTrivialKill(Src2Val);
    2656             : 
    2657           5 :   if (NeedExtraOp) {
    2658           2 :     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
    2659             :     Src1IsKill = true;
    2660             :   }
    2661           5 :   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
    2662           5 :                                        Src1IsKill, Src2Reg, Src2IsKill);
    2663           5 :   updateValueMap(SI, ResultReg);
    2664           5 :   return true;
    2665             : }
    2666             : 
    2667          53 : bool AArch64FastISel::selectSelect(const Instruction *I) {
    2668             :   assert(isa<SelectInst>(I) && "Expected a select instruction.");
    2669          53 :   MVT VT;
    2670          53 :   if (!isTypeSupported(I->getType(), VT))
    2671             :     return false;
    2672             : 
    2673             :   unsigned Opc;
    2674             :   const TargetRegisterClass *RC;
    2675          53 :   switch (VT.SimpleTy) {
    2676             :   default:
    2677             :     return false;
    2678             :   case MVT::i1:
    2679             :   case MVT::i8:
    2680             :   case MVT::i16:
    2681             :   case MVT::i32:
    2682             :     Opc = AArch64::CSELWr;
    2683             :     RC = &AArch64::GPR32RegClass;
    2684             :     break;
    2685           8 :   case MVT::i64:
    2686             :     Opc = AArch64::CSELXr;
    2687             :     RC = &AArch64::GPR64RegClass;
    2688           8 :     break;
    2689          26 :   case MVT::f32:
    2690             :     Opc = AArch64::FCSELSrrr;
    2691             :     RC = &AArch64::FPR32RegClass;
    2692          26 :     break;
    2693           1 :   case MVT::f64:
    2694             :     Opc = AArch64::FCSELDrrr;
    2695             :     RC = &AArch64::FPR64RegClass;
    2696           1 :     break;
    2697             :   }
    2698             : 
    2699             :   const SelectInst *SI = cast<SelectInst>(I);
    2700             :   const Value *Cond = SI->getCondition();
    2701          53 :   AArch64CC::CondCode CC = AArch64CC::NE;
    2702             :   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
    2703             : 
    2704          53 :   if (optimizeSelect(SI))
    2705             :     return true;
    2706             : 
    2707             :   // Try to pickup the flags, so we don't have to emit another compare.
    2708          48 :   if (foldXALUIntrinsic(CC, I, Cond)) {
    2709             :     // Fake request the condition to force emission of the XALU intrinsic.
    2710          12 :     unsigned CondReg = getRegForValue(Cond);
    2711          12 :     if (!CondReg)
    2712             :       return false;
    2713          55 :   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
    2714          27 :              isValueAvailable(Cond)) {
    2715             :     const auto *Cmp = cast<CmpInst>(Cond);
    2716             :     // Try to optimize or fold the cmp.
    2717          27 :     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
    2718             :     const Value *FoldSelect = nullptr;
    2719          27 :     switch (Predicate) {
    2720             :     default:
    2721             :       break;
    2722             :     case CmpInst::FCMP_FALSE:
    2723             :       FoldSelect = SI->getFalseValue();
    2724           1 :       break;
    2725             :     case CmpInst::FCMP_TRUE:
    2726             :       FoldSelect = SI->getTrueValue();
    2727           1 :       break;
    2728             :     }
    2729             : 
    2730           2 :     if (FoldSelect) {
    2731           2 :       unsigned SrcReg = getRegForValue(FoldSelect);
    2732           2 :       if (!SrcReg)
    2733             :         return false;
    2734           2 :       unsigned UseReg = lookUpRegForValue(SI);
    2735           2 :       if (UseReg)
    2736           2 :         MRI.clearKillFlags(UseReg);
    2737             : 
    2738           2 :       updateValueMap(I, SrcReg);
    2739           2 :       return true;
    2740             :     }
    2741             : 
    2742             :     // Emit the cmp.
    2743          50 :     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
    2744             :       return false;
    2745             : 
    2746             :     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
    2747          25 :     CC = getCompareCC(Predicate);
    2748          25 :     switch (Predicate) {
    2749             :     default:
    2750             :       break;
    2751           1 :     case CmpInst::FCMP_UEQ:
    2752             :       ExtraCC = AArch64CC::EQ;
    2753           1 :       CC = AArch64CC::VS;
    2754           1 :       break;
    2755           1 :     case CmpInst::FCMP_ONE:
    2756             :       ExtraCC = AArch64CC::MI;
    2757           1 :       CC = AArch64CC::GT;
    2758           1 :       break;
    2759             :     }
    2760             :     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2761             :   } else {
    2762           9 :     unsigned CondReg = getRegForValue(Cond);
    2763           9 :     if (!CondReg)
    2764             :       return false;
    2765           9 :     bool CondIsKill = hasTrivialKill(Cond);
    2766             : 
    2767           9 :     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
    2768           9 :     CondReg = constrainOperandRegClass(II, CondReg, 1);
    2769             : 
    2770             :     // Emit a TST instruction (ANDS wzr, reg, #imm).
    2771          18 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
    2772           9 :             AArch64::WZR)
    2773           9 :         .addReg(CondReg, getKillRegState(CondIsKill))
    2774           9 :         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    2775             :   }
    2776             : 
    2777          92 :   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
    2778          46 :   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
    2779             : 
    2780          46 :   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
    2781          46 :   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
    2782             : 
    2783          46 :   if (!Src1Reg || !Src2Reg)
    2784             :     return false;
    2785             : 
    2786          46 :   if (ExtraCC != AArch64CC::AL) {
    2787           2 :     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
    2788             :                                Src2IsKill, ExtraCC);
    2789             :     Src2IsKill = true;
    2790             :   }
    2791          46 :   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
    2792          46 :                                         Src2IsKill, CC);
    2793          46 :   updateValueMap(I, ResultReg);
    2794          46 :   return true;
    2795             : }
    2796             : 
    2797           5 : bool AArch64FastISel::selectFPExt(const Instruction *I) {
    2798           5 :   Value *V = I->getOperand(0);
    2799          13 :   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
    2800             :     return false;
    2801             : 
    2802           3 :   unsigned Op = getRegForValue(V);
    2803           3 :   if (Op == 0)
    2804             :     return false;
    2805             : 
    2806           3 :   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
    2807           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
    2808           9 :           ResultReg).addReg(Op);
    2809           3 :   updateValueMap(I, ResultReg);
    2810           3 :   return true;
    2811             : }
    2812             : 
    2813           2 : bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
    2814           2 :   Value *V = I->getOperand(0);
    2815           5 :   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
    2816             :     return false;
    2817             : 
    2818           1 :   unsigned Op = getRegForValue(V);
    2819           1 :   if (Op == 0)
    2820             :     return false;
    2821             : 
    2822           1 :   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
    2823           3 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
    2824           3 :           ResultReg).addReg(Op);
    2825           1 :   updateValueMap(I, ResultReg);
    2826           1 :   return true;
    2827             : }
    2828             : 
    2829             : // FPToUI and FPToSI
    2830           7 : bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
    2831           7 :   MVT DestVT;
    2832          14 :   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    2833             :     return false;
    2834             : 
    2835          12 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    2836           6 :   if (SrcReg == 0)
    2837             :     return false;
    2838             : 
    2839          12 :   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
    2840             :   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
    2841             :     return false;
    2842             : 
    2843             :   unsigned Opc;
    2844             :   if (SrcVT == MVT::f64) {
    2845           1 :     if (Signed)
    2846           0 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    2847             :     else
    2848           1 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
    2849             :   } else {
    2850           1 :     if (Signed)
    2851           0 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    2852             :     else
    2853           1 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
    2854             :   }
    2855           2 :   unsigned ResultReg = createResultReg(
    2856           2 :       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
    2857           6 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    2858           2 :       .addReg(SrcReg);
    2859           2 :   updateValueMap(I, ResultReg);
    2860           2 :   return true;
    2861             : }
    2862             : 
    2863          22 : bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
    2864          22 :   MVT DestVT;
    2865          44 :   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    2866             :     return false;
    2867             :   // Let regular ISEL handle FP16
    2868          20 :   if (DestVT == MVT::f16)
    2869             :     return false;
    2870             : 
    2871             :   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
    2872             :          "Unexpected value type.");
    2873             : 
    2874          20 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    2875          10 :   if (!SrcReg)
    2876             :     return false;
    2877          10 :   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
    2878             : 
    2879          20 :   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
    2880             : 
    2881             :   // Handle sign-extension.
    2882             :   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    2883           6 :     SrcReg =
    2884          12 :         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    2885           6 :     if (!SrcReg)
    2886             :       return false;
    2887             :     SrcIsKill = true;
    2888             :   }
    2889             : 
    2890             :   unsigned Opc;
    2891             :   if (SrcVT == MVT::i64) {
    2892           2 :     if (Signed)
    2893           0 :       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    2894             :     else
    2895           2 :       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
    2896             :   } else {
    2897           8 :     if (Signed)
    2898           3 :       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    2899             :     else
    2900           5 :       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
    2901             :   }
    2902             : 
    2903          10 :   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
    2904          10 :                                       SrcIsKill);
    2905          10 :   updateValueMap(I, ResultReg);
    2906          10 :   return true;
    2907             : }
    2908             : 
    2909        1193 : bool AArch64FastISel::fastLowerArguments() {
    2910        1193 :   if (!FuncInfo.CanLowerReturn)
    2911             :     return false;
    2912             : 
    2913        1193 :   const Function *F = FuncInfo.Fn;
    2914        1193 :   if (F->isVarArg())
    2915             :     return false;
    2916             : 
    2917             :   CallingConv::ID CC = F->getCallingConv();
    2918        1192 :   if (CC != CallingConv::C && CC != CallingConv::Swift)
    2919             :     return false;
    2920             : 
    2921             :   // Only handle simple cases of up to 8 GPR and FPR each.
    2922             :   unsigned GPRCnt = 0;
    2923             :   unsigned FPRCnt = 0;
    2924        3092 :   for (auto const &Arg : F->args()) {
    2925        4026 :     if (Arg.hasAttribute(Attribute::ByVal) ||
    2926        4026 :         Arg.hasAttribute(Attribute::InReg) ||
    2927        4024 :         Arg.hasAttribute(Attribute::StructRet) ||
    2928        4015 :         Arg.hasAttribute(Attribute::SwiftSelf) ||
    2929        6014 :         Arg.hasAttribute(Attribute::SwiftError) ||
    2930        1997 :         Arg.hasAttribute(Attribute::Nest))
    2931         107 :       return false;
    2932             : 
    2933        1997 :     Type *ArgTy = Arg.getType();
    2934        1997 :     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
    2935             :       return false;
    2936             : 
    2937        1993 :     EVT ArgVT = TLI.getValueType(DL, ArgTy);
    2938        1993 :     if (!ArgVT.isSimple())
    2939             :       return false;
    2940             : 
    2941             :     MVT VT = ArgVT.getSimpleVT().SimpleTy;
    2942        1993 :     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
    2943             :       return false;
    2944             : 
    2945        2110 :     if (VT.isVector() &&
    2946         234 :         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
    2947             :       return false;
    2948             : 
    2949        1922 :     if (VT >= MVT::i1 && VT <= MVT::i64)
    2950        1607 :       ++GPRCnt;
    2951         665 :     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
    2952          37 :              VT.is128BitVector())
    2953         305 :       ++FPRCnt;
    2954             :     else
    2955             :       return false;
    2956             : 
    2957        1912 :     if (GPRCnt > 8 || FPRCnt > 8)
    2958             :       return false;
    2959             :   }
    2960             : 
    2961             :   static const MCPhysReg Registers[6][8] = {
    2962             :     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
    2963             :       AArch64::W5, AArch64::W6, AArch64::W7 },
    2964             :     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
    2965             :       AArch64::X5, AArch64::X6, AArch64::X7 },
    2966             :     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
    2967             :       AArch64::H5, AArch64::H6, AArch64::H7 },
    2968             :     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
    2969             :       AArch64::S5, AArch64::S6, AArch64::S7 },
    2970             :     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
    2971             :       AArch64::D5, AArch64::D6, AArch64::D7 },
    2972             :     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
    2973             :       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
    2974             :   };
    2975             : 
    2976             :   unsigned GPRIdx = 0;
    2977             :   unsigned FPRIdx = 0;
    2978        2916 :   for (auto const &Arg : F->args()) {
    2979        3674 :     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    2980             :     unsigned SrcReg;
    2981             :     const TargetRegisterClass *RC;
    2982        1837 :     if (VT >= MVT::i1 && VT <= MVT::i32) {
    2983         647 :       SrcReg = Registers[0][GPRIdx++];
    2984             :       RC = &AArch64::GPR32RegClass;
    2985             :       VT = MVT::i32;
    2986        1190 :     } else if (VT == MVT::i64) {
    2987         894 :       SrcReg = Registers[1][GPRIdx++];
    2988             :       RC = &AArch64::GPR64RegClass;
    2989         296 :     } else if (VT == MVT::f16) {
    2990           2 :       SrcReg = Registers[2][FPRIdx++];
    2991             :       RC = &AArch64::FPR16RegClass;
    2992         294 :     } else if (VT ==  MVT::f32) {
    2993         188 :       SrcReg = Registers[3][FPRIdx++];
    2994             :       RC = &AArch64::FPR32RegClass;
    2995         106 :     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
    2996          79 :       SrcReg = Registers[4][FPRIdx++];
    2997             :       RC = &AArch64::FPR64RegClass;
    2998          27 :     } else if (VT.is128BitVector()) {
    2999          27 :       SrcReg = Registers[5][FPRIdx++];
    3000             :       RC = &AArch64::FPR128RegClass;
    3001             :     } else
    3002           0 :       llvm_unreachable("Unexpected value type.");
    3003             : 
    3004        1837 :     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    3005             :     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    3006             :     // Without this, EmitLiveInCopies may eliminate the livein if its only
    3007             :     // use is a bitcast (which isn't turned into an instruction).
    3008        1837 :     unsigned ResultReg = createResultReg(RC);
    3009        3674 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3010        3674 :             TII.get(TargetOpcode::COPY), ResultReg)
    3011        1837 :         .addReg(DstReg, getKillRegState(true));
    3012        1837 :     updateValueMap(&Arg, ResultReg);
    3013             :   }
    3014             :   return true;
    3015             : }
    3016             : 
    3017         123 : bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
    3018             :                                       SmallVectorImpl<MVT> &OutVTs,
    3019             :                                       unsigned &NumBytes) {
    3020         123 :   CallingConv::ID CC = CLI.CallConv;
    3021             :   SmallVector<CCValAssign, 16> ArgLocs;
    3022         246 :   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
    3023         123 :   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
    3024             : 
    3025             :   // Get a count of how many bytes are to be pushed on the stack.
    3026         123 :   NumBytes = CCInfo.getNextStackOffset();
    3027             : 
    3028             :   // Issue CALLSEQ_START
    3029         123 :   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
    3030         246 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    3031         123 :     .addImm(NumBytes).addImm(0);
    3032             : 
    3033             :   // Process the args.
    3034        2755 :   for (CCValAssign &VA : ArgLocs) {
    3035        2640 :     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    3036        1320 :     MVT ArgVT = OutVTs[VA.getValNo()];
    3037             : 
    3038        1320 :     unsigned ArgReg = getRegForValue(ArgVal);
    3039        1320 :     if (!ArgReg)
    3040           4 :       return false;
    3041             : 
    3042             :     // Handle arg promotion: SExt, ZExt, AExt.
    3043        1318 :     switch (VA.getLocInfo()) {
    3044             :     case CCValAssign::Full:
    3045             :       break;
    3046          15 :     case CCValAssign::SExt: {
    3047          15 :       MVT DestVT = VA.getLocVT();
    3048          15 :       MVT SrcVT = ArgVT;
    3049          15 :       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
    3050          15 :       if (!ArgReg)
    3051           0 :         return false;
    3052          15 :       break;
    3053             :     }
    3054          84 :     case CCValAssign::AExt:
    3055             :     // Intentional fall-through.
    3056             :     case CCValAssign::ZExt: {
    3057          84 :       MVT DestVT = VA.getLocVT();
    3058          84 :       MVT SrcVT = ArgVT;
    3059          84 :       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
    3060          84 :       if (!ArgReg)
    3061           0 :         return false;
    3062          84 :       break;
    3063             :     }
    3064           0 :     default:
    3065           0 :       llvm_unreachable("Unknown arg promotion!");
    3066             :     }
    3067             : 
    3068             :     // Now copy/store arg to correct locations.
    3069        1572 :     if (VA.isRegLoc() && !VA.needsCustom()) {
    3070         508 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3071         508 :               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
    3072         254 :       CLI.OutRegs.push_back(VA.getLocReg());
    3073        1064 :     } else if (VA.needsCustom()) {
    3074             :       // FIXME: Handle custom args.
    3075             :       return false;
    3076             :     } else {
    3077             :       assert(VA.isMemLoc() && "Assuming store on stack.");
    3078             : 
    3079             :       // Don't emit stores for undef values.
    3080        1064 :       if (isa<UndefValue>(ArgVal))
    3081        1032 :         continue;
    3082             : 
    3083             :       // Need to store on the stack.
    3084          32 :       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
    3085             : 
    3086             :       unsigned BEAlign = 0;
    3087          32 :       if (ArgSize < 8 && !Subtarget->isLittleEndian())
    3088           2 :         BEAlign = 8 - ArgSize;
    3089             : 
    3090             :       Address Addr;
    3091             :       Addr.setKind(Address::RegBase);
    3092             :       Addr.setReg(AArch64::SP);
    3093          32 :       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
    3094             : 
    3095          32 :       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
    3096         128 :       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    3097          32 :           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
    3098          32 :           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
    3099             : 
    3100          32 :       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
    3101           2 :         return false;
    3102             :     }
    3103             :   }
    3104             :   return true;
    3105             : }
    3106             : 
    3107         119 : bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
    3108             :                                  unsigned NumBytes) {
    3109         119 :   CallingConv::ID CC = CLI.CallConv;
    3110             : 
    3111             :   // Issue CALLSEQ_END
    3112         119 :   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
    3113         238 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
    3114         119 :     .addImm(NumBytes).addImm(0);
    3115             : 
    3116             :   // Now the return value.
    3117         119 :   if (RetVT != MVT::isVoid) {
    3118             :     SmallVector<CCValAssign, 16> RVLocs;
    3119         120 :     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    3120          65 :     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
    3121             : 
    3122             :     // Only handle a single return value.
    3123          65 :     if (RVLocs.size() != 1)
    3124          10 :       return false;
    3125             : 
    3126             :     // Copy all of the result registers out of their specified physreg.
    3127             :     MVT CopyVT = RVLocs[0].getValVT();
    3128             : 
    3129             :     // TODO: Handle big-endian results
    3130          65 :     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
    3131             :       return false;
    3132             : 
    3133          55 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    3134         110 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3135          55 :             TII.get(TargetOpcode::COPY), ResultReg)
    3136          55 :         .addReg(RVLocs[0].getLocReg());
    3137         110 :     CLI.InRegs.push_back(RVLocs[0].getLocReg());
    3138             : 
    3139          55 :     CLI.ResultReg = ResultReg;
    3140          55 :     CLI.NumResultRegs = 1;
    3141             :   }
    3142             : 
    3143             :   return true;
    3144             : }
    3145             : 
    3146         232 : bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
    3147         232 :   CallingConv::ID CC  = CLI.CallConv;
    3148         232 :   bool IsTailCall     = CLI.IsTailCall;
    3149         232 :   bool IsVarArg       = CLI.IsVarArg;
    3150         232 :   const Value *Callee = CLI.Callee;
    3151         232 :   MCSymbol *Symbol = CLI.Symbol;
    3152             : 
    3153         232 :   if (!Callee && !Symbol)
    3154             :     return false;
    3155             : 
    3156             :   // Allow SelectionDAG isel to handle tail calls.
    3157         232 :   if (IsTailCall)
    3158             :     return false;
    3159             : 
    3160         208 :   CodeModel::Model CM = TM.getCodeModel();
    3161             :   // Only support the small-addressing and large code models.
    3162         208 :   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    3163             :     return false;
    3164             : 
    3165             :   // FIXME: Add large code model support for ELF.
    3166         224 :   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    3167             :     return false;
    3168             : 
    3169             :   // Let SDISel handle vararg functions.
    3170         208 :   if (IsVarArg)
    3171             :     return false;
    3172             : 
    3173             :   // FIXME: Only handle *simple* calls for now.
    3174         208 :   MVT RetVT;
    3175         416 :   if (CLI.RetTy->isVoidTy())
    3176          58 :     RetVT = MVT::isVoid;
    3177         150 :   else if (!isTypeLegal(CLI.RetTy, RetVT))
    3178             :     return false;
    3179             : 
    3180        2983 :   for (auto Flag : CLI.OutFlags)
    3181        5592 :     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
    3182        2792 :         Flag.isSwiftSelf() || Flag.isSwiftError())
    3183           5 :       return false;
    3184             : 
    3185             :   // Set up the argument vectors.
    3186             :   SmallVector<MVT, 16> OutVTs;
    3187             :   OutVTs.reserve(CLI.OutVals.size());
    3188             : 
    3189        2838 :   for (auto *Val : CLI.OutVals) {
    3190        1392 :     MVT VT;
    3191        1500 :     if (!isTypeLegal(Val->getType(), VT) &&
    3192          90 :         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
    3193          69 :       return false;
    3194             : 
    3195             :     // We don't handle vector parameters yet.
    3196        1383 :     if (VT.isVector() || VT.getSizeInBits() > 64)
    3197             :       return false;
    3198             : 
    3199        1323 :     OutVTs.push_back(VT);
    3200             :   }
    3201             : 
    3202             :   Address Addr;
    3203         123 :   if (Callee && !computeCallAddress(Callee, Addr))
    3204             :     return false;
    3205             : 
    3206             :   // Handle the arguments now that we've gotten them.
    3207             :   unsigned NumBytes;
    3208         123 :   if (!processCallArgs(CLI, OutVTs, NumBytes))
    3209             :     return false;
    3210             : 
    3211             :   // Issue the call.
    3212         119 :   MachineInstrBuilder MIB;
    3213         119 :   if (Subtarget->useSmallAddressing()) {
    3214         103 :     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
    3215         103 :     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
    3216         103 :     if (Symbol)
    3217             :       MIB.addSym(Symbol, 0);
    3218          88 :     else if (Addr.getGlobalValue())
    3219             :       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    3220          19 :     else if (Addr.getReg()) {
    3221          19 :       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
    3222          19 :       MIB.addReg(Reg);
    3223             :     } else
    3224             :       return false;
    3225             :   } else {
    3226             :     unsigned CallReg = 0;
    3227          16 :     if (Symbol) {
    3228           8 :       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
    3229          24 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    3230          16 :               ADRPReg)
    3231             :           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
    3232             : 
    3233           8 :       CallReg = createResultReg(&AArch64::GPR64RegClass);
    3234          16 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3235           8 :               TII.get(AArch64::LDRXui), CallReg)
    3236           8 :           .addReg(ADRPReg)
    3237             :           .addSym(Symbol,
    3238             :                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    3239           8 :     } else if (Addr.getGlobalValue())
    3240           7 :       CallReg = materializeGV(Addr.getGlobalValue());
    3241           1 :     else if (Addr.getReg())
    3242             :       CallReg = Addr.getReg();
    3243             : 
    3244          16 :     if (!CallReg)
    3245             :       return false;
    3246             : 
    3247          16 :     const MCInstrDesc &II = TII.get(AArch64::BLR);
    3248          16 :     CallReg = constrainOperandRegClass(II, CallReg, 0);
    3249          16 :     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
    3250             :   }
    3251             : 
    3252             :   // Add implicit physical register uses to the call.
    3253         593 :   for (auto Reg : CLI.OutRegs)
    3254         237 :     MIB.addReg(Reg, RegState::Implicit);
    3255             : 
    3256             :   // Add a register mask with the call-preserved registers.
    3257             :   // Proper defs for return values will be added by setPhysRegsDeadExcept().
    3258         119 :   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
    3259             : 
    3260         119 :   CLI.Call = MIB;
    3261             : 
    3262             :   // Finish off the call including any return values.
    3263         119 :   return finishCall(CLI, RetVT, NumBytes);
    3264             : }
    3265             : 
    3266             : bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
    3267          31 :   if (Alignment)
    3268          29 :     return Len / Alignment <= 4;
    3269             :   else
    3270           2 :     return Len < 32;
    3271             : }
    3272             : 
    3273          13 : bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
    3274             :                                          uint64_t Len, unsigned Alignment) {
    3275             :   // Make sure we don't bloat code by inlining very large memcpy's.
    3276          13 :   if (!isMemCpySmall(Len, Alignment))
    3277             :     return false;
    3278             : 
    3279             :   int64_t UnscaledOffset = 0;
    3280             :   Address OrigDest = Dest;
    3281             :   Address OrigSrc = Src;
    3282             : 
    3283          95 :   while (Len) {
    3284          41 :     MVT VT;
    3285          41 :     if (!Alignment || Alignment >= 8) {
    3286          30 :       if (Len >= 8)
    3287          27 :         VT = MVT::i64;
    3288           3 :       else if (Len >= 4)
    3289           0 :         VT = MVT::i32;
    3290           3 :       else if (Len >= 2)
    3291           0 :         VT = MVT::i16;
    3292             :       else {
    3293           3 :         VT = MVT::i8;
    3294             :       }
    3295             :     } else {
    3296             :       // Bound based on alignment.
    3297          11 :       if (Len >= 4 && Alignment == 4)
    3298           2 :         VT = MVT::i32;
    3299           9 :       else if (Len >= 2 && Alignment == 2)
    3300           3 :         VT = MVT::i16;
    3301             :       else {
    3302           6 :         VT = MVT::i8;
    3303             :       }
    3304             :     }
    3305             : 
    3306          41 :     unsigned ResultReg = emitLoad(VT, VT, Src);
    3307          41 :     if (!ResultReg)
    3308           0 :       return false;
    3309             : 
    3310          41 :     if (!emitStore(VT, ResultReg, Dest))
    3311             :       return false;
    3312             : 
    3313          41 :     int64_t Size = VT.getSizeInBits() / 8;
    3314          41 :     Len -= Size;
    3315          41 :     UnscaledOffset += Size;
    3316             : 
    3317             :     // We need to recompute the unscaled offset for each iteration.
    3318          41 :     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    3319          41 :     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
    3320             :   }
    3321             : 
    3322             :   return true;
    3323             : }
    3324             : 
    3325             : /// \brief Check if it is possible to fold the condition from the XALU intrinsic
    3326             : /// into the user. The condition code will only be updated on success.
    3327          79 : bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
    3328             :                                         const Instruction *I,
    3329             :                                         const Value *Cond) {
    3330             :   if (!isa<ExtractValueInst>(Cond))
    3331             :     return false;
    3332             : 
    3333             :   const auto *EV = cast<ExtractValueInst>(Cond);
    3334             :   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    3335             :     return false;
    3336             : 
    3337             :   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
    3338          26 :   MVT RetVT;
    3339             :   const Function *Callee = II->getCalledFunction();
    3340             :   Type *RetTy =
    3341          26 :   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
    3342          26 :   if (!isTypeLegal(RetTy, RetVT))
    3343             :     return false;
    3344             : 
    3345          26 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    3346             :     return false;
    3347             : 
    3348          26 :   const Value *LHS = II->getArgOperand(0);
    3349             :   const Value *RHS = II->getArgOperand(1);
    3350             : 
    3351             :   // Canonicalize immediate to the RHS.
    3352          26 :   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
    3353             :       isCommutativeIntrinsic(II))
    3354             :     std::swap(LHS, RHS);
    3355             : 
    3356             :   // Simplify multiplies.
    3357             :   Intrinsic::ID IID = II->getIntrinsicID();
    3358          26 :   switch (IID) {
    3359             :   default:
    3360             :     break;
    3361           5 :   case Intrinsic::smul_with_overflow:
    3362             :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3363           1 :       if (C->getValue() == 2)
    3364             :         IID = Intrinsic::sadd_with_overflow;
    3365             :     break;
    3366           5 :   case Intrinsic::umul_with_overflow:
    3367             :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3368           1 :       if (C->getValue() == 2)
    3369             :         IID = Intrinsic::uadd_with_overflow;
    3370             :     break;
    3371             :   }
    3372             : 
    3373             :   AArch64CC::CondCode TmpCC;
    3374          24 :   switch (IID) {
    3375             :   default:
    3376             :     return false;
    3377             :   case Intrinsic::sadd_with_overflow:
    3378             :   case Intrinsic::ssub_with_overflow:
    3379             :     TmpCC = AArch64CC::VS;
    3380             :     break;
    3381             :   case Intrinsic::uadd_with_overflow:
    3382             :     TmpCC = AArch64CC::HS;
    3383             :     break;
    3384           4 :   case Intrinsic::usub_with_overflow:
    3385             :     TmpCC = AArch64CC::LO;
    3386           4 :     break;
    3387           8 :   case Intrinsic::smul_with_overflow:
    3388             :   case Intrinsic::umul_with_overflow:
    3389             :     TmpCC = AArch64CC::NE;
    3390           8 :     break;
    3391             :   }
    3392             : 
    3393             :   // Check if both instructions are in the same basic block.
    3394          26 :   if (!isValueAvailable(II))
    3395             :     return false;
    3396             : 
    3397             :   // Make sure nothing is in the way
    3398             :   BasicBlock::const_iterator Start(I);
    3399             :   BasicBlock::const_iterator End(II);
    3400          66 :   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    3401             :     // We only expect extractvalue instructions between the intrinsic and the
    3402             :     // instruction to be selected.
    3403          40 :     if (!isa<ExtractValueInst>(Itr))
    3404             :       return false;
    3405             : 
    3406             :     // Check that the extractvalue operand comes from the intrinsic.
    3407             :     const auto *EVI = cast<ExtractValueInst>(Itr);
    3408          40 :     if (EVI->getAggregateOperand() != II)
    3409             :       return false;
    3410             :   }
    3411             : 
    3412          26 :   CC = TmpCC;
    3413          26 :   return true;
    3414             : }
    3415             : 
    3416          94 : bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
    3417             :   // FIXME: Handle more intrinsics.
    3418          94 :   switch (II->getIntrinsicID()) {
    3419             :   default: return false;
    3420           2 :   case Intrinsic::frameaddress: {
    3421           2 :     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    3422             :     MFI.setFrameAddressIsTaken(true);
    3423             : 
    3424           2 :     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    3425           2 :     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    3426           2 :     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    3427           4 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3428           6 :             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    3429             :     // Recursively load frame address
    3430             :     // ldr x0, [fp]
    3431             :     // ldr x0, [x0]
    3432             :     // ldr x0, [x0]
    3433             :     // ...
    3434             :     unsigned DestReg;
    3435           4 :     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    3436           6 :     while (Depth--) {
    3437           2 :       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
    3438             :                                 SrcReg, /*IsKill=*/true, 0);
    3439             :       assert(DestReg && "Unexpected LDR instruction emission failure.");
    3440             :       SrcReg = DestReg;
    3441             :     }
    3442             : 
    3443           2 :     updateValueMap(II, SrcReg);
    3444           2 :     return true;
    3445             :   }
    3446          19 :   case Intrinsic::memcpy:
    3447             :   case Intrinsic::memmove: {
    3448             :     const auto *MTI = cast<MemTransferInst>(II);
    3449             :     // Don't handle volatile.
    3450          19 :     if (MTI->isVolatile())
    3451             :       return false;
    3452             : 
    3453             :     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    3454             :     // we would emit dead code because we don't currently handle memmoves.
    3455             :     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    3456          19 :     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
    3457             :       // Small memcpy's are common enough that we want to do them without a call
    3458             :       // if possible.
    3459             :       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
    3460             :       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
    3461          18 :                                     MTI->getSourceAlignment());
    3462          18 :       if (isMemCpySmall(Len, Alignment)) {
    3463             :         Address Dest, Src;
    3464          26 :         if (!computeAddress(MTI->getRawDest(), Dest) ||
    3465          13 :             !computeAddress(MTI->getRawSource(), Src))
    3466          13 :           return false;
    3467          13 :         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
    3468             :           return true;
    3469             :       }
    3470             :     }
    3471             : 
    3472           6 :     if (!MTI->getLength()->getType()->isIntegerTy(64))
    3473             :       return false;
    3474             : 
    3475          12 :     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
    3476             :       // Fast instruction selection doesn't support the special
    3477             :       // address spaces.
    3478             :       return false;
    3479             : 
    3480           6 :     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    3481          12 :     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
    3482             :   }
    3483           1 :   case Intrinsic::memset: {
    3484             :     const MemSetInst *MSI = cast<MemSetInst>(II);
    3485             :     // Don't handle volatile.
    3486           1 :     if (MSI->isVolatile())
    3487             :       return false;
    3488             : 
    3489           1 :     if (!MSI->getLength()->getType()->isIntegerTy(64))
    3490             :       return false;
    3491             : 
    3492           1 :     if (MSI->getDestAddressSpace() > 255)
    3493             :       // Fast instruction selection doesn't support the special
    3494             :       // address spaces.
    3495             :       return false;
    3496             : 
    3497           2 :     return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
    3498             :   }
    3499          12 :   case Intrinsic::sin:
    3500             :   case Intrinsic::cos:
    3501             :   case Intrinsic::pow: {
    3502          12 :     MVT RetVT;
    3503          12 :     if (!isTypeLegal(II->getType(), RetVT))
    3504             :       return false;
    3505             : 
    3506          12 :     if (RetVT != MVT::f32 && RetVT != MVT::f64)
    3507             :       return false;
    3508             : 
    3509             :     static const RTLIB::Libcall LibCallTable[3][2] = {
    3510             :       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
    3511             :       { RTLIB::COS_F32, RTLIB::COS_F64 },
    3512             :       { RTLIB::POW_F32, RTLIB::POW_F64 }
    3513             :     };
    3514             :     RTLIB::Libcall LC;
    3515             :     bool Is64Bit = RetVT == MVT::f64;
    3516          12 :     switch (II->getIntrinsicID()) {
    3517           0 :     default:
    3518           0 :       llvm_unreachable("Unexpected intrinsic.");
    3519           4 :     case Intrinsic::sin:
    3520           4 :       LC = LibCallTable[0][Is64Bit];
    3521           4 :       break;
    3522           4 :     case Intrinsic::cos:
    3523           4 :       LC = LibCallTable[1][Is64Bit];
    3524           4 :       break;
    3525           4 :     case Intrinsic::pow:
    3526           4 :       LC = LibCallTable[2][Is64Bit];
    3527           4 :       break;
    3528             :     }
    3529             : 
    3530             :     ArgListTy Args;
    3531          24 :     Args.reserve(II->getNumArgOperands());
    3532             : 
    3533             :     // Populate the argument list.
    3534          28 :     for (auto &Arg : II->arg_operands()) {
    3535             :       ArgListEntry Entry;
    3536          16 :       Entry.Val = Arg;
    3537          16 :       Entry.Ty = Arg->getType();
    3538          16 :       Args.push_back(Entry);
    3539             :     }
    3540             : 
    3541          24 :     CallLoweringInfo CLI;
    3542          12 :     MCContext &Ctx = MF->getContext();
    3543          12 :     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
    3544          48 :                   TLI.getLibcallName(LC), std::move(Args));
    3545          12 :     if (!lowerCallTo(CLI))
    3546             :       return false;
    3547          12 :     updateValueMap(II, CLI.ResultReg);
    3548          12 :     return true;
    3549             :   }
    3550           2 :   case Intrinsic::fabs: {
    3551           2 :     MVT VT;
    3552           2 :     if (!isTypeLegal(II->getType(), VT))
    3553             :       return false;
    3554             : 
    3555             :     unsigned Opc;
    3556           2 :     switch (VT.SimpleTy) {
    3557             :     default:
    3558             :       return false;
    3559             :     case MVT::f32:
    3560             :       Opc = AArch64::FABSSr;
    3561             :       break;
    3562           1 :     case MVT::f64:
    3563             :       Opc = AArch64::FABSDr;
    3564           1 :       break;
    3565             :     }
    3566           4 :     unsigned SrcReg = getRegForValue(II->getOperand(0));
    3567           2 :     if (!SrcReg)
    3568             :       return false;
    3569           2 :     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
    3570           2 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    3571           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    3572           2 :       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
    3573           2 :     updateValueMap(II, ResultReg);
    3574           2 :     return true;
    3575             :   }
    3576           1 :   case Intrinsic::trap:
    3577           3 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
    3578             :         .addImm(1);
    3579           1 :     return true;
    3580             : 
    3581             :   case Intrinsic::sqrt: {
    3582             :     Type *RetTy = II->getCalledFunction()->getReturnType();
    3583             : 
    3584           2 :     MVT VT;
    3585           2 :     if (!isTypeLegal(RetTy, VT))
    3586             :       return false;
    3587             : 
    3588           4 :     unsigned Op0Reg = getRegForValue(II->getOperand(0));
    3589           2 :     if (!Op0Reg)
    3590             :       return false;
    3591           2 :     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
    3592             : 
    3593           2 :     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    3594           2 :     if (!ResultReg)
    3595             :       return false;
    3596             : 
    3597           2 :     updateValueMap(II, ResultReg);
    3598           2 :     return true;
    3599             :   }
    3600             :   case Intrinsic::sadd_with_overflow:
    3601             :   case Intrinsic::uadd_with_overflow:
    3602             :   case Intrinsic::ssub_with_overflow:
    3603             :   case Intrinsic::usub_with_overflow:
    3604             :   case Intrinsic::smul_with_overflow:
    3605             :   case Intrinsic::umul_with_overflow: {
    3606             :     // This implements the basic lowering of the xalu with overflow intrinsics.
    3607             :     const Function *Callee = II->getCalledFunction();
    3608             :     auto *Ty = cast<StructType>(Callee->getReturnType());
    3609          48 :     Type *RetTy = Ty->getTypeAtIndex(0U);
    3610             : 
    3611          48 :     MVT VT;
    3612          48 :     if (!isTypeLegal(RetTy, VT))
    3613             :       return false;
    3614             : 
    3615          48 :     if (VT != MVT::i32 && VT != MVT::i64)
    3616             :       return false;
    3617             : 
    3618          48 :     const Value *LHS = II->getArgOperand(0);
    3619             :     const Value *RHS = II->getArgOperand(1);
    3620             :     // Canonicalize immediate to the RHS.
    3621          48 :     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
    3622             :         isCommutativeIntrinsic(II))
    3623             :       std::swap(LHS, RHS);
    3624             : 
    3625             :     // Simplify multiplies.
    3626             :     Intrinsic::ID IID = II->getIntrinsicID();
    3627          48 :     switch (IID) {
    3628             :     default:
    3629             :       break;
    3630           8 :     case Intrinsic::smul_with_overflow:
    3631             :       if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3632           2 :         if (C->getValue() == 2) {
    3633             :           IID = Intrinsic::sadd_with_overflow;
    3634             :           RHS = LHS;
    3635             :         }
    3636             :       break;
    3637           9 :     case Intrinsic::umul_with_overflow:
    3638             :       if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3639           3 :         if (C->getValue() == 2) {
    3640             :           IID = Intrinsic::uadd_with_overflow;
    3641             :           RHS = LHS;
    3642             :         }
    3643             :       break;
    3644             :     }
    3645             : 
    3646             :     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    3647             :     AArch64CC::CondCode CC = AArch64CC::Invalid;
    3648          44 :     switch (IID) {
    3649           0 :     default: llvm_unreachable("Unexpected intrinsic!");
    3650          14 :     case Intrinsic::sadd_with_overflow:
    3651             :       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
    3652             :       CC = AArch64CC::VS;
    3653          14 :       break;
    3654           8 :     case Intrinsic::uadd_with_overflow:
    3655             :       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
    3656             :       CC = AArch64CC::HS;
    3657           8 :       break;
    3658           7 :     case Intrinsic::ssub_with_overflow:
    3659             :       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
    3660             :       CC = AArch64CC::VS;
    3661           7 :       break;
    3662           6 :     case Intrinsic::usub_with_overflow:
    3663             :       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
    3664             :       CC = AArch64CC::LO;
    3665           6 :       break;
    3666           6 :     case Intrinsic::smul_with_overflow: {
    3667             :       CC = AArch64CC::NE;
    3668           6 :       unsigned LHSReg = getRegForValue(LHS);
    3669           6 :       if (!LHSReg)
    3670             :         return false;
    3671           6 :       bool LHSIsKill = hasTrivialKill(LHS);
    3672             : 
    3673           6 :       unsigned RHSReg = getRegForValue(RHS);
    3674           6 :       if (!RHSReg)
    3675             :         return false;
    3676           6 :       bool RHSIsKill = hasTrivialKill(RHS);
    3677             : 
    3678           6 :       if (VT == MVT::i32) {
    3679           3 :         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    3680           3 :         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
    3681           3 :                                        /*IsKill=*/false, 32);
    3682           3 :         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
    3683             :                                             AArch64::sub_32);
    3684           3 :         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
    3685             :                                               AArch64::sub_32);
    3686             :         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
    3687             :                     AArch64_AM::ASR, 31, /*WantResult=*/false);
    3688             :       } else {
    3689             :         assert(VT == MVT::i64 && "Unexpected value type.");
    3690             :         // LHSReg and RHSReg cannot be killed by this Mul, since they are
    3691             :         // reused in the next instruction.
    3692           3 :         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
    3693             :                             /*IsKill=*/false);
    3694           3 :         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
    3695           3 :                                         RHSReg, RHSIsKill);
    3696             :         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
    3697             :                     AArch64_AM::ASR, 63, /*WantResult=*/false);
    3698             :       }
    3699             :       break;
    3700             :     }
    3701           7 :     case Intrinsic::umul_with_overflow: {
    3702             :       CC = AArch64CC::NE;
    3703           7 :       unsigned LHSReg = getRegForValue(LHS);
    3704           7 :       if (!LHSReg)
    3705             :         return false;
    3706           7 :       bool LHSIsKill = hasTrivialKill(LHS);
    3707             : 
    3708           7 :       unsigned RHSReg = getRegForValue(RHS);
    3709           7 :       if (!RHSReg)
    3710             :         return false;
    3711           7 :       bool RHSIsKill = hasTrivialKill(RHS);
    3712             : 
    3713           7 :       if (VT == MVT::i32) {
    3714           3 :         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    3715             :         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
    3716             :                     /*IsKill=*/false, AArch64_AM::LSR, 32,
    3717             :                     /*WantResult=*/false);
    3718           3 :         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
    3719             :                                             AArch64::sub_32);
    3720             :       } else {
    3721             :         assert(VT == MVT::i64 && "Unexpected value type.");
    3722             :         // LHSReg and RHSReg cannot be killed by this Mul, since they are
    3723             :         // reused in the next instruction.
    3724           4 :         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
    3725             :                             /*IsKill=*/false);
    3726           4 :         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
    3727           4 :                                         RHSReg, RHSIsKill);
    3728             :         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
    3729             :                     /*IsKill=*/false, /*WantResult=*/false);
    3730             :       }
    3731             :       break;
    3732             :     }
    3733             :     }
    3734             : 
    3735          48 :     if (MulReg) {
    3736          13 :       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
    3737          26 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3738          39 :               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    3739             :     }
    3740             : 
    3741          96 :     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
    3742             :                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
    3743             :                                   /*IsKill=*/true, getInvertedCondCode(CC));
    3744             :     (void)ResultReg2;
    3745             :     assert((ResultReg1 + 1) == ResultReg2 &&
    3746             :            "Nonconsecutive result registers.");
    3747          48 :     updateValueMap(II, ResultReg1, 2);
    3748          48 :     return true;
    3749             :   }
    3750             :   }
    3751             :   return false;
    3752             : }
    3753             : 
    3754        1273 : bool AArch64FastISel::selectRet(const Instruction *I) {
    3755             :   const ReturnInst *Ret = cast<ReturnInst>(I);
    3756        1273 :   const Function &F = *I->getParent()->getParent();
    3757             : 
    3758        1273 :   if (!FuncInfo.CanLowerReturn)
    3759             :     return false;
    3760             : 
    3761        1273 :   if (F.isVarArg())
    3762             :     return false;
    3763             : 
    3764        2544 :   if (TLI.supportSwiftError() &&
    3765        1283 :       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    3766             :     return false;
    3767             : 
    3768        1261 :   if (TLI.supportSplitCSR(FuncInfo.MF))
    3769             :     return false;
    3770             : 
    3771             :   // Build a list of return value registers.
    3772             :   SmallVector<unsigned, 4> RetRegs;
    3773             : 
    3774        1258 :   if (Ret->getNumOperands() > 0) {
    3775             :     CallingConv::ID CC = F.getCallingConv();
    3776             :     SmallVector<ISD::OutputArg, 4> Outs;
    3777        1890 :     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
    3778             : 
    3779             :     // Analyze operands of the call, assigning locations to each operand.
    3780             :     SmallVector<CCValAssign, 16> ValLocs;
    3781        2738 :     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    3782         945 :     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
    3783             :                                                      : RetCC_AArch64_AAPCS;
    3784         945 :     CCInfo.AnalyzeReturn(Outs, RetCC);
    3785             : 
    3786             :     // Only handle a single return value for now.
    3787         945 :     if (ValLocs.size() != 1)
    3788          97 :       return false;
    3789             : 
    3790             :     CCValAssign &VA = ValLocs[0];
    3791             :     const Value *RV = Ret->getOperand(0);
    3792             : 
    3793             :     // Don't bother handling odd stuff for now.
    3794         938 :     if ((VA.getLocInfo() != CCValAssign::Full) &&
    3795             :         (VA.getLocInfo() != CCValAssign::BCvt))
    3796             :       return false;
    3797             : 
    3798             :     // Only handle register returns for now.
    3799         938 :     if (!VA.isRegLoc())
    3800             :       return false;
    3801             : 
    3802         938 :     unsigned Reg = getRegForValue(RV);
    3803         938 :     if (Reg == 0)
    3804             :       return false;
    3805             : 
    3806         938 :     unsigned SrcReg = Reg + VA.getValNo();
    3807         938 :     unsigned DestReg = VA.getLocReg();
    3808             :     // Avoid a cross-class copy. This is very unlikely.
    3809        2814 :     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
    3810             :       return false;
    3811             : 
    3812         938 :     EVT RVEVT = TLI.getValueType(DL, RV->getType());
    3813         938 :     if (!RVEVT.isSimple())
    3814             :       return false;
    3815             : 
    3816             :     // Vectors (of > 1 lane) in big endian need tricky handling.
    3817        1027 :     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
    3818          89 :         !Subtarget->isLittleEndian())
    3819             :       return false;
    3820             : 
    3821             :     MVT RVVT = RVEVT.getSimpleVT();
    3822         878 :     if (RVVT == MVT::f128)
    3823             :       return false;
    3824             : 
    3825             :     MVT DestVT = VA.getValVT();
    3826             :     // Special handling for extended integers.
    3827         870 :     if (RVVT != DestVT) {
    3828         173 :       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
    3829             :         return false;
    3830             : 
    3831         206 :       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
    3832             :         return false;
    3833             : 
    3834             :       bool IsZExt = Outs[0].Flags.isZExt();
    3835         151 :       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
    3836         151 :       if (SrcReg == 0)
    3837             :         return false;
    3838             :     }
    3839             : 
    3840             :     // Make the copy.
    3841        1696 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3842        2544 :             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
    3843             : 
    3844             :     // Add register to return instruction.
    3845         848 :     RetRegs.push_back(VA.getLocReg());
    3846             :   }
    3847             : 
    3848        1161 :   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3849        3483 :                                     TII.get(AArch64::RET_ReallyLR));
    3850        2857 :   for (unsigned RetReg : RetRegs)
    3851         848 :     MIB.addReg(RetReg, RegState::Implicit);
    3852             :   return true;
    3853             : }
    3854             : 
    3855          14 : bool AArch64FastISel::selectTrunc(const Instruction *I) {
    3856          14 :   Type *DestTy = I->getType();
    3857          14 :   Value *Op = I->getOperand(0);
    3858          14 :   Type *SrcTy = Op->getType();
    3859             : 
    3860          14 :   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
    3861          14 :   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
    3862          14 :   if (!SrcEVT.isSimple())
    3863             :     return false;
    3864          14 :   if (!DestEVT.isSimple())
    3865             :     return false;
    3866             : 
    3867             :   MVT SrcVT = SrcEVT.getSimpleVT();
    3868             :   MVT DestVT = DestEVT.getSimpleVT();
    3869             : 
    3870          14 :   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
    3871             :       SrcVT != MVT::i8)
    3872             :     return false;
    3873          12 :   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
    3874             :       DestVT != MVT::i1)
    3875             :     return false;
    3876             : 
    3877          12 :   unsigned SrcReg = getRegForValue(Op);
    3878          12 :   if (!SrcReg)
    3879             :     return false;
    3880          12 :   bool SrcIsKill = hasTrivialKill(Op);
    3881             : 
    3882             :   // If we're truncating from i64 to a smaller non-legal type then generate an
    3883             :   // AND. Otherwise, we know the high bits are undefined and a truncate only
    3884             :   // generate a COPY. We cannot mark the source register also as result
    3885             :   // register, because this can incorrectly transfer the kill flag onto the
    3886             :   // source register.
    3887             :   unsigned ResultReg;
    3888          12 :   if (SrcVT == MVT::i64) {
    3889             :     uint64_t Mask = 0;
    3890           6 :     switch (DestVT.SimpleTy) {
    3891             :     default:
    3892             :       // Trunc i64 to i32 is handled by the target-independent fast-isel.
    3893             :       return false;
    3894             :     case MVT::i1:
    3895             :       Mask = 0x1;
    3896             :       break;
    3897           2 :     case MVT::i8:
    3898             :       Mask = 0xff;
    3899           2 :       break;
    3900           1 :     case MVT::i16:
    3901             :       Mask = 0xffff;
    3902           1 :       break;
    3903             :     }
    3904             :     // Issue an extract_subreg to get the lower 32-bits.
    3905          12 :     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
    3906           6 :                                                 AArch64::sub_32);
    3907             :     // Create the AND instruction which performs the actual truncation.
    3908             :     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    3909             :     assert(ResultReg && "Unexpected AND instruction emission failure.");
    3910             :   } else {
    3911           6 :     ResultReg = createResultReg(&AArch64::GPR32RegClass);
    3912          12 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3913          12 :             TII.get(TargetOpcode::COPY), ResultReg)
    3914           6 :         .addReg(SrcReg, getKillRegState(SrcIsKill));
    3915             :   }
    3916             : 
    3917          12 :   updateValueMap(I, ResultReg);
    3918          12 :   return true;
    3919             : }
    3920             : 
    3921         141 : unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
    3922             :   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
    3923             :           DestVT == MVT::i64) &&
    3924             :          "Unexpected value type.");
    3925             :   // Handle i8 and i16 as i32.
    3926         141 :   if (DestVT == MVT::i8 || DestVT == MVT::i16)
    3927             :     DestVT = MVT::i32;
    3928             : 
    3929         141 :   if (IsZExt) {
    3930             :     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    3931             :     assert(ResultReg && "Unexpected AND instruction emission failure.");
    3932         132 :     if (DestVT == MVT::i64) {
    3933             :       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
    3934             :       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
    3935           0 :       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    3936           0 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3937           0 :               TII.get(AArch64::SUBREG_TO_REG), Reg64)
    3938             :           .addImm(0)
    3939           0 :           .addReg(ResultReg)
    3940             :           .addImm(AArch64::sub_32);
    3941             :       ResultReg = Reg64;
    3942             :     }
    3943             :     return ResultReg;
    3944             :   } else {
    3945           9 :     if (DestVT == MVT::i64) {
    3946             :       // FIXME: We're SExt i1 to i64.
    3947             :       return 0;
    3948             :     }
    3949           9 :     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
    3950           9 :                             /*TODO:IsKill=*/false, 0, 0);
    3951             :   }
    3952             : }
    3953             : 
    3954          18 : unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3955             :                                       unsigned Op1, bool Op1IsKill) {
    3956             :   unsigned Opc, ZReg;
    3957          18 :   switch (RetVT.SimpleTy) {
    3958             :   default: return 0;
    3959             :   case MVT::i8:
    3960             :   case MVT::i16:
    3961             :   case MVT::i32:
    3962             :     RetVT = MVT::i32;
    3963           5 :     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
    3964             :   case MVT::i64:
    3965             :     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
    3966             :   }
    3967             : 
    3968             :   const TargetRegisterClass *RC =
    3969          18 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    3970          18 :   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
    3971          18 :                           /*IsKill=*/ZReg, true);
    3972             : }
    3973             : 
    3974             : unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3975             :                                         unsigned Op1, bool Op1IsKill) {
    3976             :   if (RetVT != MVT::i64)
    3977             :     return 0;
    3978             : 
    3979             :   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
    3980             :                           Op0, Op0IsKill, Op1, Op1IsKill,
    3981           3 :                           AArch64::XZR, /*IsKill=*/true);
    3982             : }
    3983             : 
    3984             : unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3985             :                                         unsigned Op1, bool Op1IsKill) {
    3986             :   if (RetVT != MVT::i64)
    3987             :     return 0;
    3988             : 
    3989             :   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
    3990             :                           Op0, Op0IsKill, Op1, Op1IsKill,
    3991           3 :                           AArch64::XZR, /*IsKill=*/true);
    3992             : }
    3993             : 
    3994           4 : unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    3995             :                                      unsigned Op1Reg, bool Op1IsKill) {
    3996             :   unsigned Opc = 0;
    3997             :   bool NeedTrunc = false;
    3998             :   uint64_t Mask = 0;
    3999           4 :   switch (RetVT.SimpleTy) {
    4000             :   default: return 0;
    4001             :   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
    4002           1 :   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
    4003           1 :   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
    4004           1 :   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
    4005             :   }
    4006             : 
    4007             :   const TargetRegisterClass *RC =
    4008           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4009           4 :   if (NeedTrunc) {
    4010           2 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4011             :     Op1IsKill = true;
    4012             :   }
    4013           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4014           4 :                                        Op1IsKill);
    4015           4 :   if (NeedTrunc)
    4016             :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4017             :   return ResultReg;
    4018             : }
    4019             : 
    4020          53 : unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4021             :                                      bool Op0IsKill, uint64_t Shift,
    4022             :                                      bool IsZExt) {
    4023             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4024             :          "Unexpected source/return type pair.");
    4025             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4026             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4027             :          "Unexpected source value type.");
    4028             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4029             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4030             : 
    4031             :   bool Is64Bit = (RetVT == MVT::i64);
    4032          53 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4033          53 :   unsigned DstBits = RetVT.getSizeInBits();
    4034          53 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4035             :   const TargetRegisterClass *RC =
    4036          53 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4037             : 
    4038             :   // Just emit a copy for "zero" shifts.
    4039          53 :   if (Shift == 0) {
    4040           2 :     if (RetVT == SrcVT) {
    4041           1 :       unsigned ResultReg = createResultReg(RC);
    4042           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4043           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4044           1 :           .addReg(Op0, getKillRegState(Op0IsKill));
    4045           1 :       return ResultReg;
    4046             :     } else
    4047           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4048             :   }
    4049             : 
    4050             :   // Don't deal with undefined shifts.
    4051          51 :   if (Shift >= DstBits)
    4052             :     return 0;
    4053             : 
    4054             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4055             :   // {S|U}BFM Wd, Wn, #r, #s
    4056             :   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
    4057             : 
    4058             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4059             :   // %2 = shl i16 %1, 4
    4060             :   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
    4061             :   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
    4062             :   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
    4063             :   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
    4064             : 
    4065             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4066             :   // %2 = shl i16 %1, 8
    4067             :   // Wd<32+7-24,32-24> = Wn<7:0>
    4068             :   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
    4069             :   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
    4070             :   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
    4071             : 
    4072             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4073             :   // %2 = shl i16 %1, 12
    4074             :   // Wd<32+3-20,32-20> = Wn<3:0>
    4075             :   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
    4076             :   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
    4077             :   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
    4078             : 
    4079          37 :   unsigned ImmR = RegSize - Shift;
    4080             :   // Limit the width to the length of the source type.
    4081          74 :   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
    4082             :   static const unsigned OpcTable[2][2] = {
    4083             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4084             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4085             :   };
    4086          37 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4087          37 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4088          10 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4089          20 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4090          20 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4091             :         .addImm(0)
    4092          10 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4093             :         .addImm(AArch64::sub_32);
    4094             :     Op0 = TmpReg;
    4095             :     Op0IsKill = true;
    4096             :   }
    4097          37 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4098             : }
    4099             : 
    4100           4 : unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    4101             :                                      unsigned Op1Reg, bool Op1IsKill) {
    4102             :   unsigned Opc = 0;
    4103             :   bool NeedTrunc = false;
    4104             :   uint64_t Mask = 0;
    4105           4 :   switch (RetVT.SimpleTy) {
    4106             :   default: return 0;
    4107             :   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
    4108           1 :   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
    4109           1 :   case MVT::i32: Opc = AArch64::LSRVWr; break;
    4110           1 :   case MVT::i64: Opc = AArch64::LSRVXr; break;
    4111             :   }
    4112             : 
    4113             :   const TargetRegisterClass *RC =
    4114           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4115           4 :   if (NeedTrunc) {
    4116           2 :     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
    4117           2 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4118             :     Op0IsKill = Op1IsKill = true;
    4119             :   }
    4120           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4121           4 :                                        Op1IsKill);
    4122           4 :   if (NeedTrunc)
    4123             :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4124             :   return ResultReg;
    4125             : }
    4126             : 
    4127          27 : unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4128             :                                      bool Op0IsKill, uint64_t Shift,
    4129             :                                      bool IsZExt) {
    4130             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4131             :          "Unexpected source/return type pair.");
    4132             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4133             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4134             :          "Unexpected source value type.");
    4135             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4136             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4137             : 
    4138             :   bool Is64Bit = (RetVT == MVT::i64);
    4139          27 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4140          27 :   unsigned DstBits = RetVT.getSizeInBits();
    4141          27 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4142             :   const TargetRegisterClass *RC =
    4143          27 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4144             : 
    4145             :   // Just emit a copy for "zero" shifts.
    4146          27 :   if (Shift == 0) {
    4147           2 :     if (RetVT == SrcVT) {
    4148           1 :       unsigned ResultReg = createResultReg(RC);
    4149           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4150           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4151           1 :       .addReg(Op0, getKillRegState(Op0IsKill));
    4152           1 :       return ResultReg;
    4153             :     } else
    4154           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4155             :   }
    4156             : 
    4157             :   // Don't deal with undefined shifts.
    4158          25 :   if (Shift >= DstBits)
    4159             :     return 0;
    4160             : 
    4161             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4162             :   // {S|U}BFM Wd, Wn, #r, #s
    4163             :   // Wd<s-r:0> = Wn<s:r> when r <= s
    4164             : 
    4165             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4166             :   // %2 = lshr i16 %1, 4
    4167             :   // Wd<7-4:0> = Wn<7:4>
    4168             :   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
    4169             :   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
    4170             :   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
    4171             : 
    4172             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4173             :   // %2 = lshr i16 %1, 8
    4174             :   // Wd<7-7,0> = Wn<7:7>
    4175             :   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
    4176             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
    4177             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4178             : 
    4179             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4180             :   // %2 = lshr i16 %1, 12
    4181             :   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
    4182             :   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
    4183             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
    4184             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4185             : 
    4186          25 :   if (Shift >= SrcBits && IsZExt)
    4187           6 :     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
    4188             : 
    4189             :   // It is not possible to fold a sign-extend into the LShr instruction. In this
    4190             :   // case emit a sign-extend.
    4191          22 :   if (!IsZExt) {
    4192           4 :     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4193           4 :     if (!Op0)
    4194             :       return 0;
    4195             :     Op0IsKill = true;
    4196           4 :     SrcVT = RetVT;
    4197           4 :     SrcBits = SrcVT.getSizeInBits();
    4198             :     IsZExt = true;
    4199             :   }
    4200             : 
    4201          44 :   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
    4202             :   unsigned ImmS = SrcBits - 1;
    4203             :   static const unsigned OpcTable[2][2] = {
    4204             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4205             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4206             :   };
    4207          22 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4208          22 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4209           0 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4210           0 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4211           0 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4212             :         .addImm(0)
    4213           0 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4214             :         .addImm(AArch64::sub_32);
    4215             :     Op0 = TmpReg;
    4216             :     Op0IsKill = true;
    4217             :   }
    4218          22 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4219             : }
    4220             : 
    4221           4 : unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    4222             :                                      unsigned Op1Reg, bool Op1IsKill) {
    4223             :   unsigned Opc = 0;
    4224             :   bool NeedTrunc = false;
    4225             :   uint64_t Mask = 0;
    4226           4 :   switch (RetVT.SimpleTy) {
    4227             :   default: return 0;
    4228             :   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
    4229           1 :   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
    4230           1 :   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
    4231           1 :   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
    4232             :   }
    4233             : 
    4234             :   const TargetRegisterClass *RC =
    4235           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4236           4 :   if (NeedTrunc) {
    4237           2 :     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
    4238           2 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4239             :     Op0IsKill = Op1IsKill = true;
    4240             :   }
    4241           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4242           4 :                                        Op1IsKill);
    4243           4 :   if (NeedTrunc)
    4244             :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4245             :   return ResultReg;
    4246             : }
    4247             : 
    4248          28 : unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4249             :                                      bool Op0IsKill, uint64_t Shift,
    4250             :                                      bool IsZExt) {
    4251             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4252             :          "Unexpected source/return type pair.");
    4253             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4254             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4255             :          "Unexpected source value type.");
    4256             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4257             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4258             : 
    4259             :   bool Is64Bit = (RetVT == MVT::i64);
    4260          28 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4261          28 :   unsigned DstBits = RetVT.getSizeInBits();
    4262          28 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4263             :   const TargetRegisterClass *RC =
    4264          28 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4265             : 
    4266             :   // Just emit a copy for "zero" shifts.
    4267          28 :   if (Shift == 0) {
    4268           2 :     if (RetVT == SrcVT) {
    4269           1 :       unsigned ResultReg = createResultReg(RC);
    4270           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4271           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4272           1 :       .addReg(Op0, getKillRegState(Op0IsKill));
    4273           1 :       return ResultReg;
    4274             :     } else
    4275           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4276             :   }
    4277             : 
    4278             :   // Don't deal with undefined shifts.
    4279          26 :   if (Shift >= DstBits)
    4280             :     return 0;
    4281             : 
    4282             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4283             :   // {S|U}BFM Wd, Wn, #r, #s
    4284             :   // Wd<s-r:0> = Wn<s:r> when r <= s
    4285             : 
    4286             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4287             :   // %2 = ashr i16 %1, 4
    4288             :   // Wd<7-4:0> = Wn<7:4>
    4289             :   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
    4290             :   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
    4291             :   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
    4292             : 
    4293             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4294             :   // %2 = ashr i16 %1, 8
    4295             :   // Wd<7-7,0> = Wn<7:7>
    4296             :   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
    4297             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
    4298             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4299             : 
    4300             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4301             :   // %2 = ashr i16 %1, 12
    4302             :   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
    4303             :   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
    4304             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
    4305             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4306             : 
    4307          26 :   if (Shift >= SrcBits && IsZExt)
    4308           6 :     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
    4309             : 
    4310          46 :   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
    4311             :   unsigned ImmS = SrcBits - 1;
    4312             :   static const unsigned OpcTable[2][2] = {
    4313             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4314             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4315             :   };
    4316          23 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4317          23 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4318           1 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4319           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4320           2 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4321             :         .addImm(0)
    4322           1 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4323             :         .addImm(AArch64::sub_32);
    4324             :     Op0 = TmpReg;
    4325             :     Op0IsKill = true;
    4326             :   }
    4327          23 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4328             : }
    4329             : 
    4330         412 : unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
    4331             :                                      bool IsZExt) {
    4332             :   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
    4333             : 
    4334             :   // FastISel does not have plumbing to deal with extensions where the SrcVT or
    4335             :   // DestVT are odd things, so test to make sure that they are both types we can
    4336             :   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
    4337             :   // bail out to SelectionDAG.
    4338         411 :   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
    4339         471 :        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
    4340         271 :       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
    4341          29 :        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
    4342             :     return 0;
    4343             : 
    4344             :   unsigned Opc;
    4345             :   unsigned Imm = 0;
    4346             : 
    4347         412 :   switch (SrcVT.SimpleTy) {
    4348             :   default:
    4349             :     return 0;
    4350         141 :   case MVT::i1:
    4351         141 :     return emiti1Ext(SrcReg, DestVT, IsZExt);
    4352             :   case MVT::i8:
    4353         144 :     if (DestVT == MVT::i64)
    4354          15 :       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4355             :     else
    4356         129 :       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    4357             :     Imm = 7;
    4358             :     break;
    4359             :   case MVT::i16:
    4360          98 :     if (DestVT == MVT::i64)
    4361          15 :       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4362             :     else
    4363          83 :       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    4364             :     Imm = 15;
    4365             :     break;
    4366          29 :   case MVT::i32:
    4367             :     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    4368          29 :     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4369             :     Imm = 31;
    4370             :     break;
    4371             :   }
    4372             : 
    4373             :   // Handle i8 and i16 as i32.
    4374         271 :   if (DestVT == MVT::i8 || DestVT == MVT::i16)
    4375             :     DestVT = MVT::i32;
    4376         268 :   else if (DestVT == MVT::i64) {
    4377          59 :     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    4378         118 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4379         118 :             TII.get(AArch64::SUBREG_TO_REG), Src64)
    4380             :         .addImm(0)
    4381          59 :         .addReg(SrcReg)
    4382             :         .addImm(AArch64::sub_32);
    4383             :     SrcReg = Src64;
    4384             :   }
    4385             : 
    4386             :   const TargetRegisterClass *RC =
    4387         271 :       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4388         271 :   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
    4389             : }
    4390             : 
    4391             : static bool isZExtLoad(const MachineInstr *LI) {
    4392           2 :   switch (LI->getOpcode()) {
    4393             :   default:
    4394             :     return false;
    4395             :   case AArch64::LDURBBi:
    4396             :   case AArch64::LDURHHi:
    4397             :   case AArch64::LDURWi:
    4398             :   case AArch64::LDRBBui:
    4399             :   case AArch64::LDRHHui:
    4400             :   case AArch64::LDRWui:
    4401             :   case AArch64::LDRBBroX:
    4402             :   case AArch64::LDRHHroX:
    4403             :   case AArch64::LDRWroX:
    4404             :   case AArch64::LDRBBroW:
    4405             :   case AArch64::LDRHHroW:
    4406             :   case AArch64::LDRWroW:
    4407             :     return true;
    4408             :   }
    4409             : }
    4410             : 
    4411             : static bool isSExtLoad(const MachineInstr *LI) {
    4412           0 :   switch (LI->getOpcode()) {
    4413             :   default:
    4414             :     return false;
    4415             :   case AArch64::LDURSBWi:
    4416             :   case AArch64::LDURSHWi:
    4417             :   case AArch64::LDURSBXi:
    4418             :   case AArch64::LDURSHXi:
    4419             :   case AArch64::LDURSWi:
    4420             :   case AArch64::LDRSBWui:
    4421             :   case AArch64::LDRSHWui:
    4422             :   case AArch64::LDRSBXui:
    4423             :   case AArch64::LDRSHXui:
    4424             :   case AArch64::LDRSWui:
    4425             :   case AArch64::LDRSBWroX:
    4426             :   case AArch64::LDRSHWroX:
    4427             :   case AArch64::LDRSBXroX:
    4428             :   case AArch64::LDRSHXroX:
    4429             :   case AArch64::LDRSWroX:
    4430             :   case AArch64::LDRSBWroW:
    4431             :   case AArch64::LDRSHWroW:
    4432             :   case AArch64::LDRSBXroW:
    4433             :   case AArch64::LDRSHXroW:
    4434             :   case AArch64::LDRSWroW:
    4435             :     return true;
    4436             :   }
    4437             : }
    4438             : 
    4439         194 : bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
    4440             :                                          MVT SrcVT) {
    4441         194 :   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
    4442          85 :   if (!LI || !LI->hasOneUse())
    4443             :     return false;
    4444             : 
    4445             :   // Check if the load instruction has already been selected.
    4446          85 :   unsigned Reg = lookUpRegForValue(LI);
    4447          85 :   if (!Reg)
    4448             :     return false;
    4449             : 
    4450           1 :   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
    4451           1 :   if (!MI)
    4452             :     return false;
    4453             : 
    4454             :   // Check if the correct load instruction has been emitted - SelectionDAG might
    4455             :   // have emitted a zero-extending load, but we need a sign-extending load.
    4456             :   bool IsZExt = isa<ZExtInst>(I);
    4457             :   const auto *LoadMI = MI;
    4458           2 :   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
    4459           0 :       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    4460           0 :     unsigned LoadReg = MI->getOperand(1).getReg();
    4461           0 :     LoadMI = MRI.getUniqueVRegDef(LoadReg);
    4462             :     assert(LoadMI && "Expected valid instruction");
    4463             :   }
    4464           1 :   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    4465             :     return false;
    4466             : 
    4467             :   // Nothing to be done.
    4468           2 :   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    4469           0 :     updateValueMap(I, Reg);
    4470           0 :     return true;
    4471             :   }
    4472             : 
    4473           1 :   if (IsZExt) {
    4474           1 :     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    4475           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4476           2 :             TII.get(AArch64::SUBREG_TO_REG), Reg64)
    4477             :         .addImm(0)
    4478           1 :         .addReg(Reg, getKillRegState(true))
    4479             :         .addImm(AArch64::sub_32);
    4480             :     Reg = Reg64;
    4481             :   } else {
    4482             :     assert((MI->getOpcode() == TargetOpcode::COPY &&
    4483             :             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
    4484             :            "Expected copy instruction");
    4485           0 :     Reg = MI->getOperand(1).getReg();
    4486           0 :     MI->eraseFromParent();
    4487             :   }
    4488           1 :   updateValueMap(I, Reg);
    4489           1 :   return true;
    4490             : }
    4491             : 
    4492         201 : bool AArch64FastISel::selectIntExt(const Instruction *I) {
    4493             :   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
    4494             :          "Unexpected integer extend instruction.");
    4495         201 :   MVT RetVT;
    4496         201 :   MVT SrcVT;
    4497         201 :   if (!isTypeSupported(I->getType(), RetVT))
    4498             :     return false;
    4499             : 
    4500         388 :   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    4501             :     return false;
    4502             : 
    4503             :   // Try to optimize already sign-/zero-extended values from load instructions.
    4504         194 :   if (optimizeIntExtLoad(I, RetVT, SrcVT))
    4505             :     return true;
    4506             : 
    4507         386 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    4508         193 :   if (!SrcReg)
    4509             :     return false;
    4510         193 :   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
    4511             : 
    4512             :   // Try to optimize already sign-/zero-extended values from function arguments.
    4513             :   bool IsZExt = isa<ZExtInst>(I);
    4514             :   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    4515          79 :     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
    4516          79 :       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
    4517          13 :         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    4518          26 :         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4519          26 :                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
    4520             :             .addImm(0)
    4521          13 :             .addReg(SrcReg, getKillRegState(SrcIsKill))
    4522             :             .addImm(AArch64::sub_32);
    4523             :         SrcReg = ResultReg;
    4524             :       }
    4525             :       // Conservatively clear all kill flags from all uses, because we are
    4526             :       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
    4527             :       // level. The result of the instruction at IR level might have been
    4528             :       // trivially dead, which is now not longer true.
    4529          66 :       unsigned UseReg = lookUpRegForValue(I);
    4530          66 :       if (UseReg)
    4531          66 :         MRI.clearKillFlags(UseReg);
    4532             : 
    4533          66 :       updateValueMap(I, SrcReg);
    4534          66 :       return true;
    4535             :     }
    4536             :   }
    4537             : 
    4538         127 :   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
    4539         127 :   if (!ResultReg)
    4540             :     return false;
    4541             : 
    4542         127 :   updateValueMap(I, ResultReg);
    4543         127 :   return true;
    4544             : }
    4545             : 
    4546           8 : bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
    4547           8 :   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
    4548           8 :   if (!DestEVT.isSimple())
    4549             :     return false;
    4550             : 
    4551             :   MVT DestVT = DestEVT.getSimpleVT();
    4552           8 :   if (DestVT != MVT::i64 && DestVT != MVT::i32)
    4553             :     return false;
    4554             : 
    4555             :   unsigned DivOpc;
    4556             :   bool Is64bit = (DestVT == MVT::i64);
    4557           8 :   switch (ISDOpcode) {
    4558             :   default:
    4559             :     return false;
    4560           4 :   case ISD::SREM:
    4561           4 :     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    4562             :     break;
    4563           4 :   case ISD::UREM:
    4564           4 :     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    4565             :     break;
    4566             :   }
    4567           8 :   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
    4568          16 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4569           8 :   if (!Src0Reg)
    4570             :     return false;
    4571           8 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4572             : 
    4573           8 :   unsigned Src1Reg = getRegForValue(I->getOperand(1));
    4574           8 :   if (!Src1Reg)
    4575             :     return false;
    4576           8 :   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
    4577             : 
    4578             :   const TargetRegisterClass *RC =
    4579           8 :       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4580             :   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
    4581           8 :                                      Src1Reg, /*IsKill=*/false);
    4582             :   assert(QuotReg && "Unexpected DIV instruction emission failure.");
    4583             :   // The remainder is computed as numerator - (quotient * denominator) using the
    4584             :   // MSUB instruction.
    4585           8 :   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
    4586             :                                         Src1Reg, Src1IsKill, Src0Reg,
    4587           8 :                                         Src0IsKill);
    4588           8 :   updateValueMap(I, ResultReg);
    4589           8 :   return true;
    4590             : }
    4591             : 
    4592          10 : bool AArch64FastISel::selectMul(const Instruction *I) {
    4593          10 :   MVT VT;
    4594          10 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    4595             :     return false;
    4596             : 
    4597          10 :   if (VT.isVector())
    4598           0 :     return selectBinaryOp(I, ISD::MUL);
    4599             : 
    4600          10 :   const Value *Src0 = I->getOperand(0);
    4601             :   const Value *Src1 = I->getOperand(1);
    4602             :   if (const auto *C = dyn_cast<ConstantInt>(Src0))
    4603           0 :     if (C->getValue().isPowerOf2())
    4604             :       std::swap(Src0, Src1);
    4605             : 
    4606             :   // Try to simplify to a shift instruction.
    4607             :   if (const auto *C = dyn_cast<ConstantInt>(Src1))
    4608           4 :     if (C->getValue().isPowerOf2()) {
    4609           2 :       uint64_t ShiftVal = C->getValue().logBase2();
    4610           2 :       MVT SrcVT = VT;
    4611             :       bool IsZExt = true;
    4612             :       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
    4613           0 :         if (!isIntExtFree(ZExt)) {
    4614           0 :           MVT VT;
    4615           0 :           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
    4616           0 :             SrcVT = VT;
    4617             :             IsZExt = true;
    4618             :             Src0 = ZExt->getOperand(0);
    4619             :           }
    4620             :         }
    4621             :       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
    4622           0 :         if (!isIntExtFree(SExt)) {
    4623           0 :           MVT VT;
    4624           0 :           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
    4625           0 :             SrcVT = VT;
    4626             :             IsZExt = false;
    4627             :             Src0 = SExt->getOperand(0);
    4628             :           }
    4629             :         }
    4630             :       }
    4631             : 
    4632           2 :       unsigned Src0Reg = getRegForValue(Src0);
    4633           2 :       if (!Src0Reg)
    4634           2 :         return false;
    4635           2 :       bool Src0IsKill = hasTrivialKill(Src0);
    4636             : 
    4637             :       unsigned ResultReg =
    4638           2 :           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
    4639             : 
    4640           2 :       if (ResultReg) {
    4641           2 :         updateValueMap(I, ResultReg);
    4642           2 :         return true;
    4643             :       }
    4644             :     }
    4645             : 
    4646          16 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4647           8 :   if (!Src0Reg)
    4648             :     return false;
    4649           8 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4650             : 
    4651           8 :   unsigned Src1Reg = getRegForValue(I->getOperand(1));
    4652           8 :   if (!Src1Reg)
    4653             :     return false;
    4654           8 :   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
    4655             : 
    4656           8 :   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
    4657             : 
    4658           8 :   if (!ResultReg)
    4659             :     return false;
    4660             : 
    4661           8 :   updateValueMap(I, ResultReg);
    4662           8 :   return true;
    4663             : }
    4664             : 
    4665         102 : bool AArch64FastISel::selectShift(const Instruction *I) {
    4666         102 :   MVT RetVT;
    4667         102 :   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    4668             :     return false;
    4669             : 
    4670         102 :   if (RetVT.isVector())
    4671           0 :     return selectOperator(I, I->getOpcode());
    4672             : 
    4673         102 :   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    4674             :     unsigned ResultReg = 0;
    4675             :     uint64_t ShiftVal = C->getZExtValue();
    4676          90 :     MVT SrcVT = RetVT;
    4677          90 :     bool IsZExt = I->getOpcode() != Instruction::AShr;
    4678             :     const Value *Op0 = I->getOperand(0);
    4679             :     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
    4680          22 :       if (!isIntExtFree(ZExt)) {
    4681          22 :         MVT TmpVT;
    4682          44 :         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
    4683          22 :           SrcVT = TmpVT;
    4684             :           IsZExt = true;
    4685             :           Op0 = ZExt->getOperand(0);
    4686             :         }
    4687             :       }
    4688             :     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
    4689          19 :       if (!isIntExtFree(SExt)) {
    4690          19 :         MVT TmpVT;
    4691          38 :         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
    4692          19 :           SrcVT = TmpVT;
    4693             :           IsZExt = false;
    4694             :           Op0 = SExt->getOperand(0);
    4695             :         }
    4696             :       }
    4697             :     }
    4698             : 
    4699          90 :     unsigned Op0Reg = getRegForValue(Op0);
    4700          90 :     if (!Op0Reg)
    4701             :       return false;
    4702          90 :     bool Op0IsKill = hasTrivialKill(Op0);
    4703             : 
    4704          90 :     switch (I->getOpcode()) {
    4705           0 :     default: llvm_unreachable("Unexpected instruction.");
    4706          48 :     case Instruction::Shl:
    4707          48 :       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4708          48 :       break;
    4709          18 :     case Instruction::AShr:
    4710          18 :       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4711          18 :       break;
    4712          24 :     case Instruction::LShr:
    4713          24 :       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4714          24 :       break;
    4715             :     }
    4716          90 :     if (!ResultReg)
    4717             :       return false;
    4718             : 
    4719          76 :     updateValueMap(I, ResultReg);
    4720          76 :     return true;
    4721             :   }
    4722             : 
    4723          24 :   unsigned Op0Reg = getRegForValue(I->getOperand(0));
    4724          12 :   if (!Op0Reg)
    4725             :     return false;
    4726          12 :   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
    4727             : 
    4728          12 :   unsigned Op1Reg = getRegForValue(I->getOperand(1));
    4729          12 :   if (!Op1Reg)
    4730             :     return false;
    4731          12 :   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
    4732             : 
    4733             :   unsigned ResultReg = 0;
    4734          12 :   switch (I->getOpcode()) {
    4735           0 :   default: llvm_unreachable("Unexpected instruction.");
    4736           4 :   case Instruction::Shl:
    4737           4 :     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4738           4 :     break;
    4739           4 :   case Instruction::AShr:
    4740           4 :     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4741           4 :     break;
    4742           4 :   case Instruction::LShr:
    4743           4 :     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4744           4 :     break;
    4745             :   }
    4746             : 
    4747          12 :   if (!ResultReg)
    4748             :     return false;
    4749             : 
    4750          12 :   updateValueMap(I, ResultReg);
    4751          12 :   return true;
    4752             : }
    4753             : 
    4754          23 : bool AArch64FastISel::selectBitCast(const Instruction *I) {
    4755          23 :   MVT RetVT, SrcVT;
    4756             : 
    4757          46 :   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    4758             :     return false;
    4759          23 :   if (!isTypeLegal(I->getType(), RetVT))
    4760             :     return false;
    4761             : 
    4762             :   unsigned Opc;
    4763          24 :   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    4764             :     Opc = AArch64::FMOVWSr;
    4765          27 :   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    4766             :     Opc = AArch64::FMOVXDr;
    4767          20 :   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    4768             :     Opc = AArch64::FMOVSWr;
    4769          32 :   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    4770             :     Opc = AArch64::FMOVDXr;
    4771             :   else
    4772             :     return false;
    4773             : 
    4774             :   const TargetRegisterClass *RC = nullptr;
    4775           8 :   switch (RetVT.SimpleTy) {
    4776           0 :   default: llvm_unreachable("Unexpected value type.");
    4777             :   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
    4778           3 :   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
    4779           1 :   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
    4780           3 :   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
    4781             :   }
    4782          16 :   unsigned Op0Reg = getRegForValue(I->getOperand(0));
    4783           8 :   if (!Op0Reg)
    4784             :     return false;
    4785           8 :   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
    4786           8 :   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
    4787             : 
    4788           8 :   if (!ResultReg)
    4789             :     return false;
    4790             : 
    4791           8 :   updateValueMap(I, ResultReg);
    4792           8 :   return true;
    4793             : }
    4794             : 
    4795           4 : bool AArch64FastISel::selectFRem(const Instruction *I) {
    4796           4 :   MVT RetVT;
    4797           4 :   if (!isTypeLegal(I->getType(), RetVT))
    4798             :     return false;
    4799             : 
    4800             :   RTLIB::Libcall LC;
    4801           4 :   switch (RetVT.SimpleTy) {
    4802             :   default:
    4803             :     return false;
    4804             :   case MVT::f32:
    4805             :     LC = RTLIB::REM_F32;
    4806             :     break;
    4807           2 :   case MVT::f64:
    4808             :     LC = RTLIB::REM_F64;
    4809           2 :     break;
    4810             :   }
    4811             : 
    4812             :   ArgListTy Args;
    4813           4 :   Args.reserve(I->getNumOperands());
    4814             : 
    4815             :   // Populate the argument list.
    4816          24 :   for (auto &Arg : I->operands()) {
    4817             :     ArgListEntry Entry;
    4818           8 :     Entry.Val = Arg;
    4819           8 :     Entry.Ty = Arg->getType();
    4820           8 :     Args.push_back(Entry);
    4821             :   }
    4822             : 
    4823           8 :   CallLoweringInfo CLI;
    4824           4 :   MCContext &Ctx = MF->getContext();
    4825           4 :   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
    4826          16 :                 TLI.getLibcallName(LC), std::move(Args));
    4827           4 :   if (!lowerCallTo(CLI))
    4828             :     return false;
    4829           4 :   updateValueMap(I, CLI.ResultReg);
    4830           4 :   return true;
    4831             : }
    4832             : 
    4833          14 : bool AArch64FastISel::selectSDiv(const Instruction *I) {
    4834          14 :   MVT VT;
    4835          14 :   if (!isTypeLegal(I->getType(), VT))
    4836             :     return false;
    4837             : 
    4838          28 :   if (!isa<ConstantInt>(I->getOperand(1)))
    4839           0 :     return selectBinaryOp(I, ISD::SDIV);
    4840             : 
    4841             :   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
    4842          56 :   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
    4843          46 :       !(C.isPowerOf2() || (-C).isPowerOf2()))
    4844           0 :     return selectBinaryOp(I, ISD::SDIV);
    4845             : 
    4846          14 :   unsigned Lg2 = C.countTrailingZeros();
    4847          28 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4848          14 :   if (!Src0Reg)
    4849             :     return false;
    4850          14 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4851             : 
    4852          14 :   if (cast<BinaryOperator>(I)->isExact()) {
    4853           3 :     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    4854           3 :     if (!ResultReg)
    4855             :       return false;
    4856           3 :     updateValueMap(I, ResultReg);
    4857           3 :     return true;
    4858             :   }
    4859             : 
    4860          11 :   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
    4861          11 :   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
    4862          11 :   if (!AddReg)
    4863             :     return false;
    4864             : 
    4865             :   // (Src0 < 0) ? Pow2 - 1 : 0;
    4866          11 :   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    4867             :     return false;
    4868             : 
    4869             :   unsigned SelectOpc;
    4870             :   const TargetRegisterClass *RC;
    4871          11 :   if (VT == MVT::i64) {
    4872             :     SelectOpc = AArch64::CSELXr;
    4873             :     RC = &AArch64::GPR64RegClass;
    4874             :   } else {
    4875             :     SelectOpc = AArch64::CSELWr;
    4876             :     RC = &AArch64::GPR32RegClass;
    4877             :   }
    4878             :   unsigned SelectReg =
    4879          11 :       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
    4880          11 :                        Src0IsKill, AArch64CC::LT);
    4881          11 :   if (!SelectReg)
    4882             :     return false;
    4883             : 
    4884             :   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
    4885             :   // negate the result.
    4886          11 :   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    4887             :   unsigned ResultReg;
    4888          22 :   if (C.isNegative())
    4889           4 :     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
    4890             :                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
    4891             :   else
    4892           7 :     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
    4893             : 
    4894          11 :   if (!ResultReg)
    4895             :     return false;
    4896             : 
    4897          11 :   updateValueMap(I, ResultReg);
    4898          11 :   return true;
    4899             : }
    4900             : 
    4901             : /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
    4902             : /// have to duplicate it for AArch64, because otherwise we would fail during the
    4903             : /// sign-extend emission.
    4904           3 : std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
    4905           3 :   unsigned IdxN = getRegForValue(Idx);
    4906           3 :   if (IdxN == 0)
    4907             :     // Unhandled operand. Halt "fast" selection and bail.
    4908           0 :     return std::pair<unsigned, bool>(0, false);
    4909             : 
    4910           3 :   bool IdxNIsKill = hasTrivialKill(Idx);
    4911             : 
    4912             :   // If the index is smaller or larger than intptr_t, truncate or extend it.
    4913           3 :   MVT PtrVT = TLI.getPointerTy(DL);
    4914           3 :   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
    4915           3 :   if (IdxVT.bitsLT(PtrVT)) {
    4916           1 :     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
    4917             :     IdxNIsKill = true;
    4918           2 :   } else if (IdxVT.bitsGT(PtrVT))
    4919           0 :     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
    4920           3 :   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
    4921             : }
    4922             : 
    4923             : /// This is mostly a copy of the existing FastISel GEP code, but we have to
    4924             : /// duplicate it for AArch64, because otherwise we would bail out even for
    4925             : /// simple cases. This is because the standard fastEmit functions don't cover
    4926             : /// MUL at all and ADD is lowered very inefficientily.
    4927          16 : bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
    4928          32 :   unsigned N = getRegForValue(I->getOperand(0));
    4929          16 :   if (!N)
    4930             :     return false;
    4931          16 :   bool NIsKill = hasTrivialKill(I->getOperand(0));
    4932             : 
    4933             :   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
    4934             :   // into a single N = N + TotalOffset.
    4935             :   uint64_t TotalOffs = 0;
    4936          32 :   MVT VT = TLI.getPointerTy(DL);
    4937          34 :   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
    4938          34 :        GTI != E; ++GTI) {
    4939             :     const Value *Idx = GTI.getOperand();
    4940           2 :     if (auto *StTy = GTI.getStructTypeOrNull()) {
    4941           2 :       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
    4942             :       // N = N + Offset
    4943           2 :       if (Field)
    4944           4 :         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    4945             :     } else {
    4946          16 :       Type *Ty = GTI.getIndexedType();
    4947             : 
    4948             :       // If this is a constant subscript, handle it quickly.
    4949          11 :       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
    4950          13 :         if (CI->isZero())
    4951          15 :           continue;
    4952             :         // N = N + Offset
    4953          11 :         TotalOffs +=
    4954          22 :             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
    4955          11 :         continue;
    4956             :       }
    4957           3 :       if (TotalOffs) {
    4958           0 :         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    4959           0 :         if (!N)
    4960           0 :           return false;
    4961             :         NIsKill = true;
    4962             :         TotalOffs = 0;
    4963             :       }
    4964             : 
    4965             :       // N = N + Idx * ElementSize;
    4966           3 :       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
    4967           3 :       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
    4968           3 :       unsigned IdxN = Pair.first;
    4969           3 :       bool IdxNIsKill = Pair.second;
    4970           3 :       if (!IdxN)
    4971             :         return false;
    4972             : 
    4973           3 :       if (ElementSize != 1) {
    4974           3 :         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
    4975           3 :         if (!C)
    4976             :           return false;
    4977           3 :         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
    4978           3 :         if (!IdxN)
    4979             :           return false;
    4980             :         IdxNIsKill = true;
    4981             :       }
    4982           3 :       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
    4983           3 :       if (!N)
    4984             :         return false;
    4985             :     }
    4986             :   }
    4987          16 :   if (TotalOffs) {
    4988          13 :     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    4989          13 :     if (!N)
    4990             :       return false;
    4991             :   }
    4992          16 :   updateValueMap(I, N);
    4993          16 :   return true;
    4994             : }
    4995             : 
    4996           3 : bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
    4997             :   assert(TM.getOptLevel() == CodeGenOpt::None &&
    4998             :          "cmpxchg survived AtomicExpand at optlevel > -O0");
    4999             : 
    5000           3 :   auto *RetPairTy = cast<StructType>(I->getType());
    5001           3 :   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
    5002             :   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
    5003             :          "cmpxchg has a non-i1 status result");
    5004             : 
    5005           3 :   MVT VT;
    5006           3 :   if (!isTypeLegal(RetTy, VT))
    5007             :     return false;
    5008             : 
    5009             :   const TargetRegisterClass *ResRC;
    5010             :   unsigned Opc, CmpOpc;
    5011             :   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
    5012             :   // extractvalue selection doesn't support that.
    5013           3 :   if (VT == MVT::i32) {
    5014             :     Opc = AArch64::CMP_SWAP_32;
    5015             :     CmpOpc = AArch64::SUBSWrs;
    5016             :     ResRC = &AArch64::GPR32RegClass;
    5017           1 :   } else if (VT == MVT::i64) {
    5018             :     Opc = AArch64::CMP_SWAP_64;
    5019             :     CmpOpc = AArch64::SUBSXrs;
    5020             :     ResRC = &AArch64::GPR64RegClass;
    5021             :   } else {
    5022             :     return false;
    5023             :   }
    5024             : 
    5025           3 :   const MCInstrDesc &II = TII.get(Opc);
    5026             : 
    5027           6 :   const unsigned AddrReg = constrainOperandRegClass(
    5028           3 :       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
    5029           6 :   const unsigned DesiredReg = constrainOperandRegClass(
    5030           6 :       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
    5031           6 :   const unsigned NewReg = constrainOperandRegClass(
    5032           6 :       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
    5033             : 
    5034           3 :   const unsigned ResultReg1 = createResultReg(ResRC);
    5035           3 :   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
    5036           3 :   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
    5037             : 
    5038             :   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
    5039           6 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    5040             :       .addDef(ResultReg1)
    5041             :       .addDef(ScratchReg)
    5042             :       .addUse(AddrReg)
    5043             :       .addUse(DesiredReg)
    5044             :       .addUse(NewReg);
    5045             : 
    5046           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
    5047           3 :       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
    5048             :       .addUse(ResultReg1)
    5049             :       .addUse(DesiredReg)
    5050             :       .addImm(0);
    5051             : 
    5052           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
    5053             :       .addDef(ResultReg2)
    5054             :       .addUse(AArch64::WZR)
    5055             :       .addUse(AArch64::WZR)
    5056             :       .addImm(AArch64CC::NE);
    5057             : 
    5058             :   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
    5059           3 :   updateValueMap(I, ResultReg1, 2);
    5060           3 :   return true;
    5061             : }
    5062             : 
    5063        3790 : bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
    5064        3790 :   switch (I->getOpcode()) {
    5065             :   default:
    5066             :     break;
    5067         262 :   case Instruction::Add:
    5068             :   case Instruction::Sub:
    5069         262 :     return selectAddSub(I);
    5070          10 :   case Instruction::Mul:
    5071          10 :     return selectMul(I);
    5072          14 :   case Instruction::SDiv:
    5073          14 :     return selectSDiv(I);
    5074           4 :   case Instruction::SRem:
    5075           4 :     if (!selectBinaryOp(I, ISD::SREM))
    5076           4 :       return selectRem(I, ISD::SREM);
    5077             :     return true;
    5078           4 :   case Instruction::URem:
    5079           4 :     if (!selectBinaryOp(I, ISD::UREM))
    5080           4 :       return selectRem(I, ISD::UREM);
    5081             :     return true;
    5082         102 :   case Instruction::Shl:
    5083             :   case Instruction::LShr:
    5084             :   case Instruction::AShr:
    5085         102 :     return selectShift(I);
    5086          89 :   case Instruction::And:
    5087             :   case Instruction::Or:
    5088             :   case Instruction::Xor:
    5089          89 :     return selectLogicalOp(I);
    5090         272 :   case Instruction::Br:
    5091         272 :     return selectBranch(I);
    5092           0 :   case Instruction::IndirectBr:
    5093           0 :     return selectIndirectBr(I);
    5094          52 :   case Instruction::BitCast:
    5095          52 :     if (!FastISel::selectBitCast(I))
    5096          23 :       return selectBitCast(I);
    5097             :     return true;
    5098           4 :   case Instruction::FPToSI:
    5099           4 :     if (!selectCast(I, ISD::FP_TO_SINT))
    5100           1 :       return selectFPToInt(I, /*Signed=*/true);
    5101             :     return true;
    5102           6 :   case Instruction::FPToUI:
    5103           6 :     return selectFPToInt(I, /*Signed=*/false);
    5104         201 :   case Instruction::ZExt:
    5105             :   case Instruction::SExt:
    5106         201 :     return selectIntExt(I);
    5107          26 :   case Instruction::Trunc:
    5108          26 :     if (!selectCast(I, ISD::TRUNCATE))
    5109          14 :       return selectTrunc(I);
    5110             :     return true;
    5111           5 :   case Instruction::FPExt:
    5112           5 :     return selectFPExt(I);
    5113           2 :   case Instruction::FPTrunc:
    5114           2 :     return selectFPTrunc(I);
    5115          17 :   case Instruction::SIToFP:
    5116          17 :     if (!selectCast(I, ISD::SINT_TO_FP))
    5117           9 :       return selectIntToFP(I, /*Signed=*/true);
    5118             :     return true;
    5119          13 :   case Instruction::UIToFP:
    5120          13 :     return selectIntToFP(I, /*Signed=*/false);
    5121         346 :   case Instruction::Load:
    5122         346 :     return selectLoad(I);
    5123         372 :   case Instruction::Store:
    5124         372 :     return selectStore(I);
    5125          57 :   case Instruction::FCmp:
    5126             :   case Instruction::ICmp:
    5127          57 :     return selectCmp(I);
    5128          53 :   case Instruction::Select:
    5129          53 :     return selectSelect(I);
    5130        1273 :   case Instruction::Ret:
    5131        1273 :     return selectRet(I);
    5132           4 :   case Instruction::FRem:
    5133           4 :     return selectFRem(I);
    5134          16 :   case Instruction::GetElementPtr:
    5135          16 :     return selectGetElementPtr(I);
    5136             :   case Instruction::AtomicCmpXchg:
    5137           3 :     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
    5138             :   }
    5139             : 
    5140             :   // Silence warnings.
    5141             :   (void)&CC_AArch64_DarwinPCS_VarArg;
    5142             :   (void)&CC_AArch64_Win64_VarArg;
    5143             : 
    5144             :   // fall-back to target-independent instruction selection.
    5145         583 :   return selectOperator(I, I->getOpcode());
    5146             : }
    5147             : 
    5148             : namespace llvm {
    5149             : 
    5150        1193 : FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
    5151             :                                         const TargetLibraryInfo *LibInfo) {
    5152        1193 :   return new AArch64FastISel(FuncInfo, LibInfo);
    5153             : }
    5154             : 
    5155             : } // end namespace llvm

Generated by: LCOV version 1.13