LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64FastISel.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2017-09-14 15:23:50

                 Hit    Total    Coverage
Lines:          2344     2477      94.6 %
Functions:        77       78      98.7 %

Legend: Lines: hit | not hit

          Line data    Source code
       1             : //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines the AArch64-specific support for the FastISel class. Some
      11             : // of the target-specific code is generated by tablegen in the file
      12             : // AArch64GenFastISel.inc, which is #included here.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64CallingConvention.h"
      18             : #include "AArch64RegisterInfo.h"
      19             : #include "AArch64Subtarget.h"
      20             : #include "MCTargetDesc/AArch64AddressingModes.h"
      21             : #include "Utils/AArch64BaseInfo.h"
      22             : #include "llvm/ADT/APFloat.h"
      23             : #include "llvm/ADT/APInt.h"
      24             : #include "llvm/ADT/DenseMap.h"
      25             : #include "llvm/ADT/SmallVector.h"
      26             : #include "llvm/Analysis/BranchProbabilityInfo.h"
      27             : #include "llvm/CodeGen/CallingConvLower.h"
      28             : #include "llvm/CodeGen/FastISel.h"
      29             : #include "llvm/CodeGen/FunctionLoweringInfo.h"
      30             : #include "llvm/CodeGen/ISDOpcodes.h"
      31             : #include "llvm/CodeGen/MachineBasicBlock.h"
      32             : #include "llvm/CodeGen/MachineConstantPool.h"
      33             : #include "llvm/CodeGen/MachineFrameInfo.h"
      34             : #include "llvm/CodeGen/MachineInstr.h"
      35             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      36             : #include "llvm/CodeGen/MachineMemOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/MachineValueType.h"
      39             : #include "llvm/CodeGen/RuntimeLibcalls.h"
      40             : #include "llvm/CodeGen/ValueTypes.h"
      41             : #include "llvm/IR/Argument.h"
      42             : #include "llvm/IR/Attributes.h"
      43             : #include "llvm/IR/BasicBlock.h"
      44             : #include "llvm/IR/CallingConv.h"
      45             : #include "llvm/IR/Constant.h"
      46             : #include "llvm/IR/Constants.h"
      47             : #include "llvm/IR/DataLayout.h"
      48             : #include "llvm/IR/DerivedTypes.h"
      49             : #include "llvm/IR/Function.h"
      50             : #include "llvm/IR/GetElementPtrTypeIterator.h"
      51             : #include "llvm/IR/GlobalValue.h"
      52             : #include "llvm/IR/InstrTypes.h"
      53             : #include "llvm/IR/Instruction.h"
      54             : #include "llvm/IR/Instructions.h"
      55             : #include "llvm/IR/IntrinsicInst.h"
      56             : #include "llvm/IR/Intrinsics.h"
      57             : #include "llvm/IR/Operator.h"
      58             : #include "llvm/IR/Type.h"
      59             : #include "llvm/IR/User.h"
      60             : #include "llvm/IR/Value.h"
      61             : #include "llvm/MC/MCInstrDesc.h"
      62             : #include "llvm/MC/MCRegisterInfo.h"
      63             : #include "llvm/MC/MCSymbol.h"
      64             : #include "llvm/Support/AtomicOrdering.h"
      65             : #include "llvm/Support/Casting.h"
      66             : #include "llvm/Support/CodeGen.h"
      67             : #include "llvm/Support/Compiler.h"
      68             : #include "llvm/Support/ErrorHandling.h"
      69             : #include "llvm/Support/MathExtras.h"
      70             : #include <algorithm>
      71             : #include <cassert>
      72             : #include <cstdint>
      73             : #include <iterator>
      74             : #include <utility>
      75             : 
      76             : using namespace llvm;
      77             : 
      78             : namespace {
      79             : 
      80        1213 : class AArch64FastISel final : public FastISel {
      81             :   class Address {
      82             :   public:
      83             :     using BaseKind = enum {
      84             :       RegBase,
      85             :       FrameIndexBase
      86             :     };
      87             : 
      88             :   private:
      89             :     BaseKind Kind = RegBase;
      90             :     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
      91             :     union {
      92             :       unsigned Reg;
      93             :       int FI;
      94             :     } Base;
      95             :     unsigned OffsetReg = 0;
      96             :     unsigned Shift = 0;
      97             :     int64_t Offset = 0;
      98             :     const GlobalValue *GV = nullptr;
      99             : 
     100             :   public:
     101         917 :     Address() { Base.Reg = 0; }
     102             : 
     103         154 :     void setKind(BaseKind K) { Kind = K; }
     104             :     BaseKind getKind() const { return Kind; }
     105          84 :     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
     106             :     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
     107             :     bool isRegBase() const { return Kind == RegBase; }
     108             :     bool isFIBase() const { return Kind == FrameIndexBase; }
     109             : 
     110             :     void setReg(unsigned Reg) {
     111             :       assert(isRegBase() && "Invalid base register access!");
     112        1113 :       Base.Reg = Reg;
     113             :     }
     114             : 
     115             :     unsigned getReg() const {
     116             :       assert(isRegBase() && "Invalid base register access!");
     117             :       return Base.Reg;
     118             :     }
     119             : 
     120             :     void setOffsetReg(unsigned Reg) {
     121         551 :       OffsetReg = Reg;
     122             :     }
     123             : 
     124             :     unsigned getOffsetReg() const {
     125             :       return OffsetReg;
     126             :     }
     127             : 
     128             :     void setFI(unsigned FI) {
     129             :       assert(isFIBase() && "Invalid base frame index access!");
     130         151 :       Base.FI = FI;
     131             :     }
     132             : 
     133             :     unsigned getFI() const {
     134             :       assert(isFIBase() && "Invalid base frame index access!");
     135         156 :       return Base.FI;
     136             :     }
     137             : 
     138         221 :     void setOffset(int64_t O) { Offset = O; }
     139             :     int64_t getOffset() { return Offset; }
     140          64 :     void setShift(unsigned S) { Shift = S; }
     141             :     unsigned getShift() { return Shift; }
     142             : 
     143          91 :     void setGlobalValue(const GlobalValue *G) { GV = G; }
     144             :     const GlobalValue *getGlobalValue() { return GV; }
     145             :   };
     146             : 
     147             :   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
     148             :   /// make the right decision when generating code for different targets.
     149             :   const AArch64Subtarget *Subtarget;
     150             :   LLVMContext *Context;
     151             : 
     152             :   bool fastLowerArguments() override;
     153             :   bool fastLowerCall(CallLoweringInfo &CLI) override;
     154             :   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
     155             : 
     156             : private:
     157             :   // Selection routines.
     158             :   bool selectAddSub(const Instruction *I);
     159             :   bool selectLogicalOp(const Instruction *I);
     160             :   bool selectLoad(const Instruction *I);
     161             :   bool selectStore(const Instruction *I);
     162             :   bool selectBranch(const Instruction *I);
     163             :   bool selectIndirectBr(const Instruction *I);
     164             :   bool selectCmp(const Instruction *I);
     165             :   bool selectSelect(const Instruction *I);
     166             :   bool selectFPExt(const Instruction *I);
     167             :   bool selectFPTrunc(const Instruction *I);
     168             :   bool selectFPToInt(const Instruction *I, bool Signed);
     169             :   bool selectIntToFP(const Instruction *I, bool Signed);
     170             :   bool selectRem(const Instruction *I, unsigned ISDOpcode);
     171             :   bool selectRet(const Instruction *I);
     172             :   bool selectTrunc(const Instruction *I);
     173             :   bool selectIntExt(const Instruction *I);
     174             :   bool selectMul(const Instruction *I);
     175             :   bool selectShift(const Instruction *I);
     176             :   bool selectBitCast(const Instruction *I);
     177             :   bool selectFRem(const Instruction *I);
     178             :   bool selectSDiv(const Instruction *I);
     179             :   bool selectGetElementPtr(const Instruction *I);
     180             :   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
     181             : 
     182             :   // Utility helper routines.
     183             :   bool isTypeLegal(Type *Ty, MVT &VT);
     184             :   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
     185             :   bool isValueAvailable(const Value *V) const;
     186             :   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
     187             :   bool computeCallAddress(const Value *V, Address &Addr);
     188             :   bool simplifyAddress(Address &Addr, MVT VT);
     189             :   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
     190             :                             MachineMemOperand::Flags Flags,
     191             :                             unsigned ScaleFactor, MachineMemOperand *MMO);
     192             :   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
     193             :   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
     194             :                           unsigned Alignment);
     195             :   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
     196             :                          const Value *Cond);
     197             :   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
     198             :   bool optimizeSelect(const SelectInst *SI);
     199             :   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
     200             : 
     201             :   // Emit helper routines.
     202             :   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
     203             :                       const Value *RHS, bool SetFlags = false,
     204             :                       bool WantResult = true, bool IsZExt = false);
     205             :   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
     206             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     207             :                          bool SetFlags = false, bool WantResult = true);
     208             :   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
     209             :                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
     210             :                          bool WantResult = true);
     211             :   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
     212             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     213             :                          AArch64_AM::ShiftExtendType ShiftType,
     214             :                          uint64_t ShiftImm, bool SetFlags = false,
     215             :                          bool WantResult = true);
     216             :   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
     217             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     218             :                          AArch64_AM::ShiftExtendType ExtType,
     219             :                          uint64_t ShiftImm, bool SetFlags = false,
     220             :                          bool WantResult = true);
     221             : 
     222             :   // Emit functions.
     223             :   bool emitCompareAndBranch(const BranchInst *BI);
     224             :   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
     225             :   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
     226             :   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
     227             :   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
     228             :   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
     229             :                     MachineMemOperand *MMO = nullptr);
     230             :   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
     231             :                  MachineMemOperand *MMO = nullptr);
     232             :   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
     233             :                         MachineMemOperand *MMO = nullptr);
     234             :   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
     235             :   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
     236             :   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
     237             :                    bool SetFlags = false, bool WantResult = true,
     238             :                    bool IsZExt = false);
     239             :   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
     240             :   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
     241             :                    bool SetFlags = false, bool WantResult = true,
     242             :                    bool IsZExt = false);
     243             :   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
     244             :                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
     245             :   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
     246             :                        unsigned RHSReg, bool RHSIsKill,
     247             :                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
     248             :                        bool WantResult = true);
     249             :   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
     250             :                          const Value *RHS);
     251             :   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
     252             :                             bool LHSIsKill, uint64_t Imm);
     253             :   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
     254             :                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     255             :                             uint64_t ShiftImm);
     256             :   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
     257             :   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     258             :                       unsigned Op1, bool Op1IsKill);
     259             :   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     260             :                         unsigned Op1, bool Op1IsKill);
     261             :   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     262             :                         unsigned Op1, bool Op1IsKill);
     263             :   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     264             :                       unsigned Op1Reg, bool Op1IsKill);
     265             :   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     266             :                       uint64_t Imm, bool IsZExt = true);
     267             :   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     268             :                       unsigned Op1Reg, bool Op1IsKill);
     269             :   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     270             :                       uint64_t Imm, bool IsZExt = true);
     271             :   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     272             :                       unsigned Op1Reg, bool Op1IsKill);
     273             :   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     274             :                       uint64_t Imm, bool IsZExt = false);
     275             : 
     276             :   unsigned materializeInt(const ConstantInt *CI, MVT VT);
     277             :   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
     278             :   unsigned materializeGV(const GlobalValue *GV);
     279             : 
     280             :   // Call handling routines.
     281             : private:
     282             :   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
     283             :   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
     284             :                        unsigned &NumBytes);
     285             :   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
     286             : 
     287             : public:
     288             :   // Backend specific FastISel code.
     289             :   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
     290             :   unsigned fastMaterializeConstant(const Constant *C) override;
     291             :   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
     292             : 
     293        1214 :   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
     294             :                            const TargetLibraryInfo *LibInfo)
     295        1214 :       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
     296        1214 :     Subtarget =
     297        1214 :         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
     298        1214 :     Context = &FuncInfo.Fn->getContext();
     299        1214 :   }
     300             : 
     301             :   bool fastSelectInstruction(const Instruction *I) override;
     302             : 
     303             : #include "AArch64GenFastISel.inc"
     304             : };
     305             : 
     306             : } // end anonymous namespace
     307             : 
     308             : #include "AArch64GenCallingConv.inc"
     309             : 
     310             : /// \brief Check if the sign-/zero-extend will be a noop.
     311          83 : static bool isIntExtFree(const Instruction *I) {
     312             :   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
     313             :          "Unexpected integer extend instruction.");
     314             :   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
     315             :          "Unexpected value type.");
     316         166 :   bool IsZExt = isa<ZExtInst>(I);
     317             : 
     318         166 :   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
     319           0 :     if (LI->hasOneUse())
     320             :       return true;
     321             : 
     322         246 :   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
     323          80 :     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
     324             :       return true;
     325             : 
     326             :   return false;
     327             : }
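
A hedged illustration (not part of this file) of the two "free" cases accepted
above, in LLVM IR:

  //   Case 1: a single-use load feeding the extend -- AArch64 has extending
  //   loads (ldrb/ldrsb, ldrh/ldrsh), so the extend folds into the load:
  //     %v = load i8, i8* %p
  //     %e = zext i8 %v to i32      ; selects as "ldrb w0, [x0]"
  //
  //   Case 2: the operand is an argument whose zeroext/signext attribute
  //   matches the extend, so the caller already produced the extended value:
  //     define i32 @f(i8 zeroext %x)  ; "zext i8 %x to i32" is a no-op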
     328             : 
     329             : /// \brief Determine the implicit scale factor that is applied by a memory
     330             : /// operation for a given value type.
     331             : static unsigned getImplicitScaleFactor(MVT VT) {
     332             :   switch (VT.SimpleTy) {
     333             :   default:
     334             :     return 0;    // invalid
     335             :   case MVT::i1:  // fall-through
     336             :   case MVT::i8:
     337             :     return 1;
     338             :   case MVT::i16:
     339             :     return 2;
     340             :   case MVT::i32: // fall-through
     341             :   case MVT::f32:
     342             :     return 4;
     343             :   case MVT::i64: // fall-through
     344             :   case MVT::f64:
     345             :     return 8;
     346             :   }
     347             : }
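
As a hedged sketch of how this scale factor is typically consumed (the helper
name below is hypothetical, not from this file): AArch64's scaled load/store
immediates are expressed in units of the access size, so the byte offset must
be non-negative, a multiple of the scale, and fit an unsigned 12-bit field.

  static bool canUseScaledImmediate(int64_t ByteOffset, MVT VT) {
    unsigned Scale = getImplicitScaleFactor(VT);
    if (!Scale || ByteOffset < 0 || (ByteOffset % Scale) != 0)
      return false;                       // unsupported type or misaligned
    return (ByteOffset / Scale) < 4096;   // unsigned 12-bit immediate field
  }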
     348             : 
     349             : CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
     350         194 :   if (CC == CallingConv::WebKit_JS)
     351             :     return CC_AArch64_WebKit_JS;
     352         187 :   if (CC == CallingConv::GHC)
     353             :     return CC_AArch64_GHC;
     354         228 :   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
     355             : }
     356             : 
     357          17 : unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
     358             :   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
     359             :          "Alloca should always return a pointer.");
     360             : 
     361             :   // Don't handle dynamic allocas.
     362          34 :   if (!FuncInfo.StaticAllocaMap.count(AI))
     363             :     return 0;
     364             : 
     365             :   DenseMap<const AllocaInst *, int>::iterator SI =
     366          17 :       FuncInfo.StaticAllocaMap.find(AI);
     367             : 
     368          51 :   if (SI != FuncInfo.StaticAllocaMap.end()) {
     369          17 :     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
     370          34 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
     371          34 :             ResultReg)
     372          34 :         .addFrameIndex(SI->second)
     373          17 :         .addImm(0)
     374          17 :         .addImm(0);
     375          17 :     return ResultReg;
     376             :   }
     377             : 
     378             :   return 0;
     379             : }
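
For reference, the ADDXri built above prints, after frame-index elimination,
as a plain stack-pointer offset, e.g. "add x0, sp, #16" (offset illustrative);
materializing a static alloca's address is just that one add.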
     380             : 
     381         371 : unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
     382         371 :   if (VT > MVT::i64)
     383             :     return 0;
     384             : 
     385         371 :   if (!CI->isZero())
     386         245 :     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
     387             : 
     388             :   // Create a copy from the zero register to materialize a "0" value.
     389         252 :   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
     390         126 :                                                    : &AArch64::GPR32RegClass;
     391         126 :   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
     392         126 :   unsigned ResultReg = createResultReg(RC);
     393         378 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
     394         378 :           ResultReg).addReg(ZeroReg, getKillRegState(true));
     395         126 :   return ResultReg;
     396             : }
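
A hedged sketch of the two paths above, as emitted assembly:

  //   i32 0   ==>  mov w0, wzr    (COPY from the zero register)
  //   i32 42  ==>  mov w0, #42    (MOVZ, via the tablegen'd fastEmit_i)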
     397             : 
     398          47 : unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
     399             :   // Positive zero (+0.0) has to be materialized with a fmov from the zero
     400             :   // register, because the immediate version of fmov cannot encode zero.
     401          47 :   if (CFP->isNullValue())
     402           2 :     return fastMaterializeFloatZero(CFP);
     403             : 
     404          45 :   if (VT != MVT::f32 && VT != MVT::f64)
     405             :     return 0;
     406             : 
     407          86 :   const APFloat Val = CFP->getValueAPF();
     408          43 :   bool Is64Bit = (VT == MVT::f64);
     409             :   // This checks to see if we can use FMOV instructions to materialize
     410             :   // a constant, otherwise we have to materialize via the constant pool.
     411          86 :   if (TLI.isFPImmLegal(Val, VT)) {
     412             :     int Imm =
     413          10 :         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
     414             :     assert((Imm != -1) && "Cannot encode floating-point constant.");
     415          10 :     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
     416          10 :     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
     417             :   }
     418             : 
     419             :   // For the MachO large code model materialize the FP constant in code.
     420          66 :   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
     421           4 :     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
     422           4 :     const TargetRegisterClass *RC = Is64Bit ?
     423             :         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
     424             : 
     425           4 :     unsigned TmpReg = createResultReg(RC);
     426          12 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
     427          16 :         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
     428             : 
     429           4 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
     430           8 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
     431           8 :             TII.get(TargetOpcode::COPY), ResultReg)
     432           4 :         .addReg(TmpReg, getKillRegState(true));
     433             : 
     434           4 :     return ResultReg;
     435             :   }
     436             : 
     437             :   // Materialize via constant pool.  MachineConstantPool wants an explicit
     438             :   // alignment.
     439          29 :   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
     440          29 :   if (Align == 0)
     441           0 :     Align = DL.getTypeAllocSize(CFP->getType());
     442             : 
     443          58 :   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
     444          29 :   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
     445          87 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     446          87 :           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
     447             : 
     448          29 :   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
     449          29 :   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
     450          87 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
     451          29 :       .addReg(ADRPReg)
     452          29 :       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     453          29 :   return ResultReg;
     454             : }
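
For intuition about the isFPImmLegal test above, here is a self-contained
sketch. The encodable set is my reading of the FMOV (immediate) format --
(-1)^s * (16+f)/16 * 2^e with a 4-bit fraction f and an exponent e in [-3, 4]
-- not the actual AArch64_AM helper; note that it excludes +0.0, which is why
the zero-register path exists, and everything else goes to the constant pool.

  #include <cmath>
  static bool looksFMOVEncodable(double V) {
    double A = std::fabs(V);
    for (int E = -3; E <= 4; ++E)             // 3-bit exponent range
      for (int F = 0; F <= 15; ++F)           // 4-bit fraction
        if (A == std::ldexp(16.0 + F, E - 4)) // (16+F)/16 * 2^E
          return true;
    return false;                             // rejects 0.0, inf, NaN
  }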
     455             : 
     456         117 : unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
     457             :   // We can't handle thread-local variables quickly yet.
     458         117 :   if (GV->isThreadLocal())
     459             :     return 0;
     460             : 
     461             :   // MachO still uses GOT for large code-model accesses, but ELF requires
     462             :   // movz/movk sequences, which FastISel doesn't handle yet.
     463         125 :   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
     464             :     return 0;
     465             : 
     466         107 :   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
     467             : 
     468         214 :   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
     469         107 :   if (!DestEVT.isSimple())
     470             :     return 0;
     471             : 
     472         107 :   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
     473             :   unsigned ResultReg;
     474             : 
     475         107 :   if (OpFlags & AArch64II::MO_GOT) {
     476             :     // ADRP + LDRX
     477         144 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     478          96 :             ADRPReg)
     479          48 :       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
     480             : 
     481          48 :     ResultReg = createResultReg(&AArch64::GPR64RegClass);
     482         144 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
     483          96 :             ResultReg)
     484          48 :       .addReg(ADRPReg)
     485          48 :       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
     486             :                         AArch64II::MO_NC);
     487             :   } else {
     488             :     // ADRP + ADDX
     489         177 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     490         118 :             ADRPReg)
     491          59 :       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
     492             : 
     493          59 :     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
     494         177 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
     495         118 :             ResultReg)
     496          59 :       .addReg(ADRPReg)
     497          59 :       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
     498          59 :       .addImm(0);
     499             :   }
     500             :   return ResultReg;
     501             : }
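
Illustrative output for the two sequences above (small code model; relocation
operator syntax varies between MachO and ELF):

  //   GOT access (MO_GOT set):            direct access:
  //     adrp x8, :got:sym                   adrp x8, sym
  //     ldr  x8, [x8, :got_lo12:sym]        add  x8, x8, :lo12:sym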
     502             : 
     503         574 : unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
     504         574 :   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
     505             : 
     506             :   // Only handle simple types.
     507         574 :   if (!CEVT.isSimple())
     508             :     return 0;
     509         574 :   MVT VT = CEVT.getSimpleVT();
     510             : 
     511         365 :   if (const auto *CI = dyn_cast<ConstantInt>(C))
     512         365 :     return materializeInt(CI, VT);
     513          47 :   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
     514          47 :     return materializeFP(CFP, VT);
     515         110 :   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
     516         110 :     return materializeGV(GV);
     517             : 
     518             :   return 0;
     519             : }
     520             : 
     521           2 : unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
     522             :   assert(CFP->isNullValue() &&
     523             :          "Floating-point constant is not a positive zero.");
     524           2 :   MVT VT;
     525           2 :   if (!isTypeLegal(CFP->getType(), VT))
     526             :     return 0;
     527             : 
     528           4 :   if (VT != MVT::f32 && VT != MVT::f64)
     529             :     return 0;
     530             : 
     531           2 :   bool Is64Bit = (VT == MVT::f64);
     532           2 :   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
     533           2 :   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
     534           2 :   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
     535             : }
     536             : 
     537             : /// \brief Check if the multiply is by a power-of-2 constant.
     538         557 : static bool isMulPowOf2(const Value *I) {
     539          23 :   if (const auto *MI = dyn_cast<MulOperator>(I)) {
     540          46 :     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
     541           0 :       if (C->getValue().isPowerOf2())
     542             :         return true;
     543          67 :     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
     544          21 :       if (C->getValue().isPowerOf2())
     545             :         return true;
     546             :   }
     547             :   return false;
     548             : }
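
The reason this pattern matters: a multiply by 2^k feeding an address can be
re-expressed as the shifted-register operand form. A hedged example:

  //   %off  = mul i64 %i, 8               ; power-of-2 multiply
  //   %addr = add i64 %base, %off
  //   load i64 from %addr  ==>  "ldr x0, [x_base, x_i, lsl #3]"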
     549             : 
     550             : // Computes the address to get to an object.
     551        1189 : bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
     552             : {
     553        1189 :   const User *U = nullptr;
     554        1189 :   unsigned Opcode = Instruction::UserOp1;
     555         575 :   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
     556             :     // Don't walk into other basic blocks unless the object is an alloca from
     557             :     // another block, otherwise it may not have a virtual register assigned.
     558        1423 :     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
     559         863 :         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     560         560 :       Opcode = I->getOpcode();
     561         560 :       U = I;
     562             :     }
     563          25 :   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
     564          25 :     Opcode = C->getOpcode();
     565          25 :     U = C;
     566             :   }
     567             : 
     568        2016 :   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
     569         827 :     if (Ty->getAddressSpace() > 255)
     570             :       // Fast instruction selection doesn't support the special
     571             :       // address spaces.
     572             :       return false;
     573             : 
     574        1179 :   switch (Opcode) {
     575             :   default:
     576             :     break;
     577          17 :   case Instruction::BitCast:
     578             :     // Look through bitcasts.
     579          17 :     return computeAddress(U->getOperand(0), Addr, Ty);
     580             : 
     581         136 :   case Instruction::IntToPtr:
     582             :     // Look past no-op inttoptrs.
     583         544 :     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
     584         136 :         TLI.getPointerTy(DL))
     585         136 :       return computeAddress(U->getOperand(0), Addr, Ty);
     586             :     break;
     587             : 
     588           6 :   case Instruction::PtrToInt:
     589             :     // Look past no-op ptrtoints.
     590          18 :     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
     591           6 :       return computeAddress(U->getOperand(0), Addr, Ty);
     592             :     break;
     593             : 
     594          49 :   case Instruction::GetElementPtr: {
     595          49 :     Address SavedAddr = Addr;
     596          49 :     uint64_t TmpOffset = Addr.getOffset();
     597             : 
     598             :     // Iterate through the GEP folding the constants into offsets where
     599             :     // we can.
     600         133 :     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
     601         133 :          GTI != E; ++GTI) {
     602         174 :       const Value *Op = GTI.getOperand();
     603          21 :       if (StructType *STy = GTI.getStructTypeOrNull()) {
     604          21 :         const StructLayout *SL = DL.getStructLayout(STy);
     605          42 :         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
     606          21 :         TmpOffset += SL->getElementOffset(Idx);
     607             :       } else {
     608          66 :         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
     609             :         while (true) {
     610          63 :           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
     611             :             // Constant-offset addressing.
     612          63 :             TmpOffset += CI->getSExtValue() * S;
     613          63 :             break;
     614             :           }
     615           3 :           if (canFoldAddIntoGEP(U, Op)) {
     616             :             // A compatible add with a constant operand. Fold the constant.
     617             :             ConstantInt *CI =
     618           0 :                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
     619           0 :             TmpOffset += CI->getSExtValue() * S;
     620             :             // Iterate on the other operand.
     621           0 :             Op = cast<AddOperator>(Op)->getOperand(0);
     622           0 :             continue;
     623             :           }
     624             :           // Unsupported
     625           3 :           goto unsupported_gep;
     626           0 :         }
     627             :       }
     628             :     }
     629             : 
     630             :     // Try to grab the base operand now.
     631          92 :     Addr.setOffset(TmpOffset);
     632          46 :     if (computeAddress(U->getOperand(0), Addr, Ty))
     633          45 :       return true;
     634             : 
     635             :     // We failed, restore everything and try the other options.
     636           1 :     Addr = SavedAddr;
     637             : 
     638           4 :   unsupported_gep:
     639           4 :     break;
     640             :   }
     641         151 :   case Instruction::Alloca: {
     642         151 :     const AllocaInst *AI = cast<AllocaInst>(Obj);
     643             :     DenseMap<const AllocaInst *, int>::iterator SI =
     644         151 :         FuncInfo.StaticAllocaMap.find(AI);
     645         453 :     if (SI != FuncInfo.StaticAllocaMap.end()) {
     646         302 :       Addr.setKind(Address::FrameIndexBase);
     647         302 :       Addr.setFI(SI->second);
     648         151 :       return true;
     649             :     }
     650           0 :     break;
     651             :   }
     652         117 :   case Instruction::Add: {
     653             :     // Adds of constants are common and easy enough.
     654         117 :     const Value *LHS = U->getOperand(0);
     655         117 :     const Value *RHS = U->getOperand(1);
     656             : 
     657         234 :     if (isa<ConstantInt>(LHS))
     658             :       std::swap(LHS, RHS);
     659             : 
     660         146 :     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
     661          87 :       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
     662         146 :       return computeAddress(LHS, Addr, Ty);
     663             :     }
     664             : 
     665          88 :     Address Backup = Addr;
     666          88 :     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
     667             :       return true;
     668           0 :     Addr = Backup;
     669             : 
     670           0 :     break;
     671             :   }
     672          21 :   case Instruction::Sub: {
     673             :     // Subs of constants are common and easy enough.
     674          21 :     const Value *LHS = U->getOperand(0);
     675          21 :     const Value *RHS = U->getOperand(1);
     676             : 
     677          21 :     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
     678          63 :       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
     679          21 :       return computeAddress(LHS, Addr, Ty);
     680             :     }
     681             :     break;
     682             :   }
     683          26 :   case Instruction::Shl: {
     684          26 :     if (Addr.getOffsetReg())
     685             :       break;
     686             : 
     687          46 :     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
     688             :     if (!CI)
     689             :       break;
     690             : 
     691          23 :     unsigned Val = CI->getZExtValue();
     692          23 :     if (Val < 1 || Val > 3)
     693             :       break;
     694             : 
     695          23 :     uint64_t NumBytes = 0;
     696          23 :     if (Ty && Ty->isSized()) {
     697          23 :       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
     698          23 :       NumBytes = NumBits / 8;
     699          23 :       if (!isPowerOf2_64(NumBits))
     700             :         NumBytes = 0;
     701             :     }
     702             : 
     703          23 :     if (NumBytes != (1ULL << Val))
     704             :       break;
     705             : 
     706          46 :     Addr.setShift(Val);
     707          46 :     Addr.setExtendType(AArch64_AM::LSL);
     708             : 
     709          23 :     const Value *Src = U->getOperand(0);
     710          17 :     if (const auto *I = dyn_cast<Instruction>(Src)) {
     711          34 :       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     712             :         // Fold the zext or sext when it won't become a noop.
     713          17 :         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
     714           9 :           if (!isIntExtFree(ZE) &&
     715           8 :               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     716           8 :             Addr.setExtendType(AArch64_AM::UXTW);
     717           4 :             Src = ZE->getOperand(0);
     718             :           }
     719           8 :         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
     720          15 :           if (!isIntExtFree(SE) &&
     721          14 :               SE->getOperand(0)->getType()->isIntegerTy(32)) {
     722          14 :             Addr.setExtendType(AArch64_AM::SXTW);
     723           7 :             Src = SE->getOperand(0);
     724             :           }
     725             :         }
     726             :       }
     727             :     }
     728             : 
     729           5 :     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
     730           5 :       if (AI->getOpcode() == Instruction::And) {
     731           4 :         const Value *LHS = AI->getOperand(0);
     732           4 :         const Value *RHS = AI->getOperand(1);
     733             : 
     734           4 :         if (const auto *C = dyn_cast<ConstantInt>(LHS))
     735           0 :           if (C->getValue() == 0xffffffff)
     736             :             std::swap(LHS, RHS);
     737             : 
     738           7 :         if (const auto *C = dyn_cast<ConstantInt>(RHS))
     739           3 :           if (C->getValue() == 0xffffffff) {
     740           6 :             Addr.setExtendType(AArch64_AM::UXTW);
     741           3 :             unsigned Reg = getRegForValue(LHS);
     742           3 :             if (!Reg)
     743             :               return false;
     744           3 :             bool RegIsKill = hasTrivialKill(LHS);
     745           6 :             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
     746           3 :                                              AArch64::sub_32);
     747           6 :             Addr.setOffsetReg(Reg);
     748           3 :             return true;
     749             :           }
     750             :       }
     751             : 
     752          20 :     unsigned Reg = getRegForValue(Src);
     753          20 :     if (!Reg)
     754             :       return false;
     755          40 :     Addr.setOffsetReg(Reg);
     756          20 :     return true;
     757             :   }
     758          13 :   case Instruction::Mul: {
     759          13 :     if (Addr.getOffsetReg())
     760             :       break;
     761             : 
     762          13 :     if (!isMulPowOf2(U))
     763             :       break;
     764             : 
     765          13 :     const Value *LHS = U->getOperand(0);
     766          13 :     const Value *RHS = U->getOperand(1);
     767             : 
     768             :     // Canonicalize power-of-2 value to the RHS.
     769          13 :     if (const auto *C = dyn_cast<ConstantInt>(LHS))
     770           0 :       if (C->getValue().isPowerOf2())
     771             :         std::swap(LHS, RHS);
     772             : 
     773             :     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
     774          26 :     const auto *C = cast<ConstantInt>(RHS);
     775          26 :     unsigned Val = C->getValue().logBase2();
     776          13 :     if (Val < 1 || Val > 3)
     777             :       break;
     778             : 
     779          13 :     uint64_t NumBytes = 0;
     780          13 :     if (Ty && Ty->isSized()) {
     781          13 :       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
     782          13 :       NumBytes = NumBits / 8;
     783          13 :       if (!isPowerOf2_64(NumBits))
     784             :         NumBytes = 0;
     785             :     }
     786             : 
     787          13 :     if (NumBytes != (1ULL << Val))
     788             :       break;
     789             : 
     790          26 :     Addr.setShift(Val);
     791          26 :     Addr.setExtendType(AArch64_AM::LSL);
     792             : 
     793          13 :     const Value *Src = LHS;
     794           9 :     if (const auto *I = dyn_cast<Instruction>(Src)) {
     795          18 :       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     796             :         // Fold the zext or sext when it won't become a noop.
     797           8 :         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
     798           7 :           if (!isIntExtFree(ZE) &&
     799           6 :               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     800           6 :             Addr.setExtendType(AArch64_AM::UXTW);
     801           3 :             Src = ZE->getOperand(0);
     802             :           }
     803           4 :         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
     804           7 :           if (!isIntExtFree(SE) &&
     805           6 :               SE->getOperand(0)->getType()->isIntegerTy(32)) {
     806           6 :             Addr.setExtendType(AArch64_AM::SXTW);
     807           3 :             Src = SE->getOperand(0);
     808             :           }
     809             :         }
     810             :       }
     811             :     }
     812             : 
     813          13 :     unsigned Reg = getRegForValue(Src);
     814          13 :     if (!Reg)
     815             :       return false;
     816          26 :     Addr.setOffsetReg(Reg);
     817          13 :     return true;
     818             :   }
     819           3 :   case Instruction::And: {
     820           3 :     if (Addr.getOffsetReg())
     821             :       break;
     822             : 
     823           3 :     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
     824             :       break;
     825             : 
     826           1 :     const Value *LHS = U->getOperand(0);
     827           1 :     const Value *RHS = U->getOperand(1);
     828             : 
     829           1 :     if (const auto *C = dyn_cast<ConstantInt>(LHS))
     830           0 :       if (C->getValue() == 0xffffffff)
     831             :         std::swap(LHS, RHS);
     832             : 
     833           2 :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
     834           1 :       if (C->getValue() == 0xffffffff) {
     835           2 :         Addr.setShift(0);
     836           1 :         Addr.setExtendType(AArch64_AM::LSL);
     837           2 :         Addr.setExtendType(AArch64_AM::UXTW);
     838             : 
     839           1 :         unsigned Reg = getRegForValue(LHS);
     840           1 :         if (!Reg)
     841             :           return false;
     842           1 :         bool RegIsKill = hasTrivialKill(LHS);
     843           2 :         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
     844           1 :                                          AArch64::sub_32);
     845           2 :         Addr.setOffsetReg(Reg);
     846           1 :         return true;
     847             :       }
     848             :     break;
     849             :   }
     850          20 :   case Instruction::SExt:
     851             :   case Instruction::ZExt: {
     852          20 :     if (!Addr.getReg() || Addr.getOffsetReg())
     853             :       break;
     854             : 
     855          20 :     const Value *Src = nullptr;
     856             :     // Fold the zext or sext when it won't become a noop.
     857           0 :     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
     858           0 :       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     859           0 :         Addr.setExtendType(AArch64_AM::UXTW);
     860           0 :         Src = ZE->getOperand(0);
     861             :       }
     862          20 :     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
     863          40 :       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
     864          40 :         Addr.setExtendType(AArch64_AM::SXTW);
     865          20 :         Src = SE->getOperand(0);
     866             :       }
     867             :     }
     868             : 
     869          20 :     if (!Src)
     870             :       break;
     871             : 
     872          40 :     Addr.setShift(0);
     873          20 :     unsigned Reg = getRegForValue(Src);
     874          20 :     if (!Reg)
     875             :       return false;
     876          40 :     Addr.setOffsetReg(Reg);
     877          20 :     return true;
     878             :   }
     879             :   } // end switch
     880             : 
     881         629 :   if (Addr.isRegBase() && !Addr.getReg()) {
     882         595 :     unsigned Reg = getRegForValue(Obj);
     883         595 :     if (!Reg)
     884             :       return false;
     885        1174 :     Addr.setReg(Reg);
     886         587 :     return true;
     887             :   }
     888             : 
     889          34 :   if (!Addr.getOffsetReg()) {
     890          34 :     unsigned Reg = getRegForValue(Obj);
     891          34 :     if (!Reg)
     892             :       return false;
     893          68 :     Addr.setOffsetReg(Reg);
     894          34 :     return true;
     895             :   }
     896             : 
     897             :   return false;
     898             : }
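
A worked example (hedged, illustrative values) of what this routine folds:

  //   %a = alloca [4 x i32]
  //   %p = getelementptr [4 x i32], [4 x i32]* %a, i64 0, i64 2
  //   %v = load i32, i32* %p
  //
  // The GetElementPtr case accumulates 2 * 4 bytes into Addr.Offset and the
  // Alloca case turns the base into a frame index, so the load selects as a
  // single frame-indexed "ldr w0, [sp, #slot+8]" with no explicit address
  // arithmetic.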
     899             : 
     900         111 : bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
     901         126 :   const User *U = nullptr;
     902         126 :   unsigned Opcode = Instruction::UserOp1;
     903         126 :   bool InMBB = true;
     904             : 
     905          15 :   if (const auto *I = dyn_cast<Instruction>(V)) {
     906          15 :     Opcode = I->getOpcode();
     907          15 :     U = I;
     908          15 :     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
     909           3 :   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
     910           3 :     Opcode = C->getOpcode();
     911           3 :     U = C;
     912             :   }
     913             : 
     914         126 :   switch (Opcode) {
     915             :   default: break;
     916           2 :   case Instruction::BitCast:
     917             :     // Look past bitcasts if its operand is in the same BB.
     918           2 :     if (InMBB)
     919           2 :       return computeCallAddress(U->getOperand(0), Addr);
     920             :     break;
     921          13 :   case Instruction::IntToPtr:
     922             :     // Look past no-op inttoptrs if its operand is in the same BB.
     923          13 :     if (InMBB &&
     924          39 :         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
     925          13 :             TLI.getPointerTy(DL))
     926          13 :       return computeCallAddress(U->getOperand(0), Addr);
     927             :     break;
     928           0 :   case Instruction::PtrToInt:
     929             :     // Look past no-op ptrtoints if its operand is in the same BB.
     930           0 :     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
     931           0 :       return computeCallAddress(U->getOperand(0), Addr);
     932             :     break;
     933             :   }
     934             : 
     935          91 :   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
     936         182 :     Addr.setGlobalValue(GV);
     937          91 :     return true;
     938             :   }
     939             : 
     940             :   // If all else fails, try to materialize the value in a register.
     941          20 :   if (!Addr.getGlobalValue()) {
     942          40 :     Addr.setReg(getRegForValue(V));
     943          20 :     return Addr.getReg() != 0;
     944             :   }
     945             : 
     946             :   return false;
     947             : }
     948             : 
     949        3661 : bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
     950        3661 :   EVT evt = TLI.getValueType(DL, Ty, true);
     951             : 
     952             :   // Only handle simple types.
     953        7312 :   if (evt == MVT::Other || !evt.isSimple())
     954             :     return false;
     955        3650 :   VT = evt.getSimpleVT();
     956             : 
     957             :   // This is a legal type, but it's not something we handle in fast-isel.
     958        3650 :   if (VT == MVT::f128)
     959             :     return false;
     960             : 
     961             :   // Handle all other legal types, i.e. a register that will directly hold this
     962             :   // value.
     963       10872 :   return TLI.isTypeLegal(VT);
     964             : }
     965             : 
     966             : /// \brief Determine if the value type is supported by FastISel.
     967             : ///
     968             : /// FastISel for AArch64 can handle more value types than are legal. This adds
     969             : /// simple value types such as i1, i8, and i16.
     970        1913 : bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
     971        1913 :   if (Ty->isVectorTy() && !IsVectorAllowed)
     972             :     return false;
     973             : 
     974        1907 :   if (isTypeLegal(Ty, VT))
     975             :     return true;
     976             : 
     977             :   // If this is a type that can be sign or zero-extended to a basic operation
     978             :   // go ahead and accept it now.
     979         972 :   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
     980             :     return true;
     981             : 
     982             :   return false;
     983             : }
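
The split between legal and merely supported types is small enough to model standalone. A toy sketch assuming this reduced set of scalar types (the enum and both predicates are illustrative stand-ins for MVT, isTypeLegal, and isTypeSupported; real legality covers more types, such as vectors):

    enum class VT { i1, i8, i16, i32, i64, f32, f64, f128, Other };

    bool isLegal(VT V) {       // values a register holds directly
      return V == VT::i32 || V == VT::i64 || V == VT::f32 || V == VT::f64;
    }

    bool isSupported(VT V) {   // legal, or promotable via sign-/zero-extension
      return isLegal(V) || V == VT::i1 || V == VT::i8 || V == VT::i16;
    }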
     984             : 
     985        1292 : bool AArch64FastISel::isValueAvailable(const Value *V) const {
     986        2584 :   if (!isa<Instruction>(V))
     987             :     return true;
     988             : 
     989        1574 :   const auto *I = cast<Instruction>(V);
     990        1574 :   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
     991             : }
     992             : 
     993         827 : bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
     994         611 :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
     995         611 :   if (!ScaleFactor)
     996             :     return false;
     997             : 
     998         611 :   bool ImmediateOffsetNeedsLowering = false;
     999         611 :   bool RegisterOffsetNeedsLowering = false;
    1000         611 :   int64_t Offset = Addr.getOffset();
    1001         647 :   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    1002             :     ImmediateOffsetNeedsLowering = true;
    1003         715 :   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
    1004         108 :            !isUInt<12>(Offset / ScaleFactor))
    1005             :     ImmediateOffsetNeedsLowering = true;
    1006             : 
    1007             :   // Cannot encode an offset register and an immediate offset in the same
    1008             :   // instruction. Fold the immediate offset into the load/store instruction and
    1009             :   // emit an additional add to take care of the offset register.
    1010         598 :   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    1011             :     RegisterOffsetNeedsLowering = true;
    1012             : 
     1013             :   // Cannot encode the zero register as the base.
    1014         611 :   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    1015             :     RegisterOffsetNeedsLowering = true;
    1016             : 
     1017             :   // If this is a frame-index base and the offset needs to be simplified, put
     1018             :   // the alloca address into a register, set the address kind back to register
     1019             :   // base, and continue. This should almost never happen.
    1020         611 :   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
    1021             :   {
    1022           3 :     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    1023           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    1024           6 :             ResultReg)
    1025           6 :       .addFrameIndex(Addr.getFI())
    1026           3 :       .addImm(0)
    1027           3 :       .addImm(0);
    1028           6 :     Addr.setKind(Address::RegBase);
    1029           3 :     Addr.setReg(ResultReg);
    1030             :   }
    1031             : 
    1032         611 :   if (RegisterOffsetNeedsLowering) {
    1033           7 :     unsigned ResultReg = 0;
    1034           7 :     if (Addr.getReg()) {
    1035           7 :       if (Addr.getExtendType() == AArch64_AM::SXTW ||
    1036           3 :           Addr.getExtendType() == AArch64_AM::UXTW   )
    1037           2 :         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    1038             :                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    1039             :                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
    1040           2 :                                   Addr.getShift());
    1041             :       else
    1042           6 :         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    1043             :                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    1044             :                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
    1045           6 :                                   Addr.getShift());
    1046             :     } else {
    1047           3 :       if (Addr.getExtendType() == AArch64_AM::UXTW)
    1048           0 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
    1049           0 :                                /*Op0IsKill=*/false, Addr.getShift(),
    1050           0 :                                /*IsZExt=*/true);
    1051           3 :       else if (Addr.getExtendType() == AArch64_AM::SXTW)
    1052           2 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
    1053           1 :                                /*Op0IsKill=*/false, Addr.getShift(),
    1054           2 :                                /*IsZExt=*/false);
    1055             :       else
    1056           4 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
    1057           6 :                                /*Op0IsKill=*/false, Addr.getShift());
    1058             :     }
    1059           7 :     if (!ResultReg)
    1060             :       return false;
    1061             : 
    1062          14 :     Addr.setReg(ResultReg);
    1063          14 :     Addr.setOffsetReg(0);
    1064          14 :     Addr.setShift(0);
    1065           7 :     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
    1066             :   }
    1067             : 
     1068             :   // Since the offset is too large for the load/store instruction, get the
     1069             :   // reg+offset into a register.
    1070         611 :   if (ImmediateOffsetNeedsLowering) {
    1071             :     unsigned ResultReg;
    1072          13 :     if (Addr.getReg())
    1073             :       // Try to fold the immediate into the add instruction.
    1074          26 :       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    1075             :     else
    1076           0 :       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
    1077             : 
    1078          13 :     if (!ResultReg)
    1079             :       return false;
    1080          22 :     Addr.setReg(ResultReg);
    1081          11 :     Addr.setOffset(0);
    1082             :   }
    1083             :   return true;
    1084             : }
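
The immediate-offset tests above reduce to two encodings: a scaled, unsigned 12-bit offset, or an unscaled, signed 9-bit offset. A standalone sketch of that decision, assuming the standard AArch64 LDR/STR and LDUR/STUR immediate forms (function names are illustrative):

    #include <cstdint>

    bool fitsScaledU12(int64_t Off, unsigned Scale) {
      return Off >= 0 && Off % Scale == 0 && Off / Scale < (1 << 12);
    }

    bool fitsUnscaledS9(int64_t Off) { return Off >= -256 && Off <= 255; }

    // Mirrors ImmediateOffsetNeedsLowering: lower only when neither form fits.
    bool offsetNeedsLowering(int64_t Off, unsigned Scale) {
      return !fitsScaledU12(Off, Scale) && !fitsUnscaledS9(Off);
    }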
    1085             : 
    1086         609 : void AArch64FastISel::addLoadStoreOperands(Address &Addr,
    1087             :                                            const MachineInstrBuilder &MIB,
    1088             :                                            MachineMemOperand::Flags Flags,
    1089             :                                            unsigned ScaleFactor,
    1090             :                                            MachineMemOperand *MMO) {
    1091         609 :   int64_t Offset = Addr.getOffset() / ScaleFactor;
    1092             :   // Frame base works a bit differently. Handle it separately.
    1093         609 :   if (Addr.isFIBase()) {
    1094         312 :     int FI = Addr.getFI();
    1095             :     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
    1096             :     // and alignment should be based on the VT.
    1097         780 :     MMO = FuncInfo.MF->getMachineMemOperand(
    1098         156 :         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
    1099         468 :         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    1100             :     // Now add the rest of the operands.
    1101         156 :     MIB.addFrameIndex(FI).addImm(Offset);
    1102             :   } else {
    1103             :     assert(Addr.isRegBase() && "Unexpected address kind.");
    1104         453 :     const MCInstrDesc &II = MIB->getDesc();
    1105         453 :     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    1106        1359 :     Addr.setReg(
    1107             :       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    1108         906 :     Addr.setOffsetReg(
    1109         453 :       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    1110         453 :     if (Addr.getOffsetReg()) {
    1111             :       assert(Addr.getOffset() == 0 && "Unexpected offset");
    1112         140 :       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
    1113         140 :                       Addr.getExtendType() == AArch64_AM::SXTX;
    1114          84 :       MIB.addReg(Addr.getReg());
    1115          84 :       MIB.addReg(Addr.getOffsetReg());
    1116         168 :       MIB.addImm(IsSigned);
    1117          84 :       MIB.addImm(Addr.getShift() != 0);
    1118             :     } else
    1119         369 :       MIB.addReg(Addr.getReg()).addImm(Offset);
    1120             :   }
    1121             : 
    1122         609 :   if (MMO)
    1123             :     MIB.addMemOperand(MMO);
    1124         609 : }
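
One detail worth calling out: for stores, the machine instruction's first use operand is the value being stored, which is why Idx shifts the base/offset operand position by one. That index computation in isolation (sketch; the name is illustrative):

    // A store's value operand occupies the first use slot, so the base/offset
    // registers for stores start one position later than for loads.
    unsigned firstAddrOperand(bool IsStore, unsigned NumDefs) {
      return NumDefs + (IsStore ? 1 : 0);
    }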
    1125             : 
    1126         295 : unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
    1127             :                                      const Value *RHS, bool SetFlags,
    1128             :                                      bool WantResult,  bool IsZExt) {
    1129         295 :   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
    1130         295 :   bool NeedExtend = false;
    1131         295 :   switch (RetVT.SimpleTy) {
    1132             :   default:
    1133             :     return 0;
    1134           2 :   case MVT::i1:
    1135           2 :     NeedExtend = true;
    1136           2 :     break;
    1137           4 :   case MVT::i8:
    1138           4 :     NeedExtend = true;
    1139           4 :     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    1140             :     break;
    1141           5 :   case MVT::i16:
    1142           5 :     NeedExtend = true;
    1143           5 :     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    1144             :     break;
    1145             :   case MVT::i32:  // fall-through
    1146             :   case MVT::i64:
    1147             :     break;
    1148             :   }
    1149         295 :   MVT SrcVT = RetVT;
    1150         590 :   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
    1151             : 
    1152             :   // Canonicalize immediates to the RHS first.
    1153         296 :   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    1154             :     std::swap(LHS, RHS);
    1155             : 
    1156             :   // Canonicalize mul by power of 2 to the RHS.
    1157         507 :   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    1158         176 :     if (isMulPowOf2(LHS))
    1159             :       std::swap(LHS, RHS);
    1160             : 
    1161             :   // Canonicalize shift immediate to the RHS.
    1162         507 :   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    1163         255 :     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
    1164         158 :       if (isa<ConstantInt>(SI->getOperand(1)))
    1165           8 :         if (SI->getOpcode() == Instruction::Shl  ||
    1166          11 :             SI->getOpcode() == Instruction::LShr ||
    1167           3 :             SI->getOpcode() == Instruction::AShr   )
    1168             :           std::swap(LHS, RHS);
    1169             : 
    1170         295 :   unsigned LHSReg = getRegForValue(LHS);
    1171         295 :   if (!LHSReg)
    1172             :     return 0;
    1173         295 :   bool LHSIsKill = hasTrivialKill(LHS);
    1174             : 
    1175         295 :   if (NeedExtend)
    1176          11 :     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
    1177             : 
    1178         295 :   unsigned ResultReg = 0;
    1179         336 :   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    1180          81 :     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    1181          41 :     if (C->isNegative())
    1182           6 :       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
    1183             :                                 SetFlags, WantResult);
    1184             :     else
    1185          35 :       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
    1186             :                                 WantResult);
    1187         256 :   } else if (const auto *C = dyn_cast<Constant>(RHS))
    1188           2 :     if (C->isNullValue())
    1189           2 :       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
    1190             :                                 WantResult);
    1191             : 
    1192          43 :   if (ResultReg)
    1193             :     return ResultReg;
    1194             : 
    1195             :   // Only extend the RHS within the instruction if there is a valid extend type.
    1196         267 :   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
    1197           6 :       isValueAvailable(RHS)) {
    1198           6 :     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
    1199           0 :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
    1200           0 :         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
    1201           0 :           unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1202           0 :           if (!RHSReg)
    1203             :             return 0;
    1204           0 :           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1205           0 :           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1206             :                                RHSIsKill, ExtendType, C->getZExtValue(),
    1207           0 :                                SetFlags, WantResult);
    1208             :         }
    1209           6 :     unsigned RHSReg = getRegForValue(RHS);
    1210           6 :     if (!RHSReg)
    1211             :       return 0;
    1212           6 :     bool RHSIsKill = hasTrivialKill(RHS);
    1213           6 :     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1214           6 :                          ExtendType, 0, SetFlags, WantResult);
    1215             :   }
    1216             : 
    1217             :   // Check if the mul can be folded into the instruction.
    1218         498 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1219         208 :     if (isMulPowOf2(RHS)) {
    1220           0 :       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    1221           0 :       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    1222             : 
    1223           0 :       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
    1224           0 :         if (C->getValue().isPowerOf2())
    1225             :           std::swap(MulLHS, MulRHS);
    1226             : 
    1227             :       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    1228           0 :       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    1229           0 :       unsigned RHSReg = getRegForValue(MulLHS);
    1230           0 :       if (!RHSReg)
    1231             :         return 0;
    1232           0 :       bool RHSIsKill = hasTrivialKill(MulLHS);
    1233           0 :       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1234             :                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
    1235             :                                 WantResult);
    1236           0 :       if (ResultReg)
    1237             :         return ResultReg;
    1238             :     }
    1239             :   }
    1240             : 
    1241             :   // Check if the shift can be folded into the instruction.
    1242         498 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1243         260 :     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
    1244          64 :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
    1245          12 :         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
    1246          12 :         switch (SI->getOpcode()) {
    1247             :         default: break;
    1248             :         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
    1249             :         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
    1250             :         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
    1251             :         }
    1252          12 :         uint64_t ShiftVal = C->getZExtValue();
    1253          12 :         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
    1254          24 :           unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1255          12 :           if (!RHSReg)
    1256             :             return 0;
    1257          24 :           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1258          12 :           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1259             :                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
    1260             :                                     WantResult);
    1261          12 :           if (ResultReg)
    1262             :             return ResultReg;
    1263             :         }
    1264             :       }
    1265             :     }
    1266             :   }
    1267             : 
    1268         239 :   unsigned RHSReg = getRegForValue(RHS);
    1269         239 :   if (!RHSReg)
    1270             :     return 0;
    1271         239 :   bool RHSIsKill = hasTrivialKill(RHS);
    1272             : 
    1273         239 :   if (NeedExtend)
    1274           1 :     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
    1275             : 
    1276         239 :   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1277         239 :                        SetFlags, WantResult);
    1278             : }
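
The swaps at the top of emitAddSub all serve one goal: move the foldable operand to the RHS, where the _ri/_rs/_rx emitters can consume it. A toy model of just that canonicalization (Expr and its fields are illustrative stand-ins for llvm::Value and the predicates used in this file):

    #include <utility>

    struct Expr { bool IsConst, IsMulPow2, IsShiftByConst; };

    void canonicalize(bool UseAdd, Expr &LHS, Expr &RHS) {
      if (UseAdd && LHS.IsConst && !RHS.IsConst)
        std::swap(LHS, RHS);   // constants fold via emitAddSub_ri
      if (UseAdd && LHS.IsMulPow2)
        std::swap(LHS, RHS);   // x * 2^k folds as a shifted operand (_rs)
      if (UseAdd && LHS.IsShiftByConst)
        std::swap(LHS, RHS);   // shl/lshr/ashr by constant folds likewise
    }

Only adds may swap, since subtraction is not commutative; hence the UseAdd guards.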
    1279             : 
    1280         251 : unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1281             :                                         bool LHSIsKill, unsigned RHSReg,
    1282             :                                         bool RHSIsKill, bool SetFlags,
    1283             :                                         bool WantResult) {
    1284             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1285             : 
    1286         251 :   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
    1287         249 :       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    1288             :     return 0;
    1289             : 
    1290         249 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1291             :     return 0;
    1292             : 
    1293             :   static const unsigned OpcTable[2][2][2] = {
    1294             :     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
    1295             :       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    1296             :     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
    1297             :       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
    1298             :   };
    1299         498 :   bool Is64Bit = RetVT == MVT::i64;
    1300         249 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1301         249 :   const TargetRegisterClass *RC =
    1302         249 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1303             :   unsigned ResultReg;
    1304         249 :   if (WantResult)
    1305         203 :     ResultReg = createResultReg(RC);
    1306             :   else
    1307          46 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1308             : 
    1309         498 :   const MCInstrDesc &II = TII.get(Opc);
    1310         498 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1311         249 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1312         498 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1313         249 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1314         249 :       .addReg(RHSReg, getKillRegState(RHSIsKill));
    1315         249 :   return ResultReg;
    1316             : }
    1317             : 
    1318          91 : unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1319             :                                         bool LHSIsKill, uint64_t Imm,
    1320             :                                         bool SetFlags, bool WantResult) {
    1321             :   assert(LHSReg && "Invalid register number.");
    1322             : 
    1323          91 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1324             :     return 0;
    1325             : 
    1326             :   unsigned ShiftImm;
    1327          91 :   if (isUInt<12>(Imm))
    1328             :     ShiftImm = 0;
    1329          15 :   else if ((Imm & 0xfff000) == Imm) {
    1330           4 :     ShiftImm = 12;
    1331           4 :     Imm >>= 12;
    1332             :   } else
    1333             :     return 0;
    1334             : 
    1335             :   static const unsigned OpcTable[2][2][2] = {
    1336             :     { { AArch64::SUBWri,  AArch64::SUBXri  },
    1337             :       { AArch64::ADDWri,  AArch64::ADDXri  }  },
    1338             :     { { AArch64::SUBSWri, AArch64::SUBSXri },
    1339             :       { AArch64::ADDSWri, AArch64::ADDSXri }  }
    1340             :   };
    1341         160 :   bool Is64Bit = RetVT == MVT::i64;
    1342          80 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1343             :   const TargetRegisterClass *RC;
    1344          80 :   if (SetFlags)
    1345          32 :     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1346             :   else
    1347          48 :     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    1348             :   unsigned ResultReg;
    1349          80 :   if (WantResult)
    1350          53 :     ResultReg = createResultReg(RC);
    1351             :   else
    1352          27 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1353             : 
    1354         160 :   const MCInstrDesc &II = TII.get(Opc);
    1355         160 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1356         160 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1357          80 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1358         160 :       .addImm(Imm)
    1359         160 :       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
    1360          80 :   return ResultReg;
    1361             : }
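
The immediate legality rule above is the AArch64 add/sub encoding: a 12-bit unsigned value, optionally pre-shifted left by 12. A standalone sketch of the same check (names are illustrative):

    #include <cstdint>

    bool encodeAddSubImm(uint64_t Imm, unsigned &Shift, uint64_t &Enc) {
      if (Imm < (1u << 12)) {
        Shift = 0;  Enc = Imm;        return true;   // plain uimm12
      }
      if ((Imm & ~uint64_t(0xfff000)) == 0) {
        Shift = 12; Enc = Imm >> 12;  return true;   // uimm12 << 12
      }
      return false;  // caller falls back to materializing the constant
    }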
    1362             : 
    1363          28 : unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1364             :                                         bool LHSIsKill, unsigned RHSReg,
    1365             :                                         bool RHSIsKill,
    1366             :                                         AArch64_AM::ShiftExtendType ShiftType,
    1367             :                                         uint64_t ShiftImm, bool SetFlags,
    1368             :                                         bool WantResult) {
    1369             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1370             :   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
    1371             :          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
    1372             : 
    1373          56 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1374             :     return 0;
    1375             : 
    1376             :   // Don't deal with undefined shifts.
    1377          28 :   if (ShiftImm >= RetVT.getSizeInBits())
    1378             :     return 0;
    1379             : 
    1380             :   static const unsigned OpcTable[2][2][2] = {
    1381             :     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
    1382             :       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    1383             :     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
    1384             :       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
    1385             :   };
    1386          52 :   bool Is64Bit = RetVT == MVT::i64;
    1387          26 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1388          26 :   const TargetRegisterClass *RC =
    1389          26 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1390             :   unsigned ResultReg;
    1391          26 :   if (WantResult)
    1392          17 :     ResultReg = createResultReg(RC);
    1393             :   else
    1394           9 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1395             : 
    1396          52 :   const MCInstrDesc &II = TII.get(Opc);
    1397          52 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1398          26 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1399          52 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1400          26 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1401          26 :       .addReg(RHSReg, getKillRegState(RHSIsKill))
    1402          78 :       .addImm(getShifterImm(ShiftType, ShiftImm));
    1403          26 :   return ResultReg;
    1404             : }
    1405             : 
    1406           7 : unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1407             :                                         bool LHSIsKill, unsigned RHSReg,
    1408             :                                         bool RHSIsKill,
    1409             :                                         AArch64_AM::ShiftExtendType ExtType,
    1410             :                                         uint64_t ShiftImm, bool SetFlags,
    1411             :                                         bool WantResult) {
    1412             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1413             :   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
    1414             :          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
    1415             : 
    1416           7 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1417             :     return 0;
    1418             : 
    1419           7 :   if (ShiftImm >= 4)
    1420             :     return 0;
    1421             : 
    1422             :   static const unsigned OpcTable[2][2][2] = {
    1423             :     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
    1424             :       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    1425             :     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
    1426             :       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
    1427             :   };
    1428          14 :   bool Is64Bit = RetVT == MVT::i64;
    1429           7 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1430           7 :   const TargetRegisterClass *RC = nullptr;
    1431           7 :   if (SetFlags)
    1432           5 :     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1433             :   else
    1434           2 :     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    1435             :   unsigned ResultReg;
    1436           7 :   if (WantResult)
    1437           2 :     ResultReg = createResultReg(RC);
    1438             :   else
    1439           5 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1440             : 
    1441          14 :   const MCInstrDesc &II = TII.get(Opc);
    1442          14 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1443           7 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1444          14 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1445           7 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1446           7 :       .addReg(RHSReg, getKillRegState(RHSIsKill))
    1447          21 :       .addImm(getArithExtendImm(ExtType, ShiftImm));
    1448           7 :   return ResultReg;
    1449             : }
    1450             : 
    1451         110 : bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
    1452         110 :   Type *Ty = LHS->getType();
    1453         110 :   EVT EVT = TLI.getValueType(DL, Ty, true);
    1454         110 :   if (!EVT.isSimple())
    1455             :     return false;
    1456         110 :   MVT VT = EVT.getSimpleVT();
    1457             : 
    1458         110 :   switch (VT.SimpleTy) {
    1459             :   default:
    1460             :     return false;
    1461          63 :   case MVT::i1:
    1462             :   case MVT::i8:
    1463             :   case MVT::i16:
    1464             :   case MVT::i32:
    1465             :   case MVT::i64:
    1466         126 :     return emitICmp(VT, LHS, RHS, IsZExt);
    1467          47 :   case MVT::f32:
    1468             :   case MVT::f64:
    1469          47 :     return emitFCmp(VT, LHS, RHS);
    1470             :   }
    1471             : }
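
For the integer case, emitICmp lowers the compare as a flag-setting subtract whose result is discarded (WantResult=false targets WZR/XZR), i.e. the CMP alias. A toy model of SUBS's flag computation, following the architectural NZCV definitions (sketch; names are illustrative):

    #include <cstdint>

    struct NZCV { bool N, Z, C, V; };

    NZCV subs32(uint32_t LHS, uint32_t RHS) {
      uint32_t Res = LHS - RHS;
      return { (Res >> 31) != 0,                            // N: result sign
               Res == 0,                                    // Z: result zero
               LHS >= RHS,                                  // C: no borrow
               (((LHS ^ RHS) & (LHS ^ Res)) >> 31) != 0 };  // V: signed overflow
    }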
    1472             : 
    1473             : bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
    1474             :                                bool IsZExt) {
    1475         126 :   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
    1476          63 :                  IsZExt) != 0;
    1477             : }
    1478             : 
    1479             : bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    1480             :                                   uint64_t Imm) {
    1481          11 :   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
    1482             :                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
    1483             : }
    1484             : 
    1485          47 : bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
    1486          47 :   if (RetVT != MVT::f32 && RetVT != MVT::f64)
    1487             :     return false;
    1488             : 
    1489             :   // Check to see if the 2nd operand is a constant that we can encode directly
    1490             :   // in the compare.
    1491          47 :   bool UseImm = false;
    1492           4 :   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    1493           6 :     if (CFP->isZero() && !CFP->isNegative())
    1494             :       UseImm = true;
    1495             : 
    1496          47 :   unsigned LHSReg = getRegForValue(LHS);
    1497          47 :   if (!LHSReg)
    1498             :     return false;
    1499          47 :   bool LHSIsKill = hasTrivialKill(LHS);
    1500             : 
    1501          47 :   if (UseImm) {
    1502           2 :     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    1503           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    1504           2 :         .addReg(LHSReg, getKillRegState(LHSIsKill));
    1505           2 :     return true;
    1506             :   }
    1507             : 
    1508          45 :   unsigned RHSReg = getRegForValue(RHS);
    1509          45 :   if (!RHSReg)
    1510             :     return false;
    1511          45 :   bool RHSIsKill = hasTrivialKill(RHS);
    1512             : 
    1513          45 :   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
    1514         135 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    1515          45 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1516          45 :       .addReg(RHSReg, getKillRegState(RHSIsKill));
    1517          45 :   return true;
    1518             : }
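
The UseImm fast path above emits FCMP against the #0.0 immediate, and the guard only accepts a literal, non-negative zero. That test in isolation (sketch; Val stands in for the ConstantFP operand):

    #include <cmath>

    bool canUseFCmpZeroImm(double Val) {
      return Val == 0.0 && !std::signbit(Val);  // +0.0 only, never -0.0
    }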
    1519             : 
    1520             : unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
    1521             :                                   bool SetFlags, bool WantResult, bool IsZExt) {
    1522             :   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
    1523         212 :                     IsZExt);
    1524             : }
    1525             : 
    1526             : /// \brief This method is a wrapper to simplify add emission.
    1527             : ///
    1528             : /// First try to emit an add with an immediate operand using emitAddSub_ri. If
    1529             : /// that fails, then try to materialize the immediate into a register and use
    1530             : /// emitAddSub_rr instead.
    1531          37 : unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
    1532             :                                       int64_t Imm) {
    1533             :   unsigned ResultReg;
    1534          37 :   if (Imm < 0)
    1535           2 :     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
    1536             :   else
    1537          35 :     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
    1538             : 
    1539          37 :   if (ResultReg)
    1540             :     return ResultReg;
    1541             : 
    1542           8 :   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
    1543           8 :   if (!CReg)
    1544             :     return 0;
    1545             : 
    1546           8 :   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
    1547           8 :   return ResultReg;
    1548             : }
    1549             : 
    1550             : unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
    1551             :                                   bool SetFlags, bool WantResult, bool IsZExt) {
    1552             :   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
    1553          83 :                     IsZExt);
    1554             : }
    1555             : 
    1556             : unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
    1557             :                                       bool LHSIsKill, unsigned RHSReg,
    1558             :                                       bool RHSIsKill, bool WantResult) {
    1559             :   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
    1560           4 :                        RHSIsKill, /*SetFlags=*/true, WantResult);
    1561             : }
    1562             : 
    1563             : unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
    1564             :                                       bool LHSIsKill, unsigned RHSReg,
    1565             :                                       bool RHSIsKill,
    1566             :                                       AArch64_AM::ShiftExtendType ShiftType,
    1567             :                                       uint64_t ShiftImm, bool WantResult) {
    1568             :   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
    1569             :                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
    1570           9 :                        WantResult);
    1571             : }
    1572             : 
    1573          89 : unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
    1574             :                                         const Value *LHS, const Value *RHS) {
    1575             :   // Canonicalize immediates to the RHS first.
    1576         178 :   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    1577             :     std::swap(LHS, RHS);
    1578             : 
    1579             :   // Canonicalize mul by power-of-2 to the RHS.
    1580         178 :   if (LHS->hasOneUse() && isValueAvailable(LHS))
    1581          89 :     if (isMulPowOf2(LHS))
    1582             :       std::swap(LHS, RHS);
    1583             : 
    1584             :   // Canonicalize shift immediate to the RHS.
    1585         178 :   if (LHS->hasOneUse() && isValueAvailable(LHS))
    1586          89 :     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
    1587           0 :       if (isa<ConstantInt>(SI->getOperand(1)))
    1588             :         std::swap(LHS, RHS);
    1589             : 
    1590          89 :   unsigned LHSReg = getRegForValue(LHS);
    1591          89 :   if (!LHSReg)
    1592             :     return 0;
    1593          89 :   bool LHSIsKill = hasTrivialKill(LHS);
    1594             : 
    1595          89 :   unsigned ResultReg = 0;
    1596         107 :   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    1597          18 :     uint64_t Imm = C->getZExtValue();
    1598          18 :     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
    1599             :   }
    1600          18 :   if (ResultReg)
    1601             :     return ResultReg;
    1602             : 
    1603             :   // Check if the mul can be folded into the instruction.
    1604         142 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1605          71 :     if (isMulPowOf2(RHS)) {
    1606          18 :       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    1607          18 :       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    1608             : 
    1609           6 :       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
    1610           0 :         if (C->getValue().isPowerOf2())
    1611             :           std::swap(MulLHS, MulRHS);
    1612             : 
    1613             :       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    1614          24 :       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    1615             : 
    1616           6 :       unsigned RHSReg = getRegForValue(MulLHS);
    1617           6 :       if (!RHSReg)
    1618             :         return 0;
    1619           6 :       bool RHSIsKill = hasTrivialKill(MulLHS);
    1620           6 :       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
    1621             :                                    RHSIsKill, ShiftVal);
    1622           6 :       if (ResultReg)
    1623             :         return ResultReg;
    1624             :     }
    1625             :   }
    1626             : 
    1627             :   // Check if the shift can be folded into the instruction.
    1628         130 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1629          89 :     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
    1630          72 :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
    1631          24 :         uint64_t ShiftVal = C->getZExtValue();
    1632          48 :         unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1633          24 :         if (!RHSReg)
    1634             :           return 0;
    1635          48 :         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1636          24 :         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
    1637             :                                      RHSIsKill, ShiftVal);
    1638          24 :         if (ResultReg)
    1639             :           return ResultReg;
    1640             :       }
    1641             :   }
    1642             : 
    1643          53 :   unsigned RHSReg = getRegForValue(RHS);
    1644          53 :   if (!RHSReg)
    1645             :     return 0;
    1646          53 :   bool RHSIsKill = hasTrivialKill(RHS);
    1647             : 
    1648         159 :   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
    1649          53 :   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    1650          53 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    1651          12 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1652          24 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1653             :   }
    1654             :   return ResultReg;
    1655             : }
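
The mul-by-power-of-2 fold turns x * (1 << k) into a shifted register operand, e.g. ORR w0, w1, w2, LSL #k, and the logBase2 call recovers k. That step in miniature (sketch; relies on the power-of-two precondition asserted above):

    #include <cstdint>

    unsigned shiftAmountForMul(uint64_t MulConst) {
      unsigned K = 0;                    // MulConst must be a nonzero power
      while ((MulConst >> K) != 1) ++K;  // of two, as asserted above
      return K;
    }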
    1656             : 
    1657         203 : unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
    1658             :                                            unsigned LHSReg, bool LHSIsKill,
    1659             :                                            uint64_t Imm) {
    1660             :   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
    1661             :                 "ISD nodes are not consecutive!");
    1662             :   static const unsigned OpcTable[3][2] = {
    1663             :     { AArch64::ANDWri, AArch64::ANDXri },
    1664             :     { AArch64::ORRWri, AArch64::ORRXri },
    1665             :     { AArch64::EORWri, AArch64::EORXri }
    1666             :   };
    1667             :   const TargetRegisterClass *RC;
    1668             :   unsigned Opc;
    1669             :   unsigned RegSize;
    1670         203 :   switch (RetVT.SimpleTy) {
    1671             :   default:
    1672             :     return 0;
    1673         197 :   case MVT::i1:
    1674             :   case MVT::i8:
    1675             :   case MVT::i16:
    1676             :   case MVT::i32: {
    1677         197 :     unsigned Idx = ISDOpc - ISD::AND;
    1678         197 :     Opc = OpcTable[Idx][0];
    1679         197 :     RC = &AArch64::GPR32spRegClass;
    1680         197 :     RegSize = 32;
    1681         197 :     break;
    1682             :   }
    1683           6 :   case MVT::i64:
    1684           6 :     Opc = OpcTable[ISDOpc - ISD::AND][1];
    1685           6 :     RC = &AArch64::GPR64spRegClass;
    1686           6 :     RegSize = 64;
    1687           6 :     break;
    1688             :   }
    1689             : 
    1690         203 :   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    1691             :     return 0;
    1692             : 
    1693             :   unsigned ResultReg =
    1694         406 :       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
    1695         203 :                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
    1696         203 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    1697           4 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1698           4 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1699             :   }
    1700             :   return ResultReg;
    1701             : }
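
The trailing AND for i8/i16 exists because the narrow value is computed in a 32-bit register whose upper bits are unspecified; OR and XOR can set those bits, so the result is masked back to the declared width. AND itself needs no fixup, since its zero-extended immediate already clears the upper bits. A small runnable demonstration (the values are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 0xffffff80;      // an i8 (-128) with junk in the upper bits
      uint32_t X = A ^ 0xffffffff;  // i8 'xor -1', computed at 32-bit width
      assert((X & 0xff) == 0x7f);   // masking restores a well-defined i8
      return 0;
    }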
    1702             : 
    1703          30 : unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
    1704             :                                            unsigned LHSReg, bool LHSIsKill,
    1705             :                                            unsigned RHSReg, bool RHSIsKill,
    1706             :                                            uint64_t ShiftImm) {
    1707             :   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
    1708             :                 "ISD nodes are not consecutive!");
    1709             :   static const unsigned OpcTable[3][2] = {
    1710             :     { AArch64::ANDWrs, AArch64::ANDXrs },
    1711             :     { AArch64::ORRWrs, AArch64::ORRXrs },
    1712             :     { AArch64::EORWrs, AArch64::EORXrs }
    1713             :   };
    1714             : 
    1715             :   // Don't deal with undefined shifts.
    1716          30 :   if (ShiftImm >= RetVT.getSizeInBits())
    1717             :     return 0;
    1718             : 
    1719             :   const TargetRegisterClass *RC;
    1720             :   unsigned Opc;
    1721          18 :   switch (RetVT.SimpleTy) {
    1722             :   default:
    1723             :     return 0;
    1724          12 :   case MVT::i1:
    1725             :   case MVT::i8:
    1726             :   case MVT::i16:
    1727             :   case MVT::i32:
    1728          12 :     Opc = OpcTable[ISDOpc - ISD::AND][0];
    1729          12 :     RC = &AArch64::GPR32RegClass;
    1730          12 :     break;
    1731           6 :   case MVT::i64:
    1732           6 :     Opc = OpcTable[ISDOpc - ISD::AND][1];
    1733           6 :     RC = &AArch64::GPR64RegClass;
    1734           6 :     break;
    1735             :   }
    1736             :   unsigned ResultReg =
    1737          36 :       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1738          54 :                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
    1739          18 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    1740           6 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1741          12 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1742             :   }
    1743             :   return ResultReg;
    1744             : }
    1745             : 
    1746           4 : unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    1747             :                                      uint64_t Imm) {
    1748         184 :   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
    1749             : }
    1750             : 
    1751         381 : unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
    1752             :                                    bool WantZExt, MachineMemOperand *MMO) {
    1753         762 :   if (!TLI.allowsMisalignedMemoryAccesses(VT))
    1754             :     return 0;
    1755             : 
    1756             :   // Simplify this down to something we can handle.
    1757         379 :   if (!simplifyAddress(Addr, VT))
    1758             :     return 0;
    1759             : 
    1760         303 :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
    1761         303 :   if (!ScaleFactor)
    1762           0 :     llvm_unreachable("Unexpected value type.");
    1763             : 
    1764             :   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
    1765             :   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
    1766         303 :   bool UseScaled = true;
    1767         303 :   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    1768             :     UseScaled = false;
    1769             :     ScaleFactor = 1;
    1770             :   }
    1771             : 
    1772             :   static const unsigned GPOpcTable[2][8][4] = {
    1773             :     // Sign-extend.
    1774             :     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
    1775             :         AArch64::LDURXi  },
    1776             :       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
    1777             :         AArch64::LDURXi  },
    1778             :       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
    1779             :         AArch64::LDRXui  },
    1780             :       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
    1781             :         AArch64::LDRXui  },
    1782             :       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
    1783             :         AArch64::LDRXroX },
    1784             :       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
    1785             :         AArch64::LDRXroX },
    1786             :       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
    1787             :         AArch64::LDRXroW },
    1788             :       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
    1789             :         AArch64::LDRXroW }
    1790             :     },
    1791             :     // Zero-extend.
    1792             :     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
    1793             :         AArch64::LDURXi  },
    1794             :       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
    1795             :         AArch64::LDURXi  },
    1796             :       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
    1797             :         AArch64::LDRXui  },
    1798             :       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
    1799             :         AArch64::LDRXui  },
    1800             :       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
    1801             :         AArch64::LDRXroX },
    1802             :       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
    1803             :         AArch64::LDRXroX },
    1804             :       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
    1805             :         AArch64::LDRXroW },
    1806             :       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
    1807             :         AArch64::LDRXroW }
    1808             :     }
    1809             :   };
    1810             : 
    1811             :   static const unsigned FPOpcTable[4][2] = {
    1812             :     { AArch64::LDURSi,  AArch64::LDURDi  },
    1813             :     { AArch64::LDRSui,  AArch64::LDRDui  },
    1814             :     { AArch64::LDRSroX, AArch64::LDRDroX },
    1815             :     { AArch64::LDRSroW, AArch64::LDRDroW }
    1816             :   };
    1817             : 
    1818             :   unsigned Opc;
    1819             :   const TargetRegisterClass *RC;
    1820         482 :   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
    1821         401 :                       Addr.getOffsetReg();
    1822         222 :   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
    1823         595 :   if (Addr.getExtendType() == AArch64_AM::UXTW ||
    1824         292 :       Addr.getExtendType() == AArch64_AM::SXTW)
    1825          39 :     Idx++;
    1826             : 
    1827         606 :   bool IsRet64Bit = RetVT == MVT::i64;
    1828         303 :   switch (VT.SimpleTy) {
    1829           0 :   default:
    1830           0 :     llvm_unreachable("Unexpected value type.");
    1831          55 :   case MVT::i1: // Intentional fall-through.
    1832             :   case MVT::i8:
    1833          55 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    1834          55 :     RC = (IsRet64Bit && !WantZExt) ?
    1835             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1836             :     break;
    1837          42 :   case MVT::i16:
    1838          42 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    1839          42 :     RC = (IsRet64Bit && !WantZExt) ?
    1840             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1841             :     break;
    1842          87 :   case MVT::i32:
    1843          87 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    1844          87 :     RC = (IsRet64Bit && !WantZExt) ?
    1845             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1846             :     break;
    1847          98 :   case MVT::i64:
    1848          98 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    1849          98 :     RC = &AArch64::GPR64RegClass;
    1850          98 :     break;
    1851           7 :   case MVT::f32:
    1852           7 :     Opc = FPOpcTable[Idx][0];
    1853           7 :     RC = &AArch64::FPR32RegClass;
    1854           7 :     break;
    1855          14 :   case MVT::f64:
    1856          14 :     Opc = FPOpcTable[Idx][1];
    1857          14 :     RC = &AArch64::FPR64RegClass;
    1858          14 :     break;
    1859             :   }
    1860             : 
    1861             :   // Create the base instruction, then add the operands.
    1862         303 :   unsigned ResultReg = createResultReg(RC);
    1863         303 :   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    1864         909 :                                     TII.get(Opc), ResultReg);
    1865         303 :   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
    1866             : 
    1867             :   // Loading an i1 requires special handling.
    1868         303 :   if (VT == MVT::i1) {
    1869           6 :     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    1870             :     assert(ANDReg && "Unexpected AND instruction emission failure.");
    1871           3 :     ResultReg = ANDReg;
    1872             :   }
    1873             : 
     1874             :   // For zero-extending loads to 64 bits, we emit a 32-bit load and then convert
     1875             :   // the 32-bit register to a 64-bit register.
    1876         303 :   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    1877          21 :     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    1878          42 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    1879          42 :             TII.get(AArch64::SUBREG_TO_REG), Reg64)
    1880          21 :         .addImm(0)
    1881          21 :         .addReg(ResultReg, getKillRegState(true))
    1882          21 :         .addImm(AArch64::sub_32);
    1883          21 :     ResultReg = Reg64;
    1884             :   }
    1885             :   return ResultReg;
    1886             : }
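
The SUBREG_TO_REG at the end is free at run time: on AArch64, any instruction that writes a W register, a 32-bit load included, zeroes the upper 32 bits of the corresponding X register, so the zero-extension is purely a register-class change. Illustrated with plain C++ (the noted compiler output is an assumption about typical codegen):

    #include <cstdint>

    uint64_t zextLoad32(const uint32_t *P) {
      uint32_t W = *P;  // typically: ldr w0, [x0] -- upper 32 bits of x0 become 0
      return W;         // already the zero-extended 64-bit value; no extra insn
    }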
    1887             : 
    1888         268 : bool AArch64FastISel::selectAddSub(const Instruction *I) {
    1889         268 :   MVT VT;
    1890         268 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    1891             :     return false;
    1892             : 
    1893         268 :   if (VT.isVector())
    1894         142 :     return selectOperator(I, I->getOpcode());
    1895             : 
    1896             :   unsigned ResultReg;
    1897         197 :   switch (I->getOpcode()) {
    1898           0 :   default:
    1899           0 :     llvm_unreachable("Unexpected instruction.");
    1900         190 :   case Instruction::Add:
    1901         760 :     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    1902         190 :     break;
    1903           7 :   case Instruction::Sub:
    1904          28 :     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    1905           7 :     break;
    1906             :   }
    1907         197 :   if (!ResultReg)
    1908             :     return false;
    1909             : 
    1910         197 :   updateValueMap(I, ResultReg);
    1911         197 :   return true;
    1912             : }
    1913             : 
    1914          89 : bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
    1915          89 :   MVT VT;
    1916          89 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    1917             :     return false;
    1918             : 
    1919          89 :   if (VT.isVector())
    1920           0 :     return selectOperator(I, I->getOpcode());
    1921             : 
    1922             :   unsigned ResultReg;
    1923          89 :   switch (I->getOpcode()) {
    1924           0 :   default:
    1925           0 :     llvm_unreachable("Unexpected instruction.");
    1926          50 :   case Instruction::And:
    1927         150 :     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    1928          50 :     break;
    1929          20 :   case Instruction::Or:
    1930          60 :     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    1931          20 :     break;
    1932          19 :   case Instruction::Xor:
    1933          57 :     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    1934          19 :     break;
    1935             :   }
    1936          89 :   if (!ResultReg)
    1937             :     return false;
    1938             : 
    1939          89 :   updateValueMap(I, ResultReg);
    1940          89 :   return true;
    1941             : }
    1942             : 
    1943         367 : bool AArch64FastISel::selectLoad(const Instruction *I) {
    1944         367 :   MVT VT;
    1945             :   // Verify we have a legal type before going any further.  Currently, we handle
    1946             :   // simple types that will directly fit in a register (i32/f32/i64/f64) or
    1947             :   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
     1948             :   // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
    1949         362 :       cast<LoadInst>(I)->isAtomic())
    1950             :     return false;
    1951             : 
    1952         720 :   const Value *SV = I->getOperand(0);
    1953         360 :   if (TLI.supportSwiftError()) {
    1954             :     // Swifterror values can come from either a function parameter with
    1955             :     // swifterror attribute or an alloca with swifterror attribute.
    1956         103 :     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
    1957         103 :       if (Arg->hasSwiftErrorAttr())
    1958             :         return false;
    1959             :     }
    1960             : 
    1961          60 :     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
    1962          60 :       if (Alloca->isSwiftError())
    1963             :         return false;
    1964             :     }
    1965             :   }
    1966             : 
    1967             :   // See if we can handle this address.
    1968         355 :   Address Addr;
    1969         710 :   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    1970             :     return false;
    1971             : 
    1972             :   // Fold the following sign-/zero-extend into the load instruction.
    1973         340 :   bool WantZExt = true;
    1974         340 :   MVT RetVT = VT;
    1975         340 :   const Value *IntExtVal = nullptr;
    1976         680 :   if (I->hasOneUse()) {
    1977         776 :     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
    1978          38 :       if (isTypeSupported(ZE->getType(), RetVT))
    1979             :         IntExtVal = ZE;
    1980             :       else
    1981           0 :         RetVT = VT;
    1982         662 :     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
    1983          38 :       if (isTypeSupported(SE->getType(), RetVT))
    1984             :         IntExtVal = SE;
    1985             :       else
    1986           0 :         RetVT = VT;
    1987             :       WantZExt = false;
    1988             :     }
    1989             :   }
    1990             : 
    1991             :   unsigned ResultReg =
    1992         340 :       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
    1993         340 :   if (!ResultReg)
    1994             :     return false;
    1995             : 
    1996             :   // There are a few different cases we have to handle, because the load or the
     1997             :   // sign-/zero-extend might not be selected by FastISel if we fall back to
    1998             :   // SelectionDAG. There is also an ordering issue when both instructions are in
    1999             :   // different basic blocks.
     2000             :   // 1.) The load instruction is selected by FastISel, but not the integer
     2001             :   //     extend. This usually happens when the integer extend is in a different
    2002             :   //     basic block and SelectionDAG took over for that basic block.
    2003             :   // 2.) The load instruction is selected before the integer extend. This only
    2004             :   //     happens when the integer extend is in a different basic block.
    2005             :   // 3.) The load instruction is selected by SelectionDAG and the integer extend
    2006             :   //     by FastISel. This happens if there are instructions between the load
    2007             :   //     and the integer extend that couldn't be selected by FastISel.
    2008         262 :   if (IntExtVal) {
    2009             :     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
     2010             :     // could select it. Emit a copy to subreg if necessary. FastISel will
     2011             :     // remove the copy when it selects the integer extend.
    2012          76 :     unsigned Reg = lookUpRegForValue(IntExtVal);
    2013          76 :     auto *MI = MRI.getUniqueVRegDef(Reg);
    2014          76 :     if (!MI) {
    2015           3 :       if (RetVT == MVT::i64 && VT <= MVT::i32) {
    2016           1 :         if (WantZExt) {
    2017             :           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
    2018           3 :           std::prev(FuncInfo.InsertPt)->eraseFromParent();
    2019           3 :           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
    2020             :         } else
    2021           0 :           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
    2022             :                                                  /*IsKill=*/true,
    2023           0 :                                                  AArch64::sub_32);
    2024             :       }
    2025           1 :       updateValueMap(I, ResultReg);
    2026           1 :       return true;
    2027             :     }
    2028             : 
     2029             :     // The integer extend has already been emitted; delete all the instructions
     2030             :     // emitted by the integer-extend lowering code and use the result from the
     2031             :     // load instruction directly.
    2032         189 :     while (MI) {
    2033         114 :       Reg = 0;
    2034         153 :       for (auto &Opnd : MI->uses()) {
    2035         153 :         if (Opnd.isReg()) {
    2036         114 :           Reg = Opnd.getReg();
    2037         114 :           break;
    2038             :         }
    2039             :       }
    2040         114 :       MI->eraseFromParent();
    2041         114 :       MI = nullptr;
    2042         114 :       if (Reg)
    2043         114 :         MI = MRI.getUniqueVRegDef(Reg);
    2044             :     }
    2045          75 :     updateValueMap(IntExtVal, ResultReg);
    2046          75 :     return true;
    2047             :   }
    2048             : 
    2049         186 :   updateValueMap(I, ResultReg);
    2050         186 :   return true;
    2051             : }
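                      : // A sketch of the fold performed above: for 'int g(short *p) { return *p; }'
                      : // the sign-extend is folded into the load itself, so a single
                      : //   ldrsh w0, [x0]
                      : // is expected, and the instructions emitted for the separate SExt are
                      : // deleted by the loop above.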
    2052             : 
    2053          18 : bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
    2054             :                                        unsigned AddrReg,
    2055             :                                        MachineMemOperand *MMO) {
    2056             :   unsigned Opc;
    2057          18 :   switch (VT.SimpleTy) {
    2058             :   default: return false;
    2059             :   case MVT::i8:  Opc = AArch64::STLRB; break;
    2060           4 :   case MVT::i16: Opc = AArch64::STLRH; break;
    2061           6 :   case MVT::i32: Opc = AArch64::STLRW; break;
    2062           4 :   case MVT::i64: Opc = AArch64::STLRX; break;
    2063             :   }
    2064             : 
    2065          36 :   const MCInstrDesc &II = TII.get(Opc);
    2066          18 :   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
    2067          18 :   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
    2068          36 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    2069          18 :       .addReg(SrcReg)
    2070          18 :       .addReg(AddrReg)
    2071          18 :       .addMemOperand(MMO);
    2072          18 :   return true;
    2073             : }
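                      : // A sketch of the expected selection: a release store such as
                      : //   p->store(1, std::memory_order_release);   // std::atomic<int> *p
                      : // should become a store-release, e.g.
                      : //   mov  w8, #1
                      : //   stlr w8, [x0]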
    2074             : 
    2075         448 : bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
    2076             :                                 MachineMemOperand *MMO) {
    2077         896 :   if (!TLI.allowsMisalignedMemoryAccesses(VT))
    2078             :     return false;
    2079             : 
    2080             :   // Simplify this down to something we can handle.
    2081         448 :   if (!simplifyAddress(Addr, VT))
    2082             :     return false;
    2083             : 
    2084         306 :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
    2085         306 :   if (!ScaleFactor)
    2086           0 :     llvm_unreachable("Unexpected value type.");
    2087             : 
     2088             :   // Negative offsets require an unscaled, 9-bit, signed immediate offset.
     2089             :   // Otherwise, we try a scaled, 12-bit, unsigned immediate offset.
    2090         306 :   bool UseScaled = true;
    2091         306 :   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    2092             :     UseScaled = false;
    2093             :     ScaleFactor = 1;
    2094             :   }
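                      :   // For example, an i32 store at offset 8 can use the scaled form
                      :   //   str  w0, [x1, #8]     ; STRWui, encoded immediate 8/4 = 2
                      :   // while offset -8 needs the unscaled form
                      :   //   stur w0, [x1, #-8]    ; STURWi, signed 9-bit immediate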
    2095             : 
    2096             :   static const unsigned OpcTable[4][6] = {
    2097             :     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
    2098             :       AArch64::STURSi,   AArch64::STURDi },
    2099             :     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
    2100             :       AArch64::STRSui,   AArch64::STRDui },
    2101             :     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
    2102             :       AArch64::STRSroX,  AArch64::STRDroX },
    2103             :     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
    2104             :       AArch64::STRSroW,  AArch64::STRDroW }
    2105             :   };
    2106             : 
    2107             :   unsigned Opc;
    2108         306 :   bool VTIsi1 = false;
    2109         467 :   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
    2110         464 :                       Addr.getOffsetReg();
    2111         303 :   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
    2112         612 :   if (Addr.getExtendType() == AArch64_AM::UXTW ||
    2113         306 :       Addr.getExtendType() == AArch64_AM::SXTW)
    2114           0 :     Idx++;
    2115             : 
    2116         306 :   switch (VT.SimpleTy) {
    2117           0 :   default: llvm_unreachable("Unexpected value type.");
    2118           8 :   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
    2119          44 :   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
    2120          21 :   case MVT::i16: Opc = OpcTable[Idx][1]; break;
    2121          94 :   case MVT::i32: Opc = OpcTable[Idx][2]; break;
    2122         111 :   case MVT::i64: Opc = OpcTable[Idx][3]; break;
    2123          11 :   case MVT::f32: Opc = OpcTable[Idx][4]; break;
    2124          25 :   case MVT::f64: Opc = OpcTable[Idx][5]; break;
    2125             :   }
    2126             : 
    2127             :   // Storing an i1 requires special handling.
    2128         306 :   if (VTIsi1 && SrcReg != AArch64::WZR) {
    2129          14 :     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    2130             :     assert(ANDReg && "Unexpected AND instruction emission failure.");
    2131           7 :     SrcReg = ANDReg;
    2132             :   }
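                      :   // E.g. an i1 store is expected to come out as something like
                      :   //   and  w8, w0, #0x1
                      :   //   strb w8, [x1]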
    2133             :   // Create the base instruction, then add the operands.
    2134         612 :   const MCInstrDesc &II = TII.get(Opc);
    2135         612 :   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
    2136             :   MachineInstrBuilder MIB =
    2137         306 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
    2138         306 :   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
    2139             : 
    2140         306 :   return true;
    2141             : }
    2142             : 
    2143         413 : bool AArch64FastISel::selectStore(const Instruction *I) {
    2144         413 :   MVT VT;
    2145         826 :   const Value *Op0 = I->getOperand(0);
    2146             :   // Verify we have a legal type before going any further.  Currently, we handle
    2147             :   // simple types that will directly fit in a register (i32/f32/i64/f64) or
     2148             :   // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
    2149         413 :   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
    2150             :     return false;
    2151             : 
    2152         794 :   const Value *PtrV = I->getOperand(1);
    2153         397 :   if (TLI.supportSwiftError()) {
    2154             :     // Swifterror values can come from either a function parameter with
    2155             :     // swifterror attribute or an alloca with swifterror attribute.
    2156         216 :     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
    2157         216 :       if (Arg->hasSwiftErrorAttr())
    2158             :         return false;
    2159             :     }
    2160             : 
    2161          85 :     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
    2162          85 :       if (Alloca->isSwiftError())
    2163             :         return false;
    2164             :     }
    2165             :   }
    2166             : 
    2167             :   // Get the value to be stored into a register. Use the zero register directly
    2168             :   // when possible to avoid an unnecessary copy and a wasted register.
    2169         395 :   unsigned SrcReg = 0;
    2170          55 :   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
    2171          55 :     if (CI->isZero())
    2172          66 :       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    2173           2 :   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    2174           4 :     if (CF->isZero() && !CF->isNegative()) {
    2175           2 :       VT = MVT::getIntegerVT(VT.getSizeInBits());
    2176           4 :       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    2177             :     }
    2178             :   }
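                      :   // E.g. '*p = 0' for an 'i64*' should become a single
                      :   //   str xzr, [x0]
                      :   // with no mov to materialize the zero.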
    2179             : 
    2180             :   if (!SrcReg)
    2181         360 :     SrcReg = getRegForValue(Op0);
    2182             : 
    2183         395 :   if (!SrcReg)
    2184             :     return false;
    2185             : 
    2186         395 :   auto *SI = cast<StoreInst>(I);
    2187             : 
    2188             :   // Try to emit a STLR for seq_cst/release.
    2189         395 :   if (SI->isAtomic()) {
    2190          26 :     AtomicOrdering Ord = SI->getOrdering();
    2191             :     // The non-atomic instructions are sufficient for relaxed stores.
    2192          26 :     if (isReleaseOrStronger(Ord)) {
    2193             :       // The STLR addressing mode only supports a base reg; pass that directly.
    2194          18 :       unsigned AddrReg = getRegForValue(PtrV);
    2195          18 :       return emitStoreRelease(VT, SrcReg, AddrReg,
    2196          18 :                               createMachineMemOperandFor(I));
    2197             :     }
    2198             :   }
    2199             : 
    2200             :   // See if we can handle this address.
    2201         377 :   Address Addr;
    2202         377 :   if (!computeAddress(PtrV, Addr, Op0->getType()))
    2203             :     return false;
    2204             : 
    2205         375 :   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
    2206             :     return false;
    2207         235 :   return true;
    2208             : }
    2209             : 
    2210             : static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
    2211             :   switch (Pred) {
    2212             :   case CmpInst::FCMP_ONE:
    2213             :   case CmpInst::FCMP_UEQ:
    2214             :   default:
    2215             :     // AL is our "false" for now. The other two need more compares.
    2216             :     return AArch64CC::AL;
    2217             :   case CmpInst::ICMP_EQ:
    2218             :   case CmpInst::FCMP_OEQ:
    2219             :     return AArch64CC::EQ;
    2220             :   case CmpInst::ICMP_SGT:
    2221             :   case CmpInst::FCMP_OGT:
    2222             :     return AArch64CC::GT;
    2223             :   case CmpInst::ICMP_SGE:
    2224             :   case CmpInst::FCMP_OGE:
    2225             :     return AArch64CC::GE;
    2226             :   case CmpInst::ICMP_UGT:
    2227             :   case CmpInst::FCMP_UGT:
    2228             :     return AArch64CC::HI;
    2229             :   case CmpInst::FCMP_OLT:
    2230             :     return AArch64CC::MI;
    2231             :   case CmpInst::ICMP_ULE:
    2232             :   case CmpInst::FCMP_OLE:
    2233             :     return AArch64CC::LS;
    2234             :   case CmpInst::FCMP_ORD:
    2235             :     return AArch64CC::VC;
    2236             :   case CmpInst::FCMP_UNO:
    2237             :     return AArch64CC::VS;
    2238             :   case CmpInst::FCMP_UGE:
    2239             :     return AArch64CC::PL;
    2240             :   case CmpInst::ICMP_SLT:
    2241             :   case CmpInst::FCMP_ULT:
    2242             :     return AArch64CC::LT;
    2243             :   case CmpInst::ICMP_SLE:
    2244             :   case CmpInst::FCMP_ULE:
    2245             :     return AArch64CC::LE;
    2246             :   case CmpInst::FCMP_UNE:
    2247             :   case CmpInst::ICMP_NE:
    2248             :     return AArch64CC::NE;
    2249             :   case CmpInst::ICMP_UGE:
    2250             :     return AArch64CC::HS;
    2251             :   case CmpInst::ICMP_ULT:
    2252             :     return AArch64CC::LO;
    2253             :   }
    2254             : }
    2255             : 
    2256             : /// \brief Try to emit a combined compare-and-branch instruction.
    2257          92 : bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
    2258             :   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
    2259         184 :   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
    2260          92 :   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2261             : 
    2262          92 :   const Value *LHS = CI->getOperand(0);
    2263          92 :   const Value *RHS = CI->getOperand(1);
    2264             : 
    2265          92 :   MVT VT;
    2266          92 :   if (!isTypeSupported(LHS->getType(), VT))
    2267             :     return false;
    2268             : 
    2269          92 :   unsigned BW = VT.getSizeInBits();
    2270          92 :   if (BW > 64)
    2271             :     return false;
    2272             : 
    2273         184 :   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2274         184 :   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    2275             : 
    2276             :   // Try to take advantage of fallthrough opportunities.
    2277          92 :   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2278          53 :     std::swap(TBB, FBB);
    2279          53 :     Predicate = CmpInst::getInversePredicate(Predicate);
    2280             :   }
    2281             : 
    2282          92 :   int TestBit = -1;
    2283             :   bool IsCmpNE;
    2284             :   switch (Predicate) {
    2285             :   default:
    2286             :     return false;
    2287          44 :   case CmpInst::ICMP_EQ:
    2288             :   case CmpInst::ICMP_NE:
    2289          44 :     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
    2290             :       std::swap(LHS, RHS);
    2291             : 
    2292         126 :     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
    2293             :       return false;
    2294             : 
    2295          54 :     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
    2296          13 :       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
    2297          12 :         const Value *AndLHS = AI->getOperand(0);
    2298          12 :         const Value *AndRHS = AI->getOperand(1);
    2299             : 
    2300          12 :         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
    2301           0 :           if (C->getValue().isPowerOf2())
    2302             :             std::swap(AndLHS, AndRHS);
    2303             : 
    2304          24 :         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
    2305          12 :           if (C->getValue().isPowerOf2()) {
    2306          24 :             TestBit = C->getValue().logBase2();
    2307          12 :             LHS = AndLHS;
    2308             :           }
    2309             :       }
    2310             : 
    2311          82 :     if (VT == MVT::i1)
    2312           1 :       TestBit = 0;
    2313             : 
    2314          41 :     IsCmpNE = Predicate == CmpInst::ICMP_NE;
    2315          41 :     break;
    2316          14 :   case CmpInst::ICMP_SLT:
    2317             :   case CmpInst::ICMP_SGE:
    2318          30 :     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
    2319             :       return false;
    2320             : 
    2321           7 :     TestBit = BW - 1;
    2322           7 :     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    2323           7 :     break;
    2324          10 :   case CmpInst::ICMP_SGT:
    2325             :   case CmpInst::ICMP_SLE:
    2326          20 :     if (!isa<ConstantInt>(RHS))
    2327             :       return false;
    2328             : 
    2329          40 :     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
    2330             :       return false;
    2331             : 
    2332           8 :     TestBit = BW - 1;
    2333           8 :     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    2334           8 :     break;
    2335             :   } // end switch
    2336             : 
    2337             :   static const unsigned OpcTable[2][2][2] = {
    2338             :     { {AArch64::CBZW,  AArch64::CBZX },
    2339             :       {AArch64::CBNZW, AArch64::CBNZX} },
    2340             :     { {AArch64::TBZW,  AArch64::TBZX },
    2341             :       {AArch64::TBNZW, AArch64::TBNZX} }
    2342             :   };
    2343             : 
    2344          56 :   bool IsBitTest = TestBit != -1;
    2345          56 :   bool Is64Bit = BW == 64;
    2346          56 :   if (TestBit < 32 && TestBit >= 0)
    2347          23 :     Is64Bit = false;
    2348             : 
    2349          56 :   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
    2350         112 :   const MCInstrDesc &II = TII.get(Opc);
    2351             : 
    2352          56 :   unsigned SrcReg = getRegForValue(LHS);
    2353          56 :   if (!SrcReg)
    2354             :     return false;
    2355          56 :   bool SrcIsKill = hasTrivialKill(LHS);
    2356             : 
    2357          56 :   if (BW == 64 && !Is64Bit)
    2358           4 :     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
    2359           2 :                                         AArch64::sub_32);
    2360             : 
    2361          56 :   if ((BW < 32) && !IsBitTest)
    2362           8 :     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
    2363             : 
    2364             :   // Emit the combined compare and branch instruction.
    2365          56 :   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
    2366             :   MachineInstrBuilder MIB =
    2367         168 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    2368          56 :           .addReg(SrcReg, getKillRegState(SrcIsKill));
    2369          56 :   if (IsBitTest)
    2370          28 :     MIB.addImm(TestBit);
    2371         112 :   MIB.addMBB(TBB);
    2372             : 
    2373          56 :   finishCondBranch(BI->getParent(), TBB, FBB);
    2374          56 :   return true;
    2375             : }
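                      : // Sketches of the selections the switch above aims for:
                      : //   if (x == 0) ...   ->  cbz  w0, <target>
                      : //   if (x & 8)  ...   ->  tbnz w0, #3, <target>   (power-of-2 mask)
                      : //   if (x < 0)  ...   ->  tbnz w0, #31, <target>  (sign-bit test)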
    2376             : 
    2377         297 : bool AArch64FastISel::selectBranch(const Instruction *I) {
    2378         297 :   const BranchInst *BI = cast<BranchInst>(I);
    2379         297 :   if (BI->isUnconditional()) {
    2380         332 :     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2381         332 :     fastEmitBranch(MSucc, BI->getDebugLoc());
    2382         166 :     return true;
    2383             :   }
    2384             : 
    2385         262 :   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2386         262 :   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    2387             : 
    2388         224 :   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    2389         186 :     if (CI->hasOneUse() && isValueAvailable(CI)) {
    2390             :       // Try to optimize or fold the cmp.
    2391          92 :       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2392          92 :       switch (Predicate) {
    2393             :       default:
    2394             :         break;
    2395           0 :       case CmpInst::FCMP_FALSE:
    2396           0 :         fastEmitBranch(FBB, DbgLoc);
    2397           0 :         return true;
    2398           0 :       case CmpInst::FCMP_TRUE:
    2399           0 :         fastEmitBranch(TBB, DbgLoc);
    2400           0 :         return true;
    2401             :       }
    2402             : 
    2403             :       // Try to emit a combined compare-and-branch first.
    2404          92 :       if (emitCompareAndBranch(BI))
    2405             :         return true;
    2406             : 
    2407             :       // Try to take advantage of fallthrough opportunities.
    2408          36 :       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2409           9 :         std::swap(TBB, FBB);
    2410           9 :         Predicate = CmpInst::getInversePredicate(Predicate);
    2411             :       }
    2412             : 
    2413             :       // Emit the cmp.
    2414         108 :       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    2415             :         return false;
    2416             : 
    2417             :       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
    2418             :       // instruction.
    2419          36 :       AArch64CC::CondCode CC = getCompareCC(Predicate);
    2420          36 :       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
    2421          36 :       switch (Predicate) {
    2422             :       default:
    2423             :         break;
    2424             :       case CmpInst::FCMP_UEQ:
    2425           1 :         ExtraCC = AArch64CC::EQ;
    2426           1 :         CC = AArch64CC::VS;
    2427             :         break;
    2428             :       case CmpInst::FCMP_ONE:
    2429             :         ExtraCC = AArch64CC::MI;
    2430             :         CC = AArch64CC::GT;
    2431             :         break;
    2432             :       }
    2433             :       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2434             : 
    2435             :       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
    2436             :       if (ExtraCC != AArch64CC::AL) {
    2437           4 :         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2438           4 :             .addImm(ExtraCC)
    2439           4 :             .addMBB(TBB);
    2440             :       }
    2441             : 
    2442             :       // Emit the branch.
    2443          72 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2444          72 :           .addImm(CC)
    2445          72 :           .addMBB(TBB);
    2446             : 
    2447          36 :       finishCondBranch(BI->getParent(), TBB, FBB);
    2448          36 :       return true;
    2449             :     }
    2450          40 :   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    2451           2 :     uint64_t Imm = CI->getZExtValue();
    2452           2 :     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    2453           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
    2454           2 :         .addMBB(Target);
    2455             : 
    2456             :     // Obtain the branch probability and add the target to the successor list.
    2457           2 :     if (FuncInfo.BPI) {
    2458             :       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
    2459           0 :           BI->getParent(), Target->getBasicBlock());
    2460           0 :       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
    2461             :     } else
    2462           2 :       FuncInfo.MBB->addSuccessorWithoutProb(Target);
    2463             :     return true;
    2464             :   } else {
    2465          36 :     AArch64CC::CondCode CC = AArch64CC::NE;
    2466          36 :     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
     2467             :       // Fake-request the condition; otherwise the intrinsic might be
     2468             :       // optimized away completely.
    2469          28 :       unsigned CondReg = getRegForValue(BI->getCondition());
    2470          14 :       if (!CondReg)
    2471          14 :         return false;
    2472             : 
    2473             :       // Emit the branch.
    2474          28 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2475          28 :         .addImm(CC)
    2476          28 :         .addMBB(TBB);
    2477             : 
    2478          14 :       finishCondBranch(BI->getParent(), TBB, FBB);
    2479          14 :       return true;
    2480             :     }
    2481             :   }
    2482             : 
    2483          46 :   unsigned CondReg = getRegForValue(BI->getCondition());
    2484          23 :   if (CondReg == 0)
    2485             :     return false;
    2486          46 :   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
    2487             : 
     2488             :   // i1 conditions come as i32 values; test the lowest bit with tb(n)z.
    2489          23 :   unsigned Opcode = AArch64::TBNZW;
    2490          23 :   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2491          17 :     std::swap(TBB, FBB);
    2492          17 :     Opcode = AArch64::TBZW;
    2493             :   }
    2494             : 
    2495          46 :   const MCInstrDesc &II = TII.get(Opcode);
    2496             :   unsigned ConstrainedCondReg
    2497          23 :     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
    2498          46 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    2499          23 :       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
    2500          23 :       .addImm(0)
    2501          46 :       .addMBB(TBB);
    2502             : 
    2503          23 :   finishCondBranch(BI->getParent(), TBB, FBB);
    2504          23 :   return true;
    2505             : }
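                      : // A sketch of the XALU fold above: when the condition comes from, e.g.,
                      : // llvm.sadd.with.overflow.i32, the flags set by the ADDS are reused and the
                      : // branch reduces to a bare 'b.vs <target>' without a second compare.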
    2506             : 
    2507           1 : bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
    2508           1 :   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
    2509           2 :   unsigned AddrReg = getRegForValue(BI->getOperand(0));
    2510           1 :   if (AddrReg == 0)
    2511             :     return false;
    2512             : 
    2513             :   // Emit the indirect branch.
    2514           2 :   const MCInstrDesc &II = TII.get(AArch64::BR);
    2515           1 :   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
    2516           1 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
    2517             : 
    2518             :   // Make sure the CFG is up-to-date.
    2519           7 :   for (auto *Succ : BI->successors())
    2520           6 :     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
    2521             : 
    2522           1 :   return true;
    2523             : }
    2524             : 
    2525          57 : bool AArch64FastISel::selectCmp(const Instruction *I) {
    2526          57 :   const CmpInst *CI = cast<CmpInst>(I);
    2527             : 
    2528             :   // Vectors of i1 are weird: bail out.
    2529         114 :   if (CI->getType()->isVectorTy())
    2530             :     return false;
    2531             : 
    2532             :   // Try to optimize or fold the cmp.
    2533          51 :   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2534          51 :   unsigned ResultReg = 0;
    2535          51 :   switch (Predicate) {
    2536             :   default:
    2537             :     break;
    2538           1 :   case CmpInst::FCMP_FALSE:
    2539           1 :     ResultReg = createResultReg(&AArch64::GPR32RegClass);
    2540           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    2541           2 :             TII.get(TargetOpcode::COPY), ResultReg)
    2542           1 :         .addReg(AArch64::WZR, getKillRegState(true));
    2543           1 :     break;
    2544           1 :   case CmpInst::FCMP_TRUE:
    2545           2 :     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    2546           1 :     break;
    2547             :   }
    2548             : 
    2549           2 :   if (ResultReg) {
    2550           2 :     updateValueMap(I, ResultReg);
    2551           2 :     return true;
    2552             :   }
    2553             : 
    2554             :   // Emit the cmp.
    2555         147 :   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    2556             :     return false;
    2557             : 
    2558          49 :   ResultReg = createResultReg(&AArch64::GPR32RegClass);
    2559             : 
    2560             :   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
     2561             :   // condition codes are inverted because they are used by CSINC.
    2562             :   static unsigned CondCodeTable[2][2] = {
    2563             :     { AArch64CC::NE, AArch64CC::VC },
    2564             :     { AArch64CC::PL, AArch64CC::LE }
    2565             :   };
    2566          49 :   unsigned *CondCodes = nullptr;
    2567          49 :   switch (Predicate) {
    2568             :   default:
    2569             :     break;
    2570             :   case CmpInst::FCMP_UEQ:
    2571             :     CondCodes = &CondCodeTable[0][0];
    2572             :     break;
    2573           1 :   case CmpInst::FCMP_ONE:
    2574           1 :     CondCodes = &CondCodeTable[1][0];
    2575           1 :     break;
    2576             :   }
    2577             : 
    2578             :   if (CondCodes) {
    2579           2 :     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    2580           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2581           4 :             TmpReg1)
    2582           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2583           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2584           4 :         .addImm(CondCodes[0]);
    2585           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2586           4 :             ResultReg)
    2587           2 :         .addReg(TmpReg1, getKillRegState(true))
    2588           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2589           4 :         .addImm(CondCodes[1]);
    2590             : 
    2591           2 :     updateValueMap(I, ResultReg);
    2592           2 :     return true;
    2593             :   }
    2594             : 
    2595             :   // Now set a register based on the comparison.
    2596          47 :   AArch64CC::CondCode CC = getCompareCC(Predicate);
    2597             :   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2598          47 :   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
    2599         141 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2600          94 :           ResultReg)
    2601          47 :       .addReg(AArch64::WZR, getKillRegState(true))
    2602          47 :       .addReg(AArch64::WZR, getKillRegState(true))
    2603          94 :       .addImm(invertedCC);
    2604             : 
    2605          47 :   updateValueMap(I, ResultReg);
    2606          47 :   return true;
    2607             : }
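                      : // A sketch of the common case above: 'a < b' (signed i32) is expected as
                      : //   cmp  w0, w1
                      : //   cset w0, lt        ; alias of: csinc w0, wzr, wzr, ge
                      : // which is why the inverted condition code is passed to CSINC.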
    2608             : 
    2609             : /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
    2610             : /// value.
    2611          53 : bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
    2612          53 :   if (!SI->getType()->isIntegerTy(1))
    2613             :     return false;
    2614             : 
    2615             :   const Value *Src1Val, *Src2Val;
    2616           6 :   unsigned Opc = 0;
    2617           6 :   bool NeedExtraOp = false;
    2618           9 :   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
    2619           3 :     if (CI->isOne()) {
    2620           2 :       Src1Val = SI->getCondition();
    2621           2 :       Src2Val = SI->getFalseValue();
    2622           2 :       Opc = AArch64::ORRWrr;
    2623             :     } else {
    2624             :       assert(CI->isZero());
    2625           1 :       Src1Val = SI->getFalseValue();
    2626           1 :       Src2Val = SI->getCondition();
    2627           1 :       Opc = AArch64::BICWrr;
    2628             :     }
    2629           5 :   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
    2630           2 :     if (CI->isOne()) {
    2631           1 :       Src1Val = SI->getCondition();
    2632           1 :       Src2Val = SI->getTrueValue();
    2633           1 :       Opc = AArch64::ORRWrr;
    2634           1 :       NeedExtraOp = true;
    2635             :     } else {
    2636             :       assert(CI->isZero());
    2637           1 :       Src1Val = SI->getCondition();
    2638           1 :       Src2Val = SI->getTrueValue();
    2639           1 :       Opc = AArch64::ANDWrr;
    2640             :     }
    2641             :   }
    2642             : 
    2643           6 :   if (!Opc)
    2644             :     return false;
    2645             : 
    2646           5 :   unsigned Src1Reg = getRegForValue(Src1Val);
    2647           5 :   if (!Src1Reg)
    2648             :     return false;
    2649           5 :   bool Src1IsKill = hasTrivialKill(Src1Val);
    2650             : 
    2651           5 :   unsigned Src2Reg = getRegForValue(Src2Val);
    2652           5 :   if (!Src2Reg)
    2653             :     return false;
    2654           5 :   bool Src2IsKill = hasTrivialKill(Src2Val);
    2655             : 
    2656           5 :   if (NeedExtraOp) {
    2657           2 :     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
    2658           1 :     Src1IsKill = true;
    2659             :   }
    2660           5 :   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
    2661           5 :                                        Src1IsKill, Src2Reg, Src2IsKill);
    2662           5 :   updateValueMap(SI, ResultReg);
    2663           5 :   return true;
    2664             : }
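                      : // Sketches of the folds above: 'c ? true : x' becomes an ORR of the two
                      : // i1 values and 'c ? x : false' becomes an AND; no conditional select
                      : // instruction is needed for these i1 selects.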
    2665             : 
    2666          53 : bool AArch64FastISel::selectSelect(const Instruction *I) {
    2667             :   assert(isa<SelectInst>(I) && "Expected a select instruction.");
    2668          53 :   MVT VT;
    2669          53 :   if (!isTypeSupported(I->getType(), VT))
    2670             :     return false;
    2671             : 
    2672             :   unsigned Opc;
    2673             :   const TargetRegisterClass *RC;
    2674          53 :   switch (VT.SimpleTy) {
    2675             :   default:
    2676             :     return false;
    2677             :   case MVT::i1:
    2678             :   case MVT::i8:
    2679             :   case MVT::i16:
    2680             :   case MVT::i32:
    2681             :     Opc = AArch64::CSELWr;
    2682             :     RC = &AArch64::GPR32RegClass;
    2683             :     break;
    2684           8 :   case MVT::i64:
    2685           8 :     Opc = AArch64::CSELXr;
    2686           8 :     RC = &AArch64::GPR64RegClass;
    2687           8 :     break;
    2688          26 :   case MVT::f32:
    2689          26 :     Opc = AArch64::FCSELSrrr;
    2690          26 :     RC = &AArch64::FPR32RegClass;
    2691          26 :     break;
    2692           1 :   case MVT::f64:
    2693           1 :     Opc = AArch64::FCSELDrrr;
    2694           1 :     RC = &AArch64::FPR64RegClass;
    2695           1 :     break;
    2696             :   }
    2697             : 
    2698          53 :   const SelectInst *SI = cast<SelectInst>(I);
    2699          53 :   const Value *Cond = SI->getCondition();
    2700          53 :   AArch64CC::CondCode CC = AArch64CC::NE;
    2701          53 :   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
    2702             : 
    2703          53 :   if (optimizeSelect(SI))
    2704             :     return true;
    2705             : 
     2706             :   // Try to pick up the flags so we don't have to emit another compare.
    2707          48 :   if (foldXALUIntrinsic(CC, I, Cond)) {
     2708             :     // Fake-request the condition to force emission of the XALU intrinsic.
    2709          12 :     unsigned CondReg = getRegForValue(Cond);
    2710          12 :     if (!CondReg)
    2711             :       return false;
    2712         147 :   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
    2713          27 :              isValueAvailable(Cond)) {
    2714          54 :     const auto *Cmp = cast<CmpInst>(Cond);
    2715             :     // Try to optimize or fold the cmp.
    2716          27 :     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
    2717          27 :     const Value *FoldSelect = nullptr;
    2718          27 :     switch (Predicate) {
    2719             :     default:
    2720             :       break;
    2721           1 :     case CmpInst::FCMP_FALSE:
    2722           1 :       FoldSelect = SI->getFalseValue();
    2723           1 :       break;
    2724           1 :     case CmpInst::FCMP_TRUE:
    2725           1 :       FoldSelect = SI->getTrueValue();
    2726           1 :       break;
    2727             :     }
    2728             : 
    2729           2 :     if (FoldSelect) {
    2730           2 :       unsigned SrcReg = getRegForValue(FoldSelect);
    2731           2 :       if (!SrcReg)
    2732             :         return false;
    2733           2 :       unsigned UseReg = lookUpRegForValue(SI);
    2734           2 :       if (UseReg)
    2735           2 :         MRI.clearKillFlags(UseReg);
    2736             : 
    2737           2 :       updateValueMap(I, SrcReg);
    2738           2 :       return true;
    2739             :     }
    2740             : 
    2741             :     // Emit the cmp.
    2742          75 :     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
    2743             :       return false;
    2744             : 
    2745             :     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
    2746          25 :     CC = getCompareCC(Predicate);
    2747          25 :     switch (Predicate) {
    2748             :     default:
    2749             :       break;
    2750           1 :     case CmpInst::FCMP_UEQ:
    2751           1 :       ExtraCC = AArch64CC::EQ;
    2752           1 :       CC = AArch64CC::VS;
    2753           1 :       break;
    2754           1 :     case CmpInst::FCMP_ONE:
    2755           1 :       ExtraCC = AArch64CC::MI;
    2756           1 :       CC = AArch64CC::GT;
    2757           1 :       break;
    2758             :     }
    2759             :     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2760             :   } else {
    2761           9 :     unsigned CondReg = getRegForValue(Cond);
    2762           9 :     if (!CondReg)
    2763             :       return false;
    2764           9 :     bool CondIsKill = hasTrivialKill(Cond);
    2765             : 
    2766          18 :     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
    2767           9 :     CondReg = constrainOperandRegClass(II, CondReg, 1);
    2768             : 
    2769             :     // Emit a TST instruction (ANDS wzr, reg, #imm).
    2770          18 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
    2771           9 :             AArch64::WZR)
    2772           9 :         .addReg(CondReg, getKillRegState(CondIsKill))
    2773          18 :         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    2774             :   }
    2775             : 
    2776          92 :   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
    2777          92 :   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
    2778             : 
    2779          92 :   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
    2780          92 :   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
    2781             : 
    2782          46 :   if (!Src1Reg || !Src2Reg)
    2783             :     return false;
    2784             : 
    2785          46 :   if (ExtraCC != AArch64CC::AL) {
    2786           2 :     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
    2787             :                                Src2IsKill, ExtraCC);
    2788           2 :     Src2IsKill = true;
    2789             :   }
    2790          46 :   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
    2791          46 :                                         Src2IsKill, CC);
    2792          46 :   updateValueMap(I, ResultReg);
    2793          46 :   return true;
    2794             : }
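                      : // A sketch for a plain boolean condition: 'c ? a : b' with float operands
                      : // is expected to select to something like
                      : //   tst   w0, #0x1
                      : //   fcsel s0, s0, s1, ne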
    2795             : 
    2796           7 : bool AArch64FastISel::selectFPExt(const Instruction *I) {
    2797          14 :   Value *V = I->getOperand(0);
    2798          19 :   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
    2799             :     return false;
    2800             : 
    2801           5 :   unsigned Op = getRegForValue(V);
    2802           5 :   if (Op == 0)
    2803             :     return false;
    2804             : 
    2805           5 :   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
    2806          15 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
    2807          15 :           ResultReg).addReg(Op);
    2808           5 :   updateValueMap(I, ResultReg);
    2809           5 :   return true;
    2810             : }
    2811             : 
    2812           2 : bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
    2813           4 :   Value *V = I->getOperand(0);
    2814           5 :   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
    2815             :     return false;
    2816             : 
    2817           1 :   unsigned Op = getRegForValue(V);
    2818           1 :   if (Op == 0)
    2819             :     return false;
    2820             : 
    2821           1 :   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
    2822           3 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
    2823           3 :           ResultReg).addReg(Op);
    2824           1 :   updateValueMap(I, ResultReg);
    2825           1 :   return true;
    2826             : }
    2827             : 
    2828             : // FPToUI and FPToSI
    2829          19 : bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
    2830          19 :   MVT DestVT;
    2831          38 :   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    2832             :     return false;
    2833             : 
    2834          36 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    2835          18 :   if (SrcReg == 0)
    2836             :     return false;
    2837             : 
    2838          36 :   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
    2839          34 :   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
    2840             :     return false;
    2841             : 
    2842             :   unsigned Opc;
    2843          28 :   if (SrcVT == MVT::f64) {
    2844           7 :     if (Signed)
    2845           0 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    2846             :     else
    2847          14 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
    2848             :   } else {
    2849           7 :     if (Signed)
    2850           0 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    2851             :     else
    2852          14 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
    2853             :   }
    2854          28 :   unsigned ResultReg = createResultReg(
    2855          42 :       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
    2856          42 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    2857          14 :       .addReg(SrcReg);
    2858          14 :   updateValueMap(I, ResultReg);
    2859          14 :   return true;
    2860             : }
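                      : // E.g. '(unsigned)d' for a double source is expected to become a single
                      : //   fcvtzu w0, d0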
    2861             : 
    2862          30 : bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
    2863          30 :   MVT DestVT;
    2864          60 :   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    2865             :     return false;
     2866             :   // Let regular ISel handle FP16.
    2867          28 :   if (DestVT == MVT::f16)
    2868             :     return false;
    2869             : 
    2870             :   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
    2871             :          "Unexpected value type.");
    2872             : 
    2873          36 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    2874          18 :   if (!SrcReg)
    2875             :     return false;
    2876          36 :   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
    2877             : 
    2878          36 :   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
    2879             : 
     2880             :   // Handle sub-32-bit integer extension (sign- or zero-extend to i32).
    2881          48 :   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    2882           6 :     SrcReg =
    2883          18 :         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    2884           6 :     if (!SrcReg)
    2885             :       return false;
    2886             :     SrcIsKill = true;
    2887             :   }
    2888             : 
    2889             :   unsigned Opc;
    2890          36 :   if (SrcVT == MVT::i64) {
    2891           6 :     if (Signed)
    2892           0 :       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    2893             :     else
    2894          12 :       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
    2895             :   } else {
    2896          12 :     if (Signed)
    2897           6 :       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    2898             :     else
    2899          18 :       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
    2900             :   }
    2901             : 
    2902          18 :   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
    2903          18 :                                       SrcIsKill);
    2904          18 :   updateValueMap(I, ResultReg);
    2905          18 :   return true;
    2906             : }
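                      : // E.g. '(float)c' for a signed char source is expected to become
                      : //   sxtb  w8, w0
                      : //   scvtf s0, w8
                      : // i.e. the sub-32-bit source is extended before the convert, as above.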
    2907             : 
    2908        1214 : bool AArch64FastISel::fastLowerArguments() {
    2909        1214 :   if (!FuncInfo.CanLowerReturn)
    2910             :     return false;
    2911             : 
    2912        1214 :   const Function *F = FuncInfo.Fn;
    2913        1214 :   if (F->isVarArg())
    2914             :     return false;
    2915             : 
    2916        1213 :   CallingConv::ID CC = F->getCallingConv();
    2917        1213 :   if (CC != CallingConv::C && CC != CallingConv::Swift)
    2918             :     return false;
    2919             : 
    2920             :   // Only handle simple cases of up to 8 GPR and FPR each.
    2921        1207 :   unsigned GPRCnt = 0;
    2922        1207 :   unsigned FPRCnt = 0;
    2923        3110 :   for (auto const &Arg : F->args()) {
    2924        4028 :     if (Arg.hasAttribute(Attribute::ByVal) ||
    2925        4028 :         Arg.hasAttribute(Attribute::InReg) ||
    2926        4026 :         Arg.hasAttribute(Attribute::StructRet) ||
    2927        4017 :         Arg.hasAttribute(Attribute::SwiftSelf) ||
    2928        6017 :         Arg.hasAttribute(Attribute::SwiftError) ||
    2929        1998 :         Arg.hasAttribute(Attribute::Nest))
    2930         111 :       return false;
    2931             : 
    2932        1998 :     Type *ArgTy = Arg.getType();
    2933        3996 :     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
    2934             :       return false;
    2935             : 
    2936        1992 :     EVT ArgVT = TLI.getValueType(DL, ArgTy);
    2937        1992 :     if (!ArgVT.isSimple())
    2938             :       return false;
    2939             : 
    2940        3984 :     MVT VT = ArgVT.getSimpleVT().SimpleTy;
    2941        1992 :     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
    2942             :       return false;
    2943             : 
    2944        2112 :     if (VT.isVector() &&
    2945         240 :         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
    2946             :       return false;
    2947             : 
    2948        1921 :     if (VT >= MVT::i1 && VT <= MVT::i64)
    2949        1593 :       ++GPRCnt;
    2950         693 :     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
    2951          41 :              VT.is128BitVector())
    2952         316 :       ++FPRCnt;
    2953             :     else
    2954             :       return false;
    2955             : 
    2956        1909 :     if (GPRCnt > 8 || FPRCnt > 8)
    2957             :       return false;
    2958             :   }
    2959             : 
    2960             :   static const MCPhysReg Registers[6][8] = {
    2961             :     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
    2962             :       AArch64::W5, AArch64::W6, AArch64::W7 },
    2963             :     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
    2964             :       AArch64::X5, AArch64::X6, AArch64::X7 },
    2965             :     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
    2966             :       AArch64::H5, AArch64::H6, AArch64::H7 },
    2967             :     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
    2968             :       AArch64::S5, AArch64::S6, AArch64::S7 },
    2969             :     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
    2970             :       AArch64::D5, AArch64::D6, AArch64::D7 },
    2971             :     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
    2972             :       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
    2973             :   };
    2974             : 
    2975        1096 :   unsigned GPRIdx = 0;
    2976        1096 :   unsigned FPRIdx = 0;
    2977        2930 :   for (auto const &Arg : F->args()) {
    2978        3668 :     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    2979             :     unsigned SrcReg;
    2980             :     const TargetRegisterClass *RC;
    2981        1834 :     if (VT >= MVT::i1 && VT <= MVT::i32) {
    2982         657 :       SrcReg = Registers[0][GPRIdx++];
    2983         657 :       RC = &AArch64::GPR32RegClass;
    2984         657 :       VT = MVT::i32;
    2985        1177 :     } else if (VT == MVT::i64) {
    2986         870 :       SrcReg = Registers[1][GPRIdx++];
    2987         870 :       RC = &AArch64::GPR64RegClass;
    2988         307 :     } else if (VT == MVT::f16) {
    2989           2 :       SrcReg = Registers[2][FPRIdx++];
    2990           2 :       RC = &AArch64::FPR16RegClass;
    2991         305 :     } else if (VT == MVT::f32) {
    2992         193 :       SrcReg = Registers[3][FPRIdx++];
    2993         193 :       RC = &AArch64::FPR32RegClass;
    2994         112 :     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
    2995          83 :       SrcReg = Registers[4][FPRIdx++];
    2996          83 :       RC = &AArch64::FPR64RegClass;
    2997          29 :     } else if (VT.is128BitVector()) {
    2998          29 :       SrcReg = Registers[5][FPRIdx++];
    2999          29 :       RC = &AArch64::FPR128RegClass;
    3000             :     } else
    3001           0 :       llvm_unreachable("Unexpected value type.");
    3002             : 
    3003        1834 :     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    3004             :     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    3005             :     // Without this, EmitLiveInCopies may eliminate the livein if its only
    3006             :     // use is a bitcast (which isn't turned into an instruction).
    3007        1834 :     unsigned ResultReg = createResultReg(RC);
    3008        3668 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3009        3668 :             TII.get(TargetOpcode::COPY), ResultReg)
    3010        1834 :         .addReg(DstReg, getKillRegState(true));
    3011        1834 :     updateValueMap(&Arg, ResultReg);
    3012             :   }
    3013             :   return true;
    3014             : }
    3015             : 
    3016         127 : bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
    3017             :                                       SmallVectorImpl<MVT> &OutVTs,
    3018             :                                       unsigned &NumBytes) {
    3019         127 :   CallingConv::ID CC = CLI.CallConv;
    3020         254 :   SmallVector<CCValAssign, 16> ArgLocs;
    3021         254 :   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
    3022         254 :   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
    3023             : 
    3024             :   // Get a count of how many bytes are to be pushed on the stack.
    3025         127 :   NumBytes = CCInfo.getNextStackOffset();
    3026             : 
    3027             :   // Issue CALLSEQ_START
    3028         127 :   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
    3029         254 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    3030         381 :     .addImm(NumBytes).addImm(0);
    3031             : 
    3032             :   // Process the args.
    3033        1701 :   for (CCValAssign &VA : ArgLocs) {
    3034        2648 :     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    3035        2648 :     MVT ArgVT = OutVTs[VA.getValNo()];
    3036             : 
    3037        1324 :     unsigned ArgReg = getRegForValue(ArgVal);
    3038        1324 :     if (!ArgReg)
    3039           4 :       return false;
    3040             : 
    3041             :     // Handle arg promotion: SExt, ZExt, AExt.
    3042        1322 :     switch (VA.getLocInfo()) {
    3043             :     case CCValAssign::Full:
    3044             :       break;
    3045          15 :     case CCValAssign::SExt: {
    3046          15 :       MVT DestVT = VA.getLocVT();
    3047          15 :       MVT SrcVT = ArgVT;
    3048          15 :       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
    3049          15 :       if (!ArgReg)
    3050           0 :         return false;
    3051          15 :       break;
    3052             :     }
    3053          84 :     case CCValAssign::AExt:
    3054             :     // Intentional fall-through.
    3055             :     case CCValAssign::ZExt: {
    3056          84 :       MVT DestVT = VA.getLocVT();
    3057          84 :       MVT SrcVT = ArgVT;
    3058          84 :       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
    3059          84 :       if (!ArgReg)
    3060           0 :         return false;
    3061          84 :       break;
    3062             :     }
    3063           0 :     default:
    3064           0 :       llvm_unreachable("Unknown arg promotion!");
    3065             :     }
    3066             : 
    3067             :     // Now copy/store arg to correct locations.
    3068        1580 :     if (VA.isRegLoc() && !VA.needsCustom()) {
    3069         516 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3070         774 :               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
    3071         258 :       CLI.OutRegs.push_back(VA.getLocReg());
    3072        1064 :     } else if (VA.needsCustom()) {
    3073             :       // FIXME: Handle custom args.
    3074             :       return false;
    3075             :     } else {
    3076             :       assert(VA.isMemLoc() && "Assuming store on stack.");
    3077             : 
    3078             :       // Don't emit stores for undef values.
    3079        2128 :       if (isa<UndefValue>(ArgVal))
    3080        1032 :         continue;
    3081             : 
    3082             :       // Need to store on the stack.
    3083          32 :       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
    3084             : 
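                     :       // On big-endian targets, an argument smaller than 8 bytes is passed in
                     :       // the high bytes of its 8-byte stack slot, so bias the store offset by
                     :       // the unused low bytes.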
    3085          32 :       unsigned BEAlign = 0;
    3086          32 :       if (ArgSize < 8 && !Subtarget->isLittleEndian())
    3087           2 :         BEAlign = 8 - ArgSize;
    3088             : 
    3089          32 :       Address Addr;
    3090          32 :       Addr.setKind(Address::RegBase);
    3091          64 :       Addr.setReg(AArch64::SP);
    3092          64 :       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
    3093             : 
    3094          32 :       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
    3095         160 :       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    3096          32 :           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
    3097          64 :           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
    3098             : 
    3099          32 :       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
    3100           2 :         return false;
    3101             :     }
    3102             :   }
    3103             :   return true;
    3104             : }
    3105             : 
    3106         123 : bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
    3107             :                                  unsigned NumBytes) {
    3108         123 :   CallingConv::ID CC = CLI.CallConv;
    3109             : 
    3110             :   // Issue CALLSEQ_END
    3111         123 :   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
    3112         369 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
    3113         369 :     .addImm(NumBytes).addImm(0);
    3114             : 
    3115             :   // Now the return value.
    3116         246 :   if (RetVT != MVT::isVoid) {
    3117         124 :     SmallVector<CCValAssign, 16> RVLocs;
    3118         124 :     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    3119         134 :     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
    3120             : 
    3121             :     // Only handle a single return value.
    3122          67 :     if (RVLocs.size() != 1)
    3123          10 :       return false;
    3124             : 
    3125             :     // Copy all of the result registers out of their specified physreg.
    3126          67 :     MVT CopyVT = RVLocs[0].getValVT();
    3127             : 
    3128             :     // TODO: Handle big-endian results
    3129          67 :     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
    3130             :       return false;
    3131             : 
    3132          57 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    3133         114 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3134         114 :             TII.get(TargetOpcode::COPY), ResultReg)
    3135          57 :         .addReg(RVLocs[0].getLocReg());
    3136         114 :     CLI.InRegs.push_back(RVLocs[0].getLocReg());
    3137             : 
    3138          57 :     CLI.ResultReg = ResultReg;
    3139          57 :     CLI.NumResultRegs = 1;
    3140             :   }
    3141             : 
    3142             :   return true;
    3143             : }
    3144             : 
    3145         238 : bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
    3146         238 :   CallingConv::ID CC  = CLI.CallConv;
    3147         238 :   bool IsTailCall     = CLI.IsTailCall;
    3148         238 :   bool IsVarArg       = CLI.IsVarArg;
    3149         238 :   const Value *Callee = CLI.Callee;
    3150         238 :   MCSymbol *Symbol = CLI.Symbol;
    3151             : 
    3152         238 :   if (!Callee && !Symbol)
    3153             :     return false;
    3154             : 
    3155             :   // Allow SelectionDAG isel to handle tail calls.
    3156         238 :   if (IsTailCall)
    3157             :     return false;
    3158             : 
    3159         216 :   CodeModel::Model CM = TM.getCodeModel();
    3160             :   // Only support the small-addressing and large code models.
    3161         216 :   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    3162             :     return false;
    3163             : 
    3164             :   // FIXME: Add large code model support for ELF.
    3165         232 :   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    3166             :     return false;
    3167             : 
    3168             :   // Let SDISel handle vararg functions.
    3169         216 :   if (IsVarArg)
    3170             :     return false;
    3171             : 
    3172             :   // FIXME: Only handle *simple* calls for now.
    3173         212 :   MVT RetVT;
    3174         424 :   if (CLI.RetTy->isVoidTy())
    3175          60 :     RetVT = MVT::isVoid;
    3176         152 :   else if (!isTypeLegal(CLI.RetTy, RetVT))
    3177             :     return false;
    3178             : 
    3179        2000 :   for (auto Flag : CLI.OutFlags)
    3180        7010 :     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
    3181        4202 :         Flag.isSwiftSelf() || Flag.isSwiftError())
    3182           5 :       return false;
    3183             : 
    3184             :   // Set up the argument vectors.
    3185         196 :   SmallVector<MVT, 16> OutVTs;
    3186         588 :   OutVTs.reserve(CLI.OutVals.size());
    3187             : 
    3188        1915 :   for (auto *Val : CLI.OutVals) {
    3189        1396 :     MVT VT;
    3190        1504 :     if (!isTypeLegal(Val->getType(), VT) &&
    3191         306 :         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
    3192          69 :       return false;
    3193             : 
    3194             :     // We don't handle vector parameters yet.
    3195        1387 :     if (VT.isVector() || VT.getSizeInBits() > 64)
    3196             :       return false;
    3197             : 
    3198        1327 :     OutVTs.push_back(VT);
    3199             :   }
    3200             : 
    3201         127 :   Address Addr;
    3202         127 :   if (Callee && !computeCallAddress(Callee, Addr))
    3203             :     return false;
    3204             : 
    3205             :   // Handle the arguments now that we've gotten them.
    3206             :   unsigned NumBytes;
    3207         127 :   if (!processCallArgs(CLI, OutVTs, NumBytes))
    3208             :     return false;
    3209             : 
    3210             :   // Issue the call.
    3211         123 :   MachineInstrBuilder MIB;
    3212         246 :   if (Subtarget->useSmallAddressing()) {
    3213         214 :     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
    3214         107 :     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
    3215         107 :     if (Symbol)
    3216             :       MIB.addSym(Symbol, 0);
    3217          92 :     else if (Addr.getGlobalValue())
    3218          73 :       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    3219          19 :     else if (Addr.getReg()) {
    3220          19 :       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
    3221          19 :       MIB.addReg(Reg);
    3222             :     } else
    3223             :       return false;
    3224             :   } else {
    3225          16 :     unsigned CallReg = 0;
    3226          16 :     if (Symbol) {
    3227           8 :       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
    3228          24 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    3229          16 :               ADRPReg)
    3230           8 :           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
    3231             : 
    3232           8 :       CallReg = createResultReg(&AArch64::GPR64RegClass);
    3233          16 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3234          16 :               TII.get(AArch64::LDRXui), CallReg)
    3235           8 :           .addReg(ADRPReg)
    3236           8 :           .addSym(Symbol,
    3237             :                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    3238           8 :     } else if (Addr.getGlobalValue())
    3239           7 :       CallReg = materializeGV(Addr.getGlobalValue());
    3240           1 :     else if (Addr.getReg())
    3241             :       CallReg = Addr.getReg();
    3242             : 
    3243          16 :     if (!CallReg)
    3244             :       return false;
    3245             : 
    3246          32 :     const MCInstrDesc &II = TII.get(AArch64::BLR);
    3247          16 :     CallReg = constrainOperandRegClass(II, CallReg, 0);
    3248          16 :     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
    3249             :   }
    3250             : 
    3251             :   // Add implicit physical register uses to the call.
    3252         610 :   for (auto Reg : CLI.OutRegs)
    3253         241 :     MIB.addReg(Reg, RegState::Implicit);
    3254             : 
    3255             :   // Add a register mask with the call-preserved registers.
    3256             :   // Proper defs for return values will be added by setPhysRegsDeadExcept().
    3257         246 :   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
    3258             : 
    3259         123 :   CLI.Call = MIB;
    3260             : 
    3261             :   // Finish off the call including any return values.
    3262         123 :   return finishCall(CLI, RetVT, NumBytes);
    3263             : }
    3264             : 
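                     : // A memcpy with a known alignment is considered small if it needs at most
                     : // four naturally aligned load/store pairs, e.g. Len = 16 with Alignment = 4;
                     : // with an unknown alignment, fall back to a raw byte-count bound instead.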
    3265             : bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
    3266          31 :   if (Alignment)
    3267          31 :     return Len / Alignment <= 4;
    3268             :   else
    3269           0 :     return Len < 32;
    3270             : }
    3271             : 
    3272          13 : bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
    3273             :                                          uint64_t Len, unsigned Alignment) {
    3274             :   // Make sure we don't bloat code by inlining very large memcpy's.
    3275          26 :   if (!isMemCpySmall(Len, Alignment))
    3276             :     return false;
    3277             : 
    3278             :   int64_t UnscaledOffset = 0;
    3279             :   Address OrigDest = Dest;
    3280             :   Address OrigSrc = Src;
    3281             : 
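                     :   // Copy using the widest integer type that the remaining length and the
                     :   // alignment allow, e.g. Len = 10 with Alignment = 8 emits an i64 copy
                     :   // followed by an i16 copy.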
    3282          95 :   while (Len) {
    3283          41 :     MVT VT;
    3284          41 :     if (!Alignment || Alignment >= 8) {
    3285          29 :       if (Len >= 8)
    3286          27 :         VT = MVT::i64;
    3287           2 :       else if (Len >= 4)
    3288           0 :         VT = MVT::i32;
    3289           2 :       else if (Len >= 2)
    3290           0 :         VT = MVT::i16;
    3291             :       else {
    3292           2 :         VT = MVT::i8;
    3293             :       }
    3294             :     } else {
    3295             :       // Bound based on alignment.
    3296          12 :       if (Len >= 4 && Alignment == 4)
    3297           2 :         VT = MVT::i32;
    3298          10 :       else if (Len >= 2 && Alignment == 2)
    3299           3 :         VT = MVT::i16;
    3300             :       else {
    3301           7 :         VT = MVT::i8;
    3302             :       }
    3303             :     }
    3304             : 
    3305          41 :     unsigned ResultReg = emitLoad(VT, VT, Src);
    3306          41 :     if (!ResultReg)
    3307           0 :       return false;
    3308             : 
    3309          41 :     if (!emitStore(VT, ResultReg, Dest))
    3310             :       return false;
    3311             : 
    3312          41 :     int64_t Size = VT.getSizeInBits() / 8;
    3313          41 :     Len -= Size;
    3314          41 :     UnscaledOffset += Size;
    3315             : 
    3316             :     // We need to recompute the unscaled offset for each iteration.
    3317          82 :     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    3318          82 :     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
    3319             :   }
    3320             : 
    3321             :   return true;
    3322             : }
    3323             : 
    3324             : /// \brief Check if it is possible to fold the condition from the XALU intrinsic
    3325             : /// into the user. The condition code will only be updated on success.
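                     : ///
                     : /// For example (illustrative IR):
                     : ///   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
                     : ///   %obit = extractvalue { i32, i1 } %res, 1
                     : ///   br i1 %obit, label %overflow, label %normal
                     : /// lets the branch reuse the flags set by the ADDS and test B.VS directly.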
    3326          84 : bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
    3327             :                                         const Instruction *I,
    3328             :                                         const Value *Cond) {
    3329          84 :   if (!isa<ExtractValueInst>(Cond))
    3330             :     return false;
    3331             : 
    3332          52 :   const auto *EV = cast<ExtractValueInst>(Cond);
    3333          26 :   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    3334             :     return false;
    3335             : 
    3336          52 :   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
    3337          26 :   MVT RetVT;
    3338          52 :   const Function *Callee = II->getCalledFunction();
    3339             :   Type *RetTy =
    3340          52 :   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
    3341          26 :   if (!isTypeLegal(RetTy, RetVT))
    3342             :     return false;
    3343             : 
    3344          52 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    3345             :     return false;
    3346             : 
    3347          52 :   const Value *LHS = II->getArgOperand(0);
    3348          52 :   const Value *RHS = II->getArgOperand(1);
    3349             : 
    3350             :   // Canonicalize immediate to the RHS.
    3351          52 :   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
    3352           0 :       isCommutativeIntrinsic(II))
    3353             :     std::swap(LHS, RHS);
    3354             : 
    3355             :   // Simplify multiplies.
    3356          26 :   Intrinsic::ID IID = II->getIntrinsicID();
    3357          26 :   switch (IID) {
    3358             :   default:
    3359             :     break;
    3360           5 :   case Intrinsic::smul_with_overflow:
    3361           6 :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3362           1 :       if (C->getValue() == 2)
    3363             :         IID = Intrinsic::sadd_with_overflow;
    3364             :     break;
    3365           5 :   case Intrinsic::umul_with_overflow:
    3366           6 :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3367           1 :       if (C->getValue() == 2)
    3368             :         IID = Intrinsic::uadd_with_overflow;
    3369             :     break;
    3370             :   }
    3371             : 
    3372             :   AArch64CC::CondCode TmpCC;
    3373          24 :   switch (IID) {
    3374             :   default:
    3375             :     return false;
    3376             :   case Intrinsic::sadd_with_overflow:
    3377             :   case Intrinsic::ssub_with_overflow:
    3378             :     TmpCC = AArch64CC::VS;
    3379             :     break;
    3380             :   case Intrinsic::uadd_with_overflow:
    3381             :     TmpCC = AArch64CC::HS;
    3382             :     break;
    3383           4 :   case Intrinsic::usub_with_overflow:
    3384           4 :     TmpCC = AArch64CC::LO;
    3385           4 :     break;
    3386           8 :   case Intrinsic::smul_with_overflow:
    3387             :   case Intrinsic::umul_with_overflow:
    3388           8 :     TmpCC = AArch64CC::NE;
    3389           8 :     break;
    3390             :   }
    3391             : 
    3392             :   // Check if both instructions are in the same basic block.
    3393          26 :   if (!isValueAvailable(II))
    3394             :     return false;
    3395             : 
    3396             :   // Make sure nothing is in the way between the intrinsic and its user.
    3397          26 :   BasicBlock::const_iterator Start(I);
    3398          26 :   BasicBlock::const_iterator End(II);
    3399          66 :   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    3400             :     // We only expect extractvalue instructions between the intrinsic and the
    3401             :     // instruction to be selected.
    3402          40 :     if (!isa<ExtractValueInst>(Itr))
    3403             :       return false;
    3404             : 
    3405             :     // Check that the extractvalue operand comes from the intrinsic.
    3406          40 :     const auto *EVI = cast<ExtractValueInst>(Itr);
    3407          40 :     if (EVI->getAggregateOperand() != II)
    3408             :       return false;
    3409             :   }
    3410             : 
    3411          26 :   CC = TmpCC;
    3412          26 :   return true;
    3413             : }
    3414             : 
    3415          94 : bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
    3416             :   // FIXME: Handle more intrinsics.
    3417         188 :   switch (II->getIntrinsicID()) {
    3418             :   default: return false;
    3419           2 :   case Intrinsic::frameaddress: {
    3420           2 :     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    3421           2 :     MFI.setFrameAddressIsTaken(true);
    3422             : 
    3423           4 :     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    3424           2 :     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    3425           2 :     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    3426           4 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3427           6 :             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    3428             :     // Recursively load frame address
    3429             :     // ldr x0, [fp]
    3430             :     // ldr x0, [x0]
    3431             :     // ldr x0, [x0]
    3432             :     // ...
    3433             :     unsigned DestReg;
    3434           8 :     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    3435           6 :     while (Depth--) {
    3436           2 :       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
    3437             :                                 SrcReg, /*IsKill=*/true, 0);
    3438             :       assert(DestReg && "Unexpected LDR instruction emission failure.");
    3439           2 :       SrcReg = DestReg;
    3440             :     }
    3441             : 
    3442           2 :     updateValueMap(II, SrcReg);
    3443           2 :     return true;
    3444             :   }
    3445          19 :   case Intrinsic::memcpy:
    3446             :   case Intrinsic::memmove: {
    3447          38 :     const auto *MTI = cast<MemTransferInst>(II);
    3448             :     // Don't handle volatile.
    3449          19 :     if (MTI->isVolatile())
    3450             :       return false;
    3451             : 
    3452             :     // Disable inlining for memmove before calls to computeAddress. Otherwise,
    3453             :     // we would emit dead code because we don't currently handle memmoves.
    3454          38 :     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    3455          57 :     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
    3456             :       // Small memcpy's are common enough that we want to do them without a call
    3457             :       // if possible.
    3458          72 :       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
    3459          36 :       unsigned Alignment = MTI->getAlignment();
    3460          36 :       if (isMemCpySmall(Len, Alignment)) {
    3461          26 :         Address Dest, Src;
    3462          39 :         if (!computeAddress(MTI->getRawDest(), Dest) ||
    3463          13 :             !computeAddress(MTI->getRawSource(), Src))
    3464          13 :           return false;
    3465          13 :         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
    3466             :           return true;
    3467             :       }
    3468             :     }
    3469             : 
    3470          12 :     if (!MTI->getLength()->getType()->isIntegerTy(64))
    3471             :       return false;
    3472             : 
    3473          12 :     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
    3474             :       // Fast instruction selection doesn't support the special
    3475             :       // address spaces.
    3476             :       return false;
    3477             : 
    3478          12 :     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    3479          12 :     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
    3480             :   }
    3481           1 :   case Intrinsic::memset: {
    3482           2 :     const MemSetInst *MSI = cast<MemSetInst>(II);
    3483             :     // Don't handle volatile.
    3484           1 :     if (MSI->isVolatile())
    3485             :       return false;
    3486             : 
    3487           2 :     if (!MSI->getLength()->getType()->isIntegerTy(64))
    3488             :       return false;
    3489             : 
    3490           2 :     if (MSI->getDestAddressSpace() > 255)
    3491             :       // Fast instruction selection doesn't support the special
    3492             :       // address spaces.
    3493             :       return false;
    3494             : 
    3495           2 :     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
    3496             :   }
    3497          12 :   case Intrinsic::sin:
    3498             :   case Intrinsic::cos:
    3499             :   case Intrinsic::pow: {
    3500          12 :     MVT RetVT;
    3501          12 :     if (!isTypeLegal(II->getType(), RetVT))
    3502             :       return false;
    3503             : 
    3504          24 :     if (RetVT != MVT::f32 && RetVT != MVT::f64)
    3505             :       return false;
    3506             : 
    3507             :     static const RTLIB::Libcall LibCallTable[3][2] = {
    3508             :       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
    3509             :       { RTLIB::COS_F32, RTLIB::COS_F64 },
    3510             :       { RTLIB::POW_F32, RTLIB::POW_F64 }
    3511             :     };
    3512             :     RTLIB::Libcall LC;
    3513          24 :     bool Is64Bit = RetVT == MVT::f64;
    3514          24 :     switch (II->getIntrinsicID()) {
    3515           0 :     default:
    3516           0 :       llvm_unreachable("Unexpected intrinsic.");
    3517           4 :     case Intrinsic::sin:
    3518           4 :       LC = LibCallTable[0][Is64Bit];
    3519           4 :       break;
    3520           4 :     case Intrinsic::cos:
    3521           4 :       LC = LibCallTable[1][Is64Bit];
    3522           4 :       break;
    3523           4 :     case Intrinsic::pow:
    3524           4 :       LC = LibCallTable[2][Is64Bit];
    3525           4 :       break;
    3526             :     }
    3527             : 
    3528          24 :     ArgListTy Args;
    3529          24 :     Args.reserve(II->getNumArgOperands());
    3530             : 
    3531             :     // Populate the argument list.
    3532          28 :     for (auto &Arg : II->arg_operands()) {
    3533          16 :       ArgListEntry Entry;
    3534          16 :       Entry.Val = Arg;
    3535          16 :       Entry.Ty = Arg->getType();
    3536          16 :       Args.push_back(Entry);
    3537             :     }
    3538             : 
    3539          24 :     CallLoweringInfo CLI;
    3540          12 :     MCContext &Ctx = MF->getContext();
    3541          12 :     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
    3542          60 :                   TLI.getLibcallName(LC), std::move(Args));
    3543          12 :     if (!lowerCallTo(CLI))
    3544             :       return false;
    3545          12 :     updateValueMap(II, CLI.ResultReg);
    3546          12 :     return true;
    3547             :   }
    3548           2 :   case Intrinsic::fabs: {
    3549           2 :     MVT VT;
    3550           2 :     if (!isTypeLegal(II->getType(), VT))
    3551             :       return false;
    3552             : 
    3553             :     unsigned Opc;
    3554           2 :     switch (VT.SimpleTy) {
    3555             :     default:
    3556             :       return false;
    3557             :     case MVT::f32:
    3558             :       Opc = AArch64::FABSSr;
    3559             :       break;
    3560           1 :     case MVT::f64:
    3561           1 :       Opc = AArch64::FABSDr;
    3562           1 :       break;
    3563             :     }
    3564           4 :     unsigned SrcReg = getRegForValue(II->getOperand(0));
    3565           2 :     if (!SrcReg)
    3566             :       return false;
    3567           4 :     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
    3568           2 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    3569           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    3570           2 :       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
    3571           2 :     updateValueMap(II, ResultReg);
    3572           2 :     return true;
    3573             :   }
    3574           1 :   case Intrinsic::trap:
    3575           3 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
    3576           1 :         .addImm(1);
    3577           1 :     return true;
    3578             : 
    3579             :   case Intrinsic::sqrt: {
    3580           6 :     Type *RetTy = II->getCalledFunction()->getReturnType();
    3581             : 
    3582           2 :     MVT VT;
    3583           2 :     if (!isTypeLegal(RetTy, VT))
    3584             :       return false;
    3585             : 
    3586           4 :     unsigned Op0Reg = getRegForValue(II->getOperand(0));
    3587           2 :     if (!Op0Reg)
    3588             :       return false;
    3589           4 :     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
    3590             : 
    3591           2 :     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    3592           2 :     if (!ResultReg)
    3593             :       return false;
    3594             : 
    3595           2 :     updateValueMap(II, ResultReg);
    3596           2 :     return true;
    3597             :   }
    3598             :   case Intrinsic::sadd_with_overflow:
    3599             :   case Intrinsic::uadd_with_overflow:
    3600             :   case Intrinsic::ssub_with_overflow:
    3601             :   case Intrinsic::usub_with_overflow:
    3602             :   case Intrinsic::smul_with_overflow:
    3603             :   case Intrinsic::umul_with_overflow: {
    3604             :     // This implements the basic lowering of the xalu with overflow intrinsics.
    3605          96 :     const Function *Callee = II->getCalledFunction();
    3606          96 :     auto *Ty = cast<StructType>(Callee->getReturnType());
    3607          48 :     Type *RetTy = Ty->getTypeAtIndex(0U);
    3608             : 
    3609          48 :     MVT VT;
    3610          48 :     if (!isTypeLegal(RetTy, VT))
    3611             :       return false;
    3612             : 
    3613          96 :     if (VT != MVT::i32 && VT != MVT::i64)
    3614             :       return false;
    3615             : 
    3616          96 :     const Value *LHS = II->getArgOperand(0);
    3617          96 :     const Value *RHS = II->getArgOperand(1);
    3618             :     // Canonicalize immediate to the RHS.
    3619          96 :     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
    3620           0 :         isCommutativeIntrinsic(II))
    3621             :       std::swap(LHS, RHS);
    3622             : 
    3623             :     // Simplify multiplies.
    3624          96 :     Intrinsic::ID IID = II->getIntrinsicID();
    3625          48 :     switch (IID) {
    3626             :     default:
    3627             :       break;
    3628           8 :     case Intrinsic::smul_with_overflow:
    3629          10 :       if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3630           2 :         if (C->getValue() == 2) {
    3631             :           IID = Intrinsic::sadd_with_overflow;
    3632             :           RHS = LHS;
    3633             :         }
    3634             :       break;
    3635           9 :     case Intrinsic::umul_with_overflow:
    3636          12 :       if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3637           3 :         if (C->getValue() == 2) {
    3638             :           IID = Intrinsic::uadd_with_overflow;
    3639             :           RHS = LHS;
    3640             :         }
    3641             :       break;
    3642             :     }
    3643             : 
    3644          44 :     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    3645          44 :     AArch64CC::CondCode CC = AArch64CC::Invalid;
    3646          44 :     switch (IID) {
    3647           0 :     default: llvm_unreachable("Unexpected intrinsic!");
    3648          14 :     case Intrinsic::sadd_with_overflow:
    3649          28 :       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
    3650          14 :       CC = AArch64CC::VS;
    3651          14 :       break;
    3652           8 :     case Intrinsic::uadd_with_overflow:
    3653          16 :       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
    3654           8 :       CC = AArch64CC::HS;
    3655           8 :       break;
    3656           7 :     case Intrinsic::ssub_with_overflow:
    3657          14 :       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
    3658           7 :       CC = AArch64CC::VS;
    3659           7 :       break;
    3660           6 :     case Intrinsic::usub_with_overflow:
    3661          12 :       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
    3662           6 :       CC = AArch64CC::LO;
    3663           6 :       break;
    3664           6 :     case Intrinsic::smul_with_overflow: {
    3665           6 :       CC = AArch64CC::NE;
    3666           6 :       unsigned LHSReg = getRegForValue(LHS);
    3667           6 :       if (!LHSReg)
    3668             :         return false;
    3669           6 :       bool LHSIsKill = hasTrivialKill(LHS);
    3670             : 
    3671           6 :       unsigned RHSReg = getRegForValue(RHS);
    3672           6 :       if (!RHSReg)
    3673             :         return false;
    3674           6 :       bool RHSIsKill = hasTrivialKill(RHS);
    3675             : 
    3676          12 :       if (VT == MVT::i32) {
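                     :         // Signed overflow occurred iff the high 32 bits of the product differ
                     :         // from the sign extension of the low half, i.e. iff
                     :         // (Mul >> 32) != (MulLo ASR #31); the SUBS below sets NE in that case.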
    3677           6 :         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    3678           3 :         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
    3679           6 :                                        /*IsKill=*/false, 32);
    3680           3 :         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
    3681             :                                             AArch64::sub_32);
    3682           3 :         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
    3683             :                                               AArch64::sub_32);
    3684             :         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
    3685             :                     AArch64_AM::ASR, 31, /*WantResult=*/false);
    3686             :       } else {
    3687             :         assert(VT == MVT::i64 && "Unexpected value type.");
    3688             :         // LHSReg and RHSReg cannot be killed by this Mul, since they are
    3689             :         // reused in the next instruction.
    3690           3 :         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
    3691             :                             /*IsKill=*/false);
    3692           3 :         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
    3693           3 :                                         RHSReg, RHSIsKill);
    3694             :         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
    3695             :                     AArch64_AM::ASR, 63, /*WantResult=*/false);
    3696             :       }
    3697             :       break;
    3698             :     }
    3699           7 :     case Intrinsic::umul_with_overflow: {
    3700           7 :       CC = AArch64CC::NE;
    3701           7 :       unsigned LHSReg = getRegForValue(LHS);
    3702           7 :       if (!LHSReg)
    3703             :         return false;
    3704           7 :       bool LHSIsKill = hasTrivialKill(LHS);
    3705             : 
    3706           7 :       unsigned RHSReg = getRegForValue(RHS);
    3707           7 :       if (!RHSReg)
    3708             :         return false;
    3709           7 :       bool RHSIsKill = hasTrivialKill(RHS);
    3710             : 
    3711          14 :       if (VT == MVT::i32) {
    3712           6 :         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
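                     :         // Unsigned overflow occurred iff the high 32 bits of the product are
                     :         // nonzero; the SUBS against (Mul LSR #32) below sets NE in that case.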
    3713           3 :         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
    3714             :                     /*IsKill=*/false, AArch64_AM::LSR, 32,
    3715           3 :                     /*WantResult=*/false);
    3716           3 :         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
    3717             :                                             AArch64::sub_32);
    3718             :       } else {
    3719             :         assert(VT == MVT::i64 && "Unexpected value type.");
    3720             :         // LHSReg and RHSReg cannot be killed by this Mul, since they are
    3721             :         // reused in the next instruction.
    3722           4 :         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
    3723             :                             /*IsKill=*/false);
    3724           4 :         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
    3725           4 :                                         RHSReg, RHSIsKill);
    3726             :         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
    3727             :                     /*IsKill=*/false, /*WantResult=*/false);
    3728             :       }
    3729             :       break;
    3730             :     }
    3731             :     }
    3732             : 
    3733          48 :     if (MulReg) {
    3734          13 :       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
    3735          26 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3736          39 :               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    3737             :     }
    3738             : 
    3739          96 :     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
    3740             :                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
    3741          48 :                                   /*IsKill=*/true, getInvertedCondCode(CC));
    3742             :     (void)ResultReg2;
    3743             :     assert((ResultReg1 + 1) == ResultReg2 &&
    3744             :            "Nonconsecutive result registers.");
    3745          48 :     updateValueMap(II, ResultReg1, 2);
    3746          48 :     return true;
    3747             :   }
    3748             :   }
    3749             :   return false;
    3750             : }
    3751             : 
    3752        1300 : bool AArch64FastISel::selectRet(const Instruction *I) {
    3753        1300 :   const ReturnInst *Ret = cast<ReturnInst>(I);
    3754        1300 :   const Function &F = *I->getParent()->getParent();
    3755             : 
    3756        1300 :   if (!FuncInfo.CanLowerReturn)
    3757             :     return false;
    3758             : 
    3759        1300 :   if (F.isVarArg())
    3760             :     return false;
    3761             : 
    3762        2598 :   if (TLI.supportSwiftError() &&
    3763        1310 :       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    3764             :     return false;
    3765             : 
    3766        1288 :   if (TLI.supportSplitCSR(FuncInfo.MF))
    3767             :     return false;
    3768             : 
    3769             :   // Build a list of return value registers.
    3770        1285 :   SmallVector<unsigned, 4> RetRegs;
    3771             : 
    3772        1285 :   if (Ret->getNumOperands() > 0) {
    3773         963 :     CallingConv::ID CC = F.getCallingConv();
    3774        1834 :     SmallVector<ISD::OutputArg, 4> Outs;
    3775        1926 :     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
    3776             : 
    3777             :     // Analyze operands of the call, assigning locations to each operand.
    3778        1834 :     SmallVector<CCValAssign, 16> ValLocs;
    3779        2797 :     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    3780         963 :     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
    3781             :                                                      : RetCC_AArch64_AAPCS;
    3782         963 :     CCInfo.AnalyzeReturn(Outs, RetCC);
    3783             : 
    3784             :     // Only handle a single return value for now.
    3785         963 :     if (ValLocs.size() != 1)
    3786          92 :       return false;
    3787             : 
    3788         953 :     CCValAssign &VA = ValLocs[0];
    3789         953 :     const Value *RV = Ret->getOperand(0);
    3790             : 
    3791             :     // Don't bother handling odd stuff for now.
    3792        1025 :     if ((VA.getLocInfo() != CCValAssign::Full) &&
    3793          72 :         (VA.getLocInfo() != CCValAssign::BCvt))
    3794             :       return false;
    3795             : 
    3796             :     // Only handle register returns for now.
    3797         953 :     if (!VA.isRegLoc())
    3798             :       return false;
    3799             : 
    3800         953 :     unsigned Reg = getRegForValue(RV);
    3801         953 :     if (Reg == 0)
    3802             :       return false;
    3803             : 
    3804         949 :     unsigned SrcReg = Reg + VA.getValNo();
    3805         949 :     unsigned DestReg = VA.getLocReg();
    3806             :     // Avoid a cross-class copy. This is very unlikely.
    3807        2847 :     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
    3808             :       return false;
    3809             : 
    3810         949 :     EVT RVEVT = TLI.getValueType(DL, RV->getType());
    3811         949 :     if (!RVEVT.isSimple())
    3812             :       return false;
    3813             : 
    3814             :     // Vectors (of > 1 lane) in big endian need tricky handling.
    3815        1041 :     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
    3816          92 :         !Subtarget->isLittleEndian())
    3817             :       return false;
    3818             : 
    3819         889 :     MVT RVVT = RVEVT.getSimpleVT();
    3820         889 :     if (RVVT == MVT::f128)
    3821             :       return false;
    3822             : 
    3823         881 :     MVT DestVT = VA.getValVT();
    3824             :     // Special handling for extended integers.
    3825         881 :     if (RVVT != DestVT) {
    3826         163 :       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
    3827             :         return false;
    3828             : 
    3829         370 :       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
    3830             :         return false;
    3831             : 
    3832         153 :       bool IsZExt = Outs[0].Flags.isZExt();
    3833         153 :       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
    3834         153 :       if (SrcReg == 0)
    3835             :         return false;
    3836             :     }
    3837             : 
    3838             :     // Make the copy.
    3839        1742 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3840        2613 :             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
    3841             : 
    3842             :     // Add register to return instruction.
    3843         871 :     RetRegs.push_back(VA.getLocReg());
    3844             :   }
    3845             : 
    3846        1193 :   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3847        3579 :                                     TII.get(AArch64::RET_ReallyLR));
    3848        4450 :   for (unsigned RetReg : RetRegs)
    3849         871 :     MIB.addReg(RetReg, RegState::Implicit);
    3850             :   return true;
    3851             : }
    3852             : 
    3853          14 : bool AArch64FastISel::selectTrunc(const Instruction *I) {
    3854          14 :   Type *DestTy = I->getType();
    3855          28 :   Value *Op = I->getOperand(0);
    3856          14 :   Type *SrcTy = Op->getType();
    3857             : 
    3858          14 :   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
    3859          14 :   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
    3860          14 :   if (!SrcEVT.isSimple())
    3861             :     return false;
    3862          14 :   if (!DestEVT.isSimple())
    3863             :     return false;
    3864             : 
    3865          14 :   MVT SrcVT = SrcEVT.getSimpleVT();
    3866          14 :   MVT DestVT = DestEVT.getSimpleVT();
    3867             : 
    3868          16 :   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
    3869           2 :       SrcVT != MVT::i8)
    3870             :     return false;
    3871          17 :   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
    3872           5 :       DestVT != MVT::i1)
    3873             :     return false;
    3874             : 
    3875          12 :   unsigned SrcReg = getRegForValue(Op);
    3876          12 :   if (!SrcReg)
    3877             :     return false;
    3878          12 :   bool SrcIsKill = hasTrivialKill(Op);
    3879             : 
    3880             :   // If we're truncating from i64 to a smaller non-legal type then generate an
    3881             :   // AND. Otherwise, we know the high bits are undefined and a truncate only
    3882             :   // generates a COPY. We cannot also mark the source register as the result
    3883             :   // register, because that can incorrectly transfer the kill flag onto the
    3884             :   // source register.
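                     :   // For example, trunc i64 -> i8 becomes an EXTRACT_SUBREG of sub_32
                     :   // followed by "AND Wd, Ws, #0xff".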
    3885             :   unsigned ResultReg;
    3886          12 :   if (SrcVT == MVT::i64) {
    3887           6 :     uint64_t Mask = 0;
    3888           6 :     switch (DestVT.SimpleTy) {
    3889             :     default:
    3890             :       // Trunc i64 to i32 is handled by the target-independent fast-isel.
    3891             :       return false;
    3892             :     case MVT::i1:
    3893             :       Mask = 0x1;
    3894             :       break;
    3895           2 :     case MVT::i8:
    3896           2 :       Mask = 0xff;
    3897           2 :       break;
    3898           1 :     case MVT::i16:
    3899           1 :       Mask = 0xffff;
    3900           1 :       break;
    3901             :     }
    3902             :     // Issue an extract_subreg to get the lower 32-bits.
    3903          12 :     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
    3904           6 :                                                 AArch64::sub_32);
    3905             :     // Create the AND instruction which performs the actual truncation.
    3906          12 :     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    3907             :     assert(ResultReg && "Unexpected AND instruction emission failure.");
    3908             :   } else {
    3909           6 :     ResultReg = createResultReg(&AArch64::GPR32RegClass);
    3910          12 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3911          12 :             TII.get(TargetOpcode::COPY), ResultReg)
    3912           6 :         .addReg(SrcReg, getKillRegState(SrcIsKill));
    3913             :   }
    3914             : 
    3915          12 :   updateValueMap(I, ResultReg);
    3916          12 :   return true;
    3917             : }
    3918             : 
    3919         141 : unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
    3920             :   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
    3921             :           DestVT == MVT::i64) &&
    3922             :          "Unexpected value type.");
    3923             :   // Handle i8 and i16 as i32.
    3924         141 :   if (DestVT == MVT::i8 || DestVT == MVT::i16)
    3925             :     DestVT = MVT::i32;
    3926             : 
    3927         141 :   if (IsZExt) {
    3928         264 :     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    3929             :     assert(ResultReg && "Unexpected AND instruction emission failure.");
    3930         132 :     if (DestVT == MVT::i64) {
    3931             :       // We're zero-extending i1 to i64.  The ANDWri Wd, Ws, #1 implicitly
    3932             :       // clears the upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
    3933           0 :       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    3934           0 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3935           0 :               TII.get(AArch64::SUBREG_TO_REG), Reg64)
    3936           0 :           .addImm(0)
    3937           0 :           .addReg(ResultReg)
    3938           0 :           .addImm(AArch64::sub_32);
    3939           0 :       ResultReg = Reg64;
    3940             :     }
    3941             :     return ResultReg;
    3942             :   } else {
    3943           9 :     if (DestVT == MVT::i64) {
    3944             :       // FIXME: Sign extension of i1 to i64 is not implemented yet.
    3945             :       return 0;
    3946             :     }
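                     :     // SBFM Wd, Wn, #0, #0 replicates bit 0 of Wn into every bit of Wd,
                     :     // which is exactly a sign extension of an i1.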
    3947           9 :     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
    3948           9 :                             /*TODO:IsKill=*/false, 0, 0);
    3949             :   }
    3950             : }
    3951             : 
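                     : // AArch64 has no standalone MUL encoding; MUL is an alias for MADD with the
                     : // zero register as accumulator, so multiplies are emitted as MADD{W|X}rrr.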
    3952          19 : unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3953             :                                       unsigned Op1, bool Op1IsKill) {
    3954             :   unsigned Opc, ZReg;
    3955          19 :   switch (RetVT.SimpleTy) {
    3956             :   default: return 0;
    3957           5 :   case MVT::i8:
    3958             :   case MVT::i16:
    3959             :   case MVT::i32:
    3960           5 :     RetVT = MVT::i32;
    3961           5 :     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
    3962             :   case MVT::i64:
    3963             :     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
    3964             :   }
    3965             : 
    3966             :   const TargetRegisterClass *RC =
    3967          19 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    3968          19 :   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
                          ZReg, /*IsKill=*/true);
    3970             : }
    3971             : 
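                     : // SMADDL/UMADDL with an XZR accumulator compute the full 64-bit product of
                     : // two 32-bit operands (the SMULL/UMULL aliases), which the *mul.with.overflow
                     : // lowering above uses to detect 32-bit overflow.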
    3972             : unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3973             :                                         unsigned Op1, bool Op1IsKill) {
    3974           3 :   if (RetVT != MVT::i64)
    3975             :     return 0;
    3976             : 
    3977             :   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
    3978             :                           Op0, Op0IsKill, Op1, Op1IsKill,
    3979           3 :                           AArch64::XZR, /*IsKill=*/true);
    3980             : }
    3981             : 
    3982             : unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3983             :                                         unsigned Op1, bool Op1IsKill) {
    3984           3 :   if (RetVT != MVT::i64)
    3985             :     return 0;
    3986             : 
    3987             :   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
    3988             :                           Op0, Op0IsKill, Op1, Op1IsKill,
    3989           3 :                           AArch64::XZR, /*IsKill=*/true);
    3990             : }
    3991             : 
    3992           4 : unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    3993             :                                      unsigned Op1Reg, bool Op1IsKill) {
    3994           4 :   unsigned Opc = 0;
    3995           4 :   bool NeedTrunc = false;
    3996           4 :   uint64_t Mask = 0;
    3997           4 :   switch (RetVT.SimpleTy) {
    3998             :   default: return 0;
    3999             :   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
    4000           1 :   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
    4001           1 :   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
    4002           1 :   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
    4003             :   }
    4004             : 
    4005             :   const TargetRegisterClass *RC =
    4006           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4007           4 :   if (NeedTrunc) {
    4008           4 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4009           2 :     Op1IsKill = true;
    4010             :   }
    4011           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4012           4 :                                        Op1IsKill);
    4013           4 :   if (NeedTrunc)
    4014           4 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4015             :   return ResultReg;
    4016             : }
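                     : 
                     : // Sketch of the truncating case above: variable shifts exist only at
                     : // 32/64-bit width, so an i8 shift masks the shift amount, shifts in a W
                     : // register, and truncates the result (register choices illustrative):
                     : //   %3 = shl i8 %1, %2
                     : //   ==> ANDWri w9, w2, #0xff        ; keep only defined amount bits
                     : //       LSLVWr w8, w1, w9
                     : //       ANDWri w0, w8, #0xff        ; truncate back to i8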
    4017             : 
    4018          55 : unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4019             :                                      bool Op0IsKill, uint64_t Shift,
    4020             :                                      bool IsZExt) {
    4021             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4022             :          "Unexpected source/return type pair.");
    4023             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4024             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4025             :          "Unexpected source value type.");
    4026             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4027             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4028             : 
    4029         110 :   bool Is64Bit = (RetVT == MVT::i64);
    4030          55 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4031          55 :   unsigned DstBits = RetVT.getSizeInBits();
    4032          55 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4033          55 :   const TargetRegisterClass *RC =
    4034          55 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4035             : 
    4036             :   // Just emit a copy for "zero" shifts.
    4037          55 :   if (Shift == 0) {
    4038           2 :     if (RetVT == SrcVT) {
    4039           1 :       unsigned ResultReg = createResultReg(RC);
    4040           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4041           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4042           1 :           .addReg(Op0, getKillRegState(Op0IsKill));
    4043           1 :       return ResultReg;
    4044             :     } else
    4045           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4046             :   }
    4047             : 
    4048             :   // Don't deal with undefined shifts.
    4049          53 :   if (Shift >= DstBits)
    4050             :     return 0;
    4051             : 
    4052             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4053             :   // {S|U}BFM Wd, Wn, #r, #s
    4054             :   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
    4055             : 
    4056             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4057             :   // %2 = shl i16 %1, 4
    4058             :   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
    4059             :   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
    4060             :   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
    4061             :   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
    4062             : 
    4063             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4064             :   // %2 = shl i16 %1, 8
    4065             :   // Wd<32+7-24,32-24> = Wn<7:0>
    4066             :   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
    4067             :   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
    4068             :   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
    4069             : 
    4070             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4071             :   // %2 = shl i16 %1, 12
    4072             :   // Wd<32+3-20,32-20> = Wn<3:0>
    4073             :   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
    4074             :   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
    4075             :   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
    4076             : 
    4077          39 :   unsigned ImmR = RegSize - Shift;
    4078             :   // Limit the width to the length of the source type.
    4079          78 :   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
    4080             :   static const unsigned OpcTable[2][2] = {
    4081             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4082             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4083             :   };
    4084          39 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4085          39 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4086          10 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4087          20 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4088          20 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4089          10 :         .addImm(0)
    4090          10 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4091          10 :         .addImm(AArch64::sub_32);
    4092          10 :     Op0 = TmpReg;
    4093          10 :     Op0IsKill = true;
    4094             :   }
    4095          39 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4096             : }
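                     : 
                     : // A concrete instance of the folding above, assuming SrcBits = 8,
                     : // Shift = 4 and a 32-bit register (RegSize = 32):
                     : //   %2 = shl i16 (zext i8 %1 to i16), 4
                     : //   ==> UBFMWri Wd, Wn, #28, #7     ; ImmR = 32 - 4, ImmS clamped to 7;
                     : //                                   ; the "ubfiz wd, wn, #4, #8" alias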
    4097             : 
    4098           4 : unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    4099             :                                      unsigned Op1Reg, bool Op1IsKill) {
    4100           4 :   unsigned Opc = 0;
    4101           4 :   bool NeedTrunc = false;
    4102           4 :   uint64_t Mask = 0;
    4103           4 :   switch (RetVT.SimpleTy) {
    4104             :   default: return 0;
    4105             :   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
    4106           1 :   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
    4107           1 :   case MVT::i32: Opc = AArch64::LSRVWr; break;
    4108           1 :   case MVT::i64: Opc = AArch64::LSRVXr; break;
    4109             :   }
    4110             : 
    4111             :   const TargetRegisterClass *RC =
    4112           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4113           4 :   if (NeedTrunc) {
    4114           4 :     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
    4115           4 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4116           2 :     Op0IsKill = Op1IsKill = true;
    4117             :   }
    4118           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4119           4 :                                        Op1IsKill);
    4120           4 :   if (NeedTrunc)
    4121           4 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4122             :   return ResultReg;
    4123             : }
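                     : 
                     : // Unlike the shift-left case, a narrow lshr must also mask the value
                     : // operand, so the bits shifted in from above the i8/i16 width are zeros
                     : // rather than stale register contents, e.g. (sketch):
                     : //   %3 = lshr i8 %1, %2
                     : //   ==> ANDWri w8, w1, #0xff        ; zero-extend the value
                     : //       ANDWri w9, w2, #0xff        ; mask the shift amount
                     : //       LSRVWr w8, w8, w9
                     : //       ANDWri w0, w8, #0xff        ; truncate back to i8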
    4124             : 
    4125          27 : unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4126             :                                      bool Op0IsKill, uint64_t Shift,
    4127             :                                      bool IsZExt) {
    4128             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4129             :          "Unexpected source/return type pair.");
    4130             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4131             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4132             :          "Unexpected source value type.");
    4133             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4134             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4135             : 
    4136          54 :   bool Is64Bit = (RetVT == MVT::i64);
    4137          27 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4138          27 :   unsigned DstBits = RetVT.getSizeInBits();
    4139          27 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4140          27 :   const TargetRegisterClass *RC =
    4141          27 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4142             : 
    4143             :   // Just emit a copy for "zero" shifts.
    4144          27 :   if (Shift == 0) {
    4145           2 :     if (RetVT == SrcVT) {
    4146           1 :       unsigned ResultReg = createResultReg(RC);
    4147           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4148           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4149           1 :       .addReg(Op0, getKillRegState(Op0IsKill));
    4150           1 :       return ResultReg;
    4151             :     } else
    4152           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4153             :   }
    4154             : 
    4155             :   // Don't deal with undefined shifts.
    4156          25 :   if (Shift >= DstBits)
    4157             :     return 0;
    4158             : 
    4159             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4160             :   // {S|U}BFM Wd, Wn, #r, #s
    4161             :   // Wd<s-r:0> = Wn<s:r> when r <= s
    4162             : 
    4163             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4164             :   // %2 = lshr i16 %1, 4
    4165             :   // Wd<7-4:0> = Wn<7:4>
    4166             :   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
    4167             :   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
    4168             :   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
    4169             : 
    4170             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4171             :   // %2 = lshr i16 %1, 8
    4172             :   // Wd<7-7:0> = Wn<7:7>
    4173             :   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
    4174             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4175             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4176             : 
    4177             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4178             :   // %2 = lshr i16 %1, 12
    4179             :   // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
    4180             :   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
    4181             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4182             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4183             : 
    4184          25 :   if (Shift >= SrcBits && IsZExt)
    4185           6 :     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
    4186             : 
    4187             :   // It is not possible to fold a sign-extend into the LShr instruction. In this
    4188             :   // case emit a sign-extend.
    4189          22 :   if (!IsZExt) {
    4190           4 :     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4191           4 :     if (!Op0)
    4192             :       return 0;
    4193           4 :     Op0IsKill = true;
    4194           4 :     SrcVT = RetVT;
    4195           4 :     SrcBits = SrcVT.getSizeInBits();
    4196           4 :     IsZExt = true;
    4197             :   }
    4198             : 
    4199          44 :   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
    4200          22 :   unsigned ImmS = SrcBits - 1;
    4201             :   static const unsigned OpcTable[2][2] = {
    4202             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4203             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4204             :   };
    4205          22 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4206          22 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4207           0 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4208           0 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4209           0 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4210           0 :         .addImm(0)
    4211           0 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4212           0 :         .addImm(AArch64::sub_32);
    4213           0 :     Op0 = TmpReg;
    4214           0 :     Op0IsKill = true;
    4215             :   }
    4216          22 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4217             : }
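                     : 
                     : // A concrete instance of the folding above (SrcBits = 8, Shift = 4):
                     : //   %2 = lshr i16 (zext i8 %1 to i16), 4
                     : //   ==> UBFMWri Wd, Wn, #4, #7      ; the "ubfx wd, wn, #4, #4" alias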
    4218             : 
    4219           4 : unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    4220             :                                      unsigned Op1Reg, bool Op1IsKill) {
    4221           4 :   unsigned Opc = 0;
    4222           4 :   bool NeedTrunc = false;
    4223           4 :   uint64_t Mask = 0;
    4224           4 :   switch (RetVT.SimpleTy) {
    4225             :   default: return 0;
    4226             :   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
    4227           1 :   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
    4228           1 :   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
    4229           1 :   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
    4230             :   }
    4231             : 
    4232             :   const TargetRegisterClass *RC =
    4233           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4234           4 :   if (NeedTrunc) {
    4235           2 :     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
    4236           4 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4237           2 :     Op0IsKill = Op1IsKill = true;
    4238             :   }
    4239           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4240           4 :                                        Op1IsKill);
    4241           4 :   if (NeedTrunc)
    4242           4 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4243             :   return ResultReg;
    4244             : }
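                     : 
                     : // For a narrow ashr the value operand is sign-extended rather than masked,
                     : // so the correct sign bits are shifted in, e.g. (sketch):
                     : //   %3 = ashr i8 %1, %2
                     : //   ==> SBFMWri w8, w1, #0, #7      ; "sxtb w8, w1"
                     : //       ANDWri  w9, w2, #0xff
                     : //       ASRVWr  w8, w8, w9
                     : //       ANDWri  w0, w8, #0xff       ; truncate back to i8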
    4245             : 
    4246          29 : unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4247             :                                      bool Op0IsKill, uint64_t Shift,
    4248             :                                      bool IsZExt) {
    4249             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4250             :          "Unexpected source/return type pair.");
    4251             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4252             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4253             :          "Unexpected source value type.");
    4254             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4255             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4256             : 
    4257          58 :   bool Is64Bit = (RetVT == MVT::i64);
    4258          29 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4259          29 :   unsigned DstBits = RetVT.getSizeInBits();
    4260          29 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4261          29 :   const TargetRegisterClass *RC =
    4262          29 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4263             : 
    4264             :   // Just emit a copy for "zero" shifts.
    4265          29 :   if (Shift == 0) {
    4266           2 :     if (RetVT == SrcVT) {
    4267           1 :       unsigned ResultReg = createResultReg(RC);
    4268           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4269           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4270           1 :       .addReg(Op0, getKillRegState(Op0IsKill));
    4271           1 :       return ResultReg;
    4272             :     } else
    4273           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4274             :   }
    4275             : 
    4276             :   // Don't deal with undefined shifts.
    4277          27 :   if (Shift >= DstBits)
    4278             :     return 0;
    4279             : 
    4280             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4281             :   // {S|U}BFM Wd, Wn, #r, #s
    4282             :   // Wd<s-r:0> = Wn<s:r> when r <= s
    4283             : 
    4284             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4285             :   // %2 = ashr i16 %1, 4
    4286             :   // Wd<7-4:0> = Wn<7:4>
    4287             :   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
    4288             :   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
    4289             :   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
    4290             : 
    4291             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4292             :   // %2 = ashr i16 %1, 8
    4293             :   // Wd<7-7:0> = Wn<7:7>
    4294             :   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
    4295             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4296             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4297             : 
    4298             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4299             :   // %2 = ashr i16 %1, 12
    4300             :   // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
    4301             :   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
    4302             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4303             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4304             : 
    4305          27 :   if (Shift >= SrcBits && IsZExt)
    4306           6 :     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
    4307             : 
    4308          48 :   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
    4309          24 :   unsigned ImmS = SrcBits - 1;
    4310             :   static const unsigned OpcTable[2][2] = {
    4311             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4312             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4313             :   };
    4314          24 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4315          24 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4316           1 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4317           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4318           2 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4319           1 :         .addImm(0)
    4320           1 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4321           1 :         .addImm(AArch64::sub_32);
    4322           1 :     Op0 = TmpReg;
    4323           1 :     Op0IsKill = true;
    4324             :   }
    4325          24 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4326             : }
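                     : 
                     : // A concrete instance (SrcBits = 8, Shift = 4) with the sign-extend folded
                     : // into the bitfield move:
                     : //   %2 = ashr i16 (sext i8 %1 to i16), 4
                     : //   ==> SBFMWri Wd, Wn, #4, #7      ; the "sbfx wd, wn, #4, #4" alias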
    4327             : 
    4328         418 : unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
    4329             :                                      bool IsZExt) {
    4330             :   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
    4331             : 
    4332             :   // FastISel does not have plumbing to deal with extensions where the SrcVT or
    4333             :   // DestVT are odd things, so test to make sure that they are both types we can
    4334             :   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
    4335             :   // bail out to SelectionDAG.
    4336        1247 :   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
    4337        1310 :        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
    4338         824 :       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
    4339         160 :        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
    4340             :     return 0;
    4341             : 
    4342             :   unsigned Opc;
    4343         418 :   unsigned Imm = 0;
    4344             : 
    4345         418 :   switch (SrcVT.SimpleTy) {
    4346             :   default:
    4347             :     return 0;
    4348         141 :   case MVT::i1:
    4349         141 :     return emiti1Ext(SrcReg, DestVT, IsZExt);
    4350         148 :   case MVT::i8:
    4351         148 :     if (DestVT == MVT::i64)
    4352          16 :       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4353             :     else
    4354         132 :       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    4355             :     Imm = 7;
    4356             :     break;
    4357          98 :   case MVT::i16:
    4358          98 :     if (DestVT == MVT::i64)
    4359          15 :       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4360             :     else
    4361          83 :       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    4362             :     Imm = 15;
    4363             :     break;
    4364          31 :   case MVT::i32:
    4365             :     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    4366          31 :     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4367             :     Imm = 31;
    4368             :     break;
    4369             :   }
    4370             : 
    4371             :   // Handle i8 and i16 as i32.
    4372         277 :   if (DestVT == MVT::i8 || DestVT == MVT::i16)
    4373             :     DestVT = MVT::i32;
    4374         274 :   else if (DestVT == MVT::i64) {
    4375          62 :     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    4376         124 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4377         124 :             TII.get(AArch64::SUBREG_TO_REG), Src64)
    4378          62 :         .addImm(0)
    4379          62 :         .addReg(SrcReg)
    4380          62 :         .addImm(AArch64::sub_32);
    4381          62 :     SrcReg = Src64;
    4382             :   }
    4383             : 
    4384             :   const TargetRegisterClass *RC =
    4385         277 :       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4386         277 :   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
    4387             : }
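                     : 
                     : // The UBFM/SBFM immediates chosen above encode the usual extend aliases,
                     : // e.g. (sketch):
                     : //   %2 = zext i8 %1 to i32   ==> UBFMWri Wd, Wn, #0, #7   ; "uxtb"
                     : //   %2 = sext i32 %1 to i64  ==> SUBREG_TO_REG + SBFMXri Xd, Xn, #0, #31
                     : //                                                        ; "sxtw"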
    4388             : 
    4389             : static bool isZExtLoad(const MachineInstr *LI) {
    4390           2 :   switch (LI->getOpcode()) {
    4391             :   default:
    4392             :     return false;
    4393             :   case AArch64::LDURBBi:
    4394             :   case AArch64::LDURHHi:
    4395             :   case AArch64::LDURWi:
    4396             :   case AArch64::LDRBBui:
    4397             :   case AArch64::LDRHHui:
    4398             :   case AArch64::LDRWui:
    4399             :   case AArch64::LDRBBroX:
    4400             :   case AArch64::LDRHHroX:
    4401             :   case AArch64::LDRWroX:
    4402             :   case AArch64::LDRBBroW:
    4403             :   case AArch64::LDRHHroW:
    4404             :   case AArch64::LDRWroW:
    4405             :     return true;
    4406             :   }
    4407             : }
    4408             : 
    4409             : static bool isSExtLoad(const MachineInstr *LI) {
    4410           0 :   switch (LI->getOpcode()) {
    4411             :   default:
    4412             :     return false;
    4413             :   case AArch64::LDURSBWi:
    4414             :   case AArch64::LDURSHWi:
    4415             :   case AArch64::LDURSBXi:
    4416             :   case AArch64::LDURSHXi:
    4417             :   case AArch64::LDURSWi:
    4418             :   case AArch64::LDRSBWui:
    4419             :   case AArch64::LDRSHWui:
    4420             :   case AArch64::LDRSBXui:
    4421             :   case AArch64::LDRSHXui:
    4422             :   case AArch64::LDRSWui:
    4423             :   case AArch64::LDRSBWroX:
    4424             :   case AArch64::LDRSHWroX:
    4425             :   case AArch64::LDRSBXroX:
    4426             :   case AArch64::LDRSHXroX:
    4427             :   case AArch64::LDRSWroX:
    4428             :   case AArch64::LDRSBWroW:
    4429             :   case AArch64::LDRSHWroW:
    4430             :   case AArch64::LDRSBXroW:
    4431             :   case AArch64::LDRSHXroW:
    4432             :   case AArch64::LDRSWroW:
    4433             :     return true;
    4434             :   }
    4435             : }
    4436             : 
    4437         196 : bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
    4438             :                                          MVT SrcVT) {
    4439         478 :   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
    4440         172 :   if (!LI || !LI->hasOneUse())
    4441             :     return false;
    4442             : 
    4443             :   // Check if the load instruction has already been selected.
    4444          86 :   unsigned Reg = lookUpRegForValue(LI);
    4445          86 :   if (!Reg)
    4446             :     return false;
    4447             : 
    4448           1 :   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
    4449           1 :   if (!MI)
    4450             :     return false;
    4451             : 
    4452             :   // Check if the correct load instruction has been emitted - SelectionDAG might
    4453             :   // have emitted a zero-extending load, but we need a sign-extending load.
    4454           2 :   bool IsZExt = isa<ZExtInst>(I);
    4455           1 :   const auto *LoadMI = MI;
    4456           2 :   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
    4457           0 :       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    4458           0 :     unsigned LoadReg = MI->getOperand(1).getReg();
    4459           0 :     LoadMI = MRI.getUniqueVRegDef(LoadReg);
    4460             :     assert(LoadMI && "Expected valid instruction");
    4461             :   }
    4462           1 :   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    4463             :     return false;
    4464             : 
    4465             :   // Nothing to be done.
    4466           3 :   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    4467           0 :     updateValueMap(I, Reg);
    4468           0 :     return true;
    4469             :   }
    4470             : 
    4471           1 :   if (IsZExt) {
    4472           1 :     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    4473           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4474           2 :             TII.get(AArch64::SUBREG_TO_REG), Reg64)
    4475           1 :         .addImm(0)
    4476           1 :         .addReg(Reg, getKillRegState(true))
    4477           1 :         .addImm(AArch64::sub_32);
    4478           1 :     Reg = Reg64;
    4479             :   } else {
    4480             :     assert((MI->getOpcode() == TargetOpcode::COPY &&
    4481             :             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
    4482             :            "Expected copy instruction");
    4483           0 :     Reg = MI->getOperand(1).getReg();
    4484           0 :     MI->eraseFromParent();
    4485             :   }
    4486           1 :   updateValueMap(I, Reg);
    4487           1 :   return true;
    4488             : }
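                     : 
                     : // Sketch of the fold above: a W-register byte load already zero-extends
                     : // to 32 bits, so only the widening to 64 bits needs an instruction:
                     : //   %v = load i8, i8* %p
                     : //   %e = zext i8 %v to i64
                     : //   ==> LDRBBui w8, [x0]                  ; selected for the load
                     : //       SUBREG_TO_REG x8, 0, w8, sub_32   ; no UBFM is emitted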
    4489             : 
    4490         203 : bool AArch64FastISel::selectIntExt(const Instruction *I) {
    4491             :   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
    4492             :          "Unexpected integer extend instruction.");
    4493         203 :   MVT RetVT;
    4494         203 :   MVT SrcVT;
    4495         203 :   if (!isTypeSupported(I->getType(), RetVT))
    4496             :     return false;
    4497             : 
    4498         392 :   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    4499             :     return false;
    4500             : 
    4501             :   // Try to optimize already sign-/zero-extended values from load instructions.
    4502         196 :   if (optimizeIntExtLoad(I, RetVT, SrcVT))
    4503             :     return true;
    4504             : 
    4505         390 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    4506         195 :   if (!SrcReg)
    4507             :     return false;
    4508         390 :   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
    4509             : 
    4510             :   // Try to optimize already sign-/zero-extended values from function arguments.
    4511         390 :   bool IsZExt = isa<ZExtInst>(I);
    4512         468 :   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    4513          78 :     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
    4514         145 :       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
    4515          13 :         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    4516          26 :         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4517          26 :                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
    4518          13 :             .addImm(0)
    4519          13 :             .addReg(SrcReg, getKillRegState(SrcIsKill))
    4520          13 :             .addImm(AArch64::sub_32);
    4521          13 :         SrcReg = ResultReg;
    4522             :       }
    4523             :       // Conservatively clear all kill flags from all uses, because we are
    4524             :       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
    4525             :       // level. The result of the instruction at IR level might have been
    4526             :       // trivially dead, which is no longer true.
    4527          66 :       unsigned UseReg = lookUpRegForValue(I);
    4528          66 :       if (UseReg)
    4529          66 :         MRI.clearKillFlags(UseReg);
    4530             : 
    4531          66 :       updateValueMap(I, SrcReg);
    4532          66 :       return true;
    4533             :     }
    4534             :   }
    4535             : 
    4536         129 :   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
    4537         129 :   if (!ResultReg)
    4538             :     return false;
    4539             : 
    4540         129 :   updateValueMap(I, ResultReg);
    4541         129 :   return true;
    4542             : }
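                     : 
                     : // Sketch of the argument fold above: a zeroext/signext parameter attribute
                     : // promises the value was already extended, so the IR-level extend needs no
                     : // code beyond SUBREG_TO_REG for a 64-bit result:
                     : //   define i64 @f(i32 zeroext %x) { %y = zext i32 %x to i64 ... }
                     : //   ==> SUBREG_TO_REG Xd, 0, Wn, sub_32   ; no UBFM/SBFM emitted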
    4543             : 
    4544           8 : bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
    4545           8 :   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
    4546           8 :   if (!DestEVT.isSimple())
    4547             :     return false;
    4548             : 
    4549           8 :   MVT DestVT = DestEVT.getSimpleVT();
    4550           8 :   if (DestVT != MVT::i64 && DestVT != MVT::i32)
    4551             :     return false;
    4552             : 
    4553             :   unsigned DivOpc;
    4554           8 :   bool Is64bit = (DestVT == MVT::i64);
    4555           8 :   switch (ISDOpcode) {
    4556             :   default:
    4557             :     return false;
    4558           4 :   case ISD::SREM:
    4559           4 :     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    4560             :     break;
    4561           4 :   case ISD::UREM:
    4562           4 :     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    4563             :     break;
    4564             :   }
    4565           8 :   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
    4566          16 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4567           8 :   if (!Src0Reg)
    4568             :     return false;
    4569          16 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4570             : 
    4571          16 :   unsigned Src1Reg = getRegForValue(I->getOperand(1));
    4572           8 :   if (!Src1Reg)
    4573             :     return false;
    4574          16 :   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
    4575             : 
    4576             :   const TargetRegisterClass *RC =
    4577           8 :       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4578             :   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
    4579           8 :                                      Src1Reg, /*IsKill=*/false);
    4580             :   assert(QuotReg && "Unexpected DIV instruction emission failure.");
    4581             :   // The remainder is computed as numerator - (quotient * denominator) using the
    4582             :   // MSUB instruction.
    4583           8 :   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
    4584             :                                         Src1Reg, Src1IsKill, Src0Reg,
    4585           8 :                                         Src0IsKill);
    4586           8 :   updateValueMap(I, ResultReg);
    4587           8 :   return true;
    4588             : }
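                     : 
                     : // Sketch of the remainder computation described above:
                     : //   %3 = srem i32 %1, %2
                     : //   ==> SDIVWr   w8, w0, w1
                     : //       MSUBWrrr w0, w8, w1, w0     ; w0 - (w8 * w1)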
    4589             : 
    4590          10 : bool AArch64FastISel::selectMul(const Instruction *I) {
    4591          10 :   MVT VT;
    4592          10 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    4593             :     return false;
    4594             : 
    4595          10 :   if (VT.isVector())
    4596           0 :     return selectBinaryOp(I, ISD::MUL);
    4597             : 
    4598          20 :   const Value *Src0 = I->getOperand(0);
    4599          20 :   const Value *Src1 = I->getOperand(1);
    4600          10 :   if (const auto *C = dyn_cast<ConstantInt>(Src0))
    4601           0 :     if (C->getValue().isPowerOf2())
    4602             :       std::swap(Src0, Src1);
    4603             : 
    4604             :   // Try to simplify to a shift instruction.
    4605          14 :   if (const auto *C = dyn_cast<ConstantInt>(Src1))
    4606           4 :     if (C->getValue().isPowerOf2()) {
    4607           4 :       uint64_t ShiftVal = C->getValue().logBase2();
    4608           2 :       MVT SrcVT = VT;
    4609           2 :       bool IsZExt = true;
    4610           2 :       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
    4611           0 :         if (!isIntExtFree(ZExt)) {
    4612           0 :           MVT VT;
    4613           0 :           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
    4614           0 :             SrcVT = VT;
    4615           0 :             IsZExt = true;
    4616           0 :             Src0 = ZExt->getOperand(0);
    4617             :           }
    4618             :         }
    4619           2 :       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
    4620           0 :         if (!isIntExtFree(SExt)) {
    4621           0 :           MVT VT;
    4622           0 :           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
    4623           0 :             SrcVT = VT;
    4624           0 :             IsZExt = false;
    4625           0 :             Src0 = SExt->getOperand(0);
    4626             :           }
    4627             :         }
    4628             :       }
    4629             : 
    4630           2 :       unsigned Src0Reg = getRegForValue(Src0);
    4631           2 :       if (!Src0Reg)
    4632           2 :         return false;
    4633           2 :       bool Src0IsKill = hasTrivialKill(Src0);
    4634             : 
    4635             :       unsigned ResultReg =
    4636           2 :           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
    4637             : 
    4638           2 :       if (ResultReg) {
    4639           2 :         updateValueMap(I, ResultReg);
    4640           2 :         return true;
    4641             :       }
    4642             :     }
    4643             : 
    4644          16 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4645           8 :   if (!Src0Reg)
    4646             :     return false;
    4647          16 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4648             : 
    4649          16 :   unsigned Src1Reg = getRegForValue(I->getOperand(1));
    4650           8 :   if (!Src1Reg)
    4651             :     return false;
    4652          16 :   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
    4653             : 
    4654           8 :   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
    4655             : 
    4656           8 :   if (!ResultReg)
    4657             :     return false;
    4658             : 
    4659           8 :   updateValueMap(I, ResultReg);
    4660           8 :   return true;
    4661             : }
    4662             : 
    4663         105 : bool AArch64FastISel::selectShift(const Instruction *I) {
    4664         105 :   MVT RetVT;
    4665         105 :   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    4666             :     return false;
    4667             : 
    4668         105 :   if (RetVT.isVector())
    4669           0 :     return selectOperator(I, I->getOpcode());
    4670             : 
    4671         303 :   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    4672          93 :     unsigned ResultReg = 0;
    4673          93 :     uint64_t ShiftVal = C->getZExtValue();
    4674          93 :     MVT SrcVT = RetVT;
    4675          93 :     bool IsZExt = I->getOpcode() != Instruction::AShr;
    4676         186 :     const Value *Op0 = I->getOperand(0);
    4677          22 :     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
    4678          22 :       if (!isIntExtFree(ZExt)) {
    4679          22 :         MVT TmpVT;
    4680          44 :         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
    4681          22 :           SrcVT = TmpVT;
    4682          22 :           IsZExt = true;
    4683          22 :           Op0 = ZExt->getOperand(0);
    4684             :         }
    4685             :       }
    4686          20 :     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
    4687          20 :       if (!isIntExtFree(SExt)) {
    4688          20 :         MVT TmpVT;
    4689          39 :         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
    4690          19 :           SrcVT = TmpVT;
    4691          19 :           IsZExt = false;
    4692          19 :           Op0 = SExt->getOperand(0);
    4693             :         }
    4694             :       }
    4695             :     }
    4696             : 
    4697          93 :     unsigned Op0Reg = getRegForValue(Op0);
    4698          93 :     if (!Op0Reg)
    4699             :       return false;
    4700          93 :     bool Op0IsKill = hasTrivialKill(Op0);
    4701             : 
    4702          93 :     switch (I->getOpcode()) {
    4703           0 :     default: llvm_unreachable("Unexpected instruction.");
    4704          50 :     case Instruction::Shl:
    4705          50 :       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4706          50 :       break;
    4707          19 :     case Instruction::AShr:
    4708          19 :       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4709          19 :       break;
    4710          24 :     case Instruction::LShr:
    4711          24 :       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4712          24 :       break;
    4713             :     }
    4714          93 :     if (!ResultReg)
    4715             :       return false;
    4716             : 
    4717          79 :     updateValueMap(I, ResultReg);
    4718          79 :     return true;
    4719             :   }
    4720             : 
    4721          24 :   unsigned Op0Reg = getRegForValue(I->getOperand(0));
    4722          12 :   if (!Op0Reg)
    4723             :     return false;
    4724          24 :   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
    4725             : 
    4726          24 :   unsigned Op1Reg = getRegForValue(I->getOperand(1));
    4727          12 :   if (!Op1Reg)
    4728             :     return false;
    4729          24 :   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
    4730             : 
    4731          12 :   unsigned ResultReg = 0;
    4732          12 :   switch (I->getOpcode()) {
    4733           0 :   default: llvm_unreachable("Unexpected instruction.");
    4734           4 :   case Instruction::Shl:
    4735           4 :     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4736           4 :     break;
    4737           4 :   case Instruction::AShr:
    4738           4 :     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4739           4 :     break;
    4740           4 :   case Instruction::LShr:
    4741           4 :     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4742           4 :     break;
    4743             :   }
    4744             : 
    4745          12 :   if (!ResultReg)
    4746             :     return false;
    4747             : 
    4748          12 :   updateValueMap(I, ResultReg);
    4749          12 :   return true;
    4750             : }
    4751             : 
    4752          22 : bool AArch64FastISel::selectBitCast(const Instruction *I) {
    4753          22 :   MVT RetVT, SrcVT;
    4754             : 
    4755          44 :   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    4756             :     return false;
    4757          22 :   if (!isTypeLegal(I->getType(), RetVT))
    4758             :     return false;
    4759             : 
    4760             :   unsigned Opc;
    4761          45 :   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    4762             :     Opc = AArch64::FMOVWSr;
    4763          26 :   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    4764             :     Opc = AArch64::FMOVXDr;
    4765          19 :   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    4766             :     Opc = AArch64::FMOVSWr;
    4767          30 :   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    4768             :     Opc = AArch64::FMOVDXr;
    4769             :   else
    4770             :     return false;
    4771             : 
    4772           8 :   const TargetRegisterClass *RC = nullptr;
    4773           8 :   switch (RetVT.SimpleTy) {
    4774           0 :   default: llvm_unreachable("Unexpected value type.");
    4775             :   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
    4776           3 :   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
    4777           1 :   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
    4778           3 :   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
    4779             :   }
    4780          16 :   unsigned Op0Reg = getRegForValue(I->getOperand(0));
    4781           8 :   if (!Op0Reg)
    4782             :     return false;
    4783          16 :   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
    4784           8 :   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
    4785             : 
    4786           8 :   if (!ResultReg)
    4787             :     return false;
    4788             : 
    4789           8 :   updateValueMap(I, ResultReg);
    4790           8 :   return true;
    4791             : }
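                     : 
                     : // Sketch: an int<->float bitcast of matching width is a single move
                     : // between the register files, e.g.:
                     : //   %f = bitcast i32 %x to float    ==> FMOVWSr Sd, Wn
                     : //   %x = bitcast double %d to i64   ==> FMOVDXr Xd, Dn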
    4792             : 
    4793           4 : bool AArch64FastISel::selectFRem(const Instruction *I) {
    4794           4 :   MVT RetVT;
    4795           4 :   if (!isTypeLegal(I->getType(), RetVT))
    4796             :     return false;
    4797             : 
    4798             :   RTLIB::Libcall LC;
    4799           4 :   switch (RetVT.SimpleTy) {
    4800             :   default:
    4801             :     return false;
    4802             :   case MVT::f32:
    4803             :     LC = RTLIB::REM_F32;
    4804             :     break;
    4805           2 :   case MVT::f64:
    4806           2 :     LC = RTLIB::REM_F64;
    4807           2 :     break;
    4808             :   }
    4809             : 
    4810           8 :   ArgListTy Args;
    4811           8 :   Args.reserve(I->getNumOperands());
    4812             : 
    4813             :   // Populate the argument list.
    4814          16 :   for (auto &Arg : I->operands()) {
    4815           8 :     ArgListEntry Entry;
    4816           8 :     Entry.Val = Arg;
    4817           8 :     Entry.Ty = Arg->getType();
    4818           8 :     Args.push_back(Entry);
    4819             :   }
    4820             : 
    4821           8 :   CallLoweringInfo CLI;
    4822           4 :   MCContext &Ctx = MF->getContext();
    4823           4 :   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
    4824          20 :                 TLI.getLibcallName(LC), std::move(Args));
    4825           4 :   if (!lowerCallTo(CLI))
    4826             :     return false;
    4827           4 :   updateValueMap(I, CLI.ResultReg);
    4828           4 :   return true;
    4829             : }
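                     : 
                     : // There is no AArch64 frem instruction, so the code above lowers it as a
                     : // runtime-library call, e.g.:
                     : //   %r = frem float %a, %b    ==> bl fmodf    ; RTLIB::REM_F32
                     : //   %r = frem double %a, %b   ==> bl fmod     ; RTLIB::REM_F64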
    4830             : 
    4831          14 : bool AArch64FastISel::selectSDiv(const Instruction *I) {
    4832          14 :   MVT VT;
    4833          14 :   if (!isTypeLegal(I->getType(), VT))
    4834             :     return false;
    4835             : 
    4836          42 :   if (!isa<ConstantInt>(I->getOperand(1)))
    4837           0 :     return selectBinaryOp(I, ISD::SDIV);
    4838             : 
    4839          56 :   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
    4840          70 :   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
    4841          50 :       !(C.isPowerOf2() || (-C).isPowerOf2()))
    4842           0 :     return selectBinaryOp(I, ISD::SDIV);
    4843             : 
    4844          14 :   unsigned Lg2 = C.countTrailingZeros();
    4845          28 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4846          14 :   if (!Src0Reg)
    4847             :     return false;
    4848          28 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4849             : 
    4850          14 :   if (cast<BinaryOperator>(I)->isExact()) {
    4851           3 :     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    4852           3 :     if (!ResultReg)
    4853             :       return false;
    4854           3 :     updateValueMap(I, ResultReg);
    4855           3 :     return true;
    4856             :   }
    4857             : 
    4858          11 :   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
    4859          11 :   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
    4860          11 :   if (!AddReg)
    4861             :     return false;
    4862             : 
    4863             :   // (Src0 < 0) ? Pow2 - 1 : 0;
    4864          11 :   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    4865             :     return false;
    4866             : 
    4867             :   unsigned SelectOpc;
    4868             :   const TargetRegisterClass *RC;
    4869          22 :   if (VT == MVT::i64) {
    4870             :     SelectOpc = AArch64::CSELXr;
    4871             :     RC = &AArch64::GPR64RegClass;
    4872             :   } else {
    4873           5 :     SelectOpc = AArch64::CSELWr;
    4874           5 :     RC = &AArch64::GPR32RegClass;
    4875             :   }
    4876             :   unsigned SelectReg =
    4877          11 :       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
    4878          11 :                        Src0IsKill, AArch64CC::LT);
    4879          11 :   if (!SelectReg)
    4880             :     return false;
    4881             : 
    4882             :   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
    4883             :   // negate the result.
    4884          22 :   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    4885             :   unsigned ResultReg;
    4886          11 :   if (C.isNegative())
    4887           4 :     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
    4888             :                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
    4889             :   else
    4890           7 :     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
    4891             : 
    4892          11 :   if (!ResultReg)
    4893             :     return false;
    4894             : 
    4895          11 :   updateValueMap(I, ResultReg);
    4896          11 :   return true;
    4897             : }
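                     : 
                     : // Sketch of the non-exact power-of-two path above, for
                     : // %2 = sdiv i32 %1, 4 (register choices illustrative):
                     : //   ADDWri  w8, w0, #3              ; Src0 + (Pow2 - 1)
                     : //   SUBSWri wzr, w0, #0             ; "cmp w0, #0"
                     : //   CSELWr  w8, w8, w0, lt          ; bias only negative numerators
                     : //   SBFMWri w0, w8, #2, #31         ; "asr w0, w8, #2"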
    4898             : 
    4899             : /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
    4900             : /// have to duplicate it for AArch64, because otherwise we would fail during the
    4901             : /// sign-extend emission.
    4902           5 : std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
    4903           5 :   unsigned IdxN = getRegForValue(Idx);
    4904           5 :   if (IdxN == 0)
    4905             :     // Unhandled operand. Halt "fast" selection and bail.
    4906           0 :     return std::pair<unsigned, bool>(0, false);
    4907             : 
    4908           5 :   bool IdxNIsKill = hasTrivialKill(Idx);
    4909             : 
    4910             :   // If the index is smaller or larger than intptr_t, truncate or extend it.
    4911          10 :   MVT PtrVT = TLI.getPointerTy(DL);
    4912           5 :   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
    4913           5 :   if (IdxVT.bitsLT(PtrVT)) {
    4914           2 :     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
    4915           2 :     IdxNIsKill = true;
    4916           3 :   } else if (IdxVT.bitsGT(PtrVT))
    4917           0 :     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
    4918           5 :   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
    4919             : }
    4920             : 
    4921             : /// This is mostly a copy of the existing FastISel GEP code, but we have to
    4922             : /// duplicate it for AArch64, because otherwise we would bail out even for
    4923             : /// simple cases. This is because the standard fastEmit functions don't cover
    4924             : /// MUL at all and ADD is lowered very inefficiently.
    4925          20 : bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
    4926          40 :   unsigned N = getRegForValue(I->getOperand(0));
    4927          20 :   if (!N)
    4928             :     return false;
    4929          40 :   bool NIsKill = hasTrivialKill(I->getOperand(0));
    4930             : 
    4931             :   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
    4932             :   // into a single N = N + TotalOffset.
    4933          20 :   uint64_t TotalOffs = 0;
    4934          40 :   MVT VT = TLI.getPointerTy(DL);
    4935          45 :   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
    4936          45 :        GTI != E; ++GTI) {
    4937          50 :     const Value *Idx = GTI.getOperand();
    4938           4 :     if (auto *StTy = GTI.getStructTypeOrNull()) {
    4939           8 :       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
    4940             :       // N = N + Offset
    4941           4 :       if (Field)
    4942           4 :         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    4943             :     } else {
    4944          21 :       Type *Ty = GTI.getIndexedType();
    4945             : 
    4946             :       // If this is a constant subscript, handle it quickly.
    4947          27 :       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
    4948          16 :         if (CI->isZero())
    4949          21 :           continue;
    4950             :         // N = N + Offset
    4951          11 :         TotalOffs +=
    4952          33 :             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
    4953          11 :         continue;
    4954             :       }
    4955           5 :       if (TotalOffs) {
    4956           0 :         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    4957           0 :         if (!N)
    4958           0 :           return false;
    4959             :         NIsKill = true;
    4960             :         TotalOffs = 0;
    4961             :       }
    4962             : 
    4963             :       // N = N + Idx * ElementSize;
    4964           5 :       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
    4965           5 :       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
    4966           5 :       unsigned IdxN = Pair.first;
    4967           5 :       bool IdxNIsKill = Pair.second;
    4968           5 :       if (!IdxN)
    4969             :         return false;
    4970             : 
    4971           5 :       if (ElementSize != 1) {
    4972           4 :         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
    4973           4 :         if (!C)
    4974             :           return false;
    4975           4 :         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
    4976           4 :         if (!IdxN)
    4977             :           return false;
    4978             :         IdxNIsKill = true;
    4979             :       }
    4980           5 :       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
    4981           5 :       if (!N)
    4982             :         return false;
    4983             :     }
    4984             :   }
    4985          20 :   if (TotalOffs) {
    4986          13 :     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    4987          13 :     if (!N)
    4988             :       return false;
    4989             :   }
    4990          20 :   updateValueMap(I, N);
    4991          20 :   return true;
    4992             : }
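                     : 
                     : // Sketch of the constant-offset coalescing above: both constant indices
                     : // fold into TotalOffs and a single add, e.g.:
                     : //   %q = getelementptr {i32, i32}, {i32, i32}* %p, i64 1, i32 1
                     : //   ==> ADDXri Xd, Xn, #12          ; 8 (element) + 4 (field)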
    4993             : 
    4994           3 : bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
    4995             :   assert(TM.getOptLevel() == CodeGenOpt::None &&
    4996             :          "cmpxchg survived AtomicExpand at optlevel > -O0");
    4997             : 
    4998           6 :   auto *RetPairTy = cast<StructType>(I->getType());
    4999           3 :   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
    5000             :   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
    5001             :          "cmpxchg has a non-i1 status result");
    5002             : 
    5003           3 :   MVT VT;
    5004           3 :   if (!isTypeLegal(RetTy, VT))
    5005             :     return false;
    5006             : 
    5007             :   const TargetRegisterClass *ResRC;
    5008             :   unsigned Opc, CmpOpc;
    5009             :   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
    5010             :   // extractvalue selection doesn't support that.
    5011           6 :   if (VT == MVT::i32) {
    5012             :     Opc = AArch64::CMP_SWAP_32;
    5013             :     CmpOpc = AArch64::SUBSWrs;
    5014             :     ResRC = &AArch64::GPR32RegClass;
    5015           1 :   } else if (VT == MVT::i64) {
    5016             :     Opc = AArch64::CMP_SWAP_64;
    5017             :     CmpOpc = AArch64::SUBSXrs;
    5018             :     ResRC = &AArch64::GPR64RegClass;
    5019             :   } else {
    5020             :     return false;
    5021             :   }
    5022             : 
    5023           6 :   const MCInstrDesc &II = TII.get(Opc);
    5024             : 
    5025           9 :   const unsigned AddrReg = constrainOperandRegClass(
    5026           3 :       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
    5027           6 :   const unsigned DesiredReg = constrainOperandRegClass(
    5028           3 :       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
    5029           6 :   const unsigned NewReg = constrainOperandRegClass(
    5030           3 :       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
    5031             : 
    5032           3 :   const unsigned ResultReg1 = createResultReg(ResRC);
    5033           3 :   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
    5034           3 :   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
    5035             : 
    5036             :   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
    5037           6 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    5038           3 :       .addDef(ResultReg1)
    5039           3 :       .addDef(ScratchReg)
    5040           3 :       .addUse(AddrReg)
    5041           3 :       .addUse(DesiredReg)
    5042           3 :       .addUse(NewReg);
    5043             : 
    5044           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
    5045           9 :       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
    5046           3 :       .addUse(ResultReg1)
    5047           3 :       .addUse(DesiredReg)
    5048           3 :       .addImm(0);
    5049             : 
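                      :   // CSINC Wd, WZR, WZR, NE yields WZR + 1 on EQ and WZR on NE, i.e. a 1/0
                      :   // success flag for "old value matched Desired".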
    5050           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
    5051           3 :       .addDef(ResultReg2)
    5052           3 :       .addUse(AArch64::WZR)
    5053           3 :       .addUse(AArch64::WZR)
    5054           3 :       .addImm(AArch64CC::NE);
    5055             : 
    5056             :   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
    5057           3 :   updateValueMap(I, ResultReg1, 2);
    5058           3 :   return true;
    5059             : }
    5060             : 
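Functionally, the three instructions emitted above compute the {old value,
success} pair that the IR-level cmpxchg returns. A self-contained sketch of
that contract, with a plain reference standing in (single-threaded and purely
illustrative) for the ll/sc loop the CMP_SWAP pseudo expands to:

  #include <cstdint>
  #include <utility>

  // CMP_SWAP_32 produces Old and conditionally stores New; SUBSWrs compares
  // Old with Desired to set NZCV; CSINCWr turns EQ into the i1 status bit.
  std::pair<uint32_t, bool> cmpXchg32(uint32_t &Memory, uint32_t Desired,
                                      uint32_t New) {
    uint32_t Old = Memory;
    bool Success = (Old == Desired);
    if (Success)
      Memory = New;
    return {Old, Success};
  }
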
    5061        4007 : bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
    5062        4007 :   switch (I->getOpcode()) {
    5063             :   default:
    5064             :     break;
    5065         268 :   case Instruction::Add:
    5066             :   case Instruction::Sub:
    5067         268 :     return selectAddSub(I);
    5068          10 :   case Instruction::Mul:
    5069          10 :     return selectMul(I);
    5070          14 :   case Instruction::SDiv:
    5071          14 :     return selectSDiv(I);
    5072           4 :   case Instruction::SRem:
    5073           4 :     if (!selectBinaryOp(I, ISD::SREM))
    5074           4 :       return selectRem(I, ISD::SREM);
    5075             :     return true;
    5076           4 :   case Instruction::URem:
    5077           4 :     if (!selectBinaryOp(I, ISD::UREM))
    5078           4 :       return selectRem(I, ISD::UREM);
    5079             :     return true;
    5080         105 :   case Instruction::Shl:
    5081             :   case Instruction::LShr:
    5082             :   case Instruction::AShr:
    5083         105 :     return selectShift(I);
    5084          89 :   case Instruction::And:
    5085             :   case Instruction::Or:
    5086             :   case Instruction::Xor:
    5087          89 :     return selectLogicalOp(I);
    5088         297 :   case Instruction::Br:
    5089         297 :     return selectBranch(I);
    5090           1 :   case Instruction::IndirectBr:
    5091           1 :     return selectIndirectBr(I);
    5092          50 :   case Instruction::BitCast:
    5093          50 :     if (!FastISel::selectBitCast(I))
    5094          22 :       return selectBitCast(I);
    5095             :     return true;
    5096          12 :   case Instruction::FPToSI:
    5097          12 :     if (!selectCast(I, ISD::FP_TO_SINT))
    5098           1 :       return selectFPToInt(I, /*Signed=*/true);
    5099             :     return true;
    5100          18 :   case Instruction::FPToUI:
    5101          18 :     return selectFPToInt(I, /*Signed=*/false);
    5102         203 :   case Instruction::ZExt:
    5103             :   case Instruction::SExt:
    5104         203 :     return selectIntExt(I);
    5105          26 :   case Instruction::Trunc:
    5106          26 :     if (!selectCast(I, ISD::TRUNCATE))
    5107          14 :       return selectTrunc(I);
    5108             :     return true;
    5109           7 :   case Instruction::FPExt:
    5110           7 :     return selectFPExt(I);
    5111           2 :   case Instruction::FPTrunc:
    5112           2 :     return selectFPTrunc(I);
    5113          25 :   case Instruction::SIToFP:
    5114          25 :     if (!selectCast(I, ISD::SINT_TO_FP))
    5115           9 :       return selectIntToFP(I, /*Signed=*/true);
    5116             :     return true;
    5117          21 :   case Instruction::UIToFP:
    5118          21 :     return selectIntToFP(I, /*Signed=*/false);
    5119         367 :   case Instruction::Load:
    5120         367 :     return selectLoad(I);
    5121         413 :   case Instruction::Store:
    5122         413 :     return selectStore(I);
    5123          57 :   case Instruction::FCmp:
    5124             :   case Instruction::ICmp:
    5125          57 :     return selectCmp(I);
    5126          53 :   case Instruction::Select:
    5127          53 :     return selectSelect(I);
    5128        1300 :   case Instruction::Ret:
    5129        1300 :     return selectRet(I);
    5130           4 :   case Instruction::FRem:
    5131           4 :     return selectFRem(I);
    5132          20 :   case Instruction::GetElementPtr:
    5133          20 :     return selectGetElementPtr(I);
    5134           3 :   case Instruction::AtomicCmpXchg:
    5135           3 :     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
    5136             :   }
    5137             : 
     5138             :   // Fall back to target-independent instruction selection.
    5139        1268 :   return selectOperator(I, I->getOpcode());
    5140             :   // Silence warnings.
    5141             :   (void)&CC_AArch64_DarwinPCS_VarArg;
    5142             :   (void)&CC_AArch64_Win64_VarArg;
    5143             : }
    5144             : 
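Several cases above (SRem, URem, BitCast, FPToSI, Trunc, SIToFP) share one
idiom: try the generic FastISel routine first and run the hand-written
AArch64 selector only when it fails. A sketch of the pattern; TryGeneric and
TryCustom are hypothetical callables standing in for pairs such as
FastISel::selectBitCast / AArch64FastISel::selectBitCast:

  template <typename Inst, typename Generic, typename Custom>
  bool selectWithFallback(Generic TryGeneric, Custom TryCustom, const Inst *I) {
    if (TryGeneric(I))
      return true;       // the generic lowering succeeded
    return TryCustom(I); // fall back to the target-specific selector
  }
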
    5145             : namespace llvm {
    5146             : 
    5147        1214 : FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
     5148             :                                   const TargetLibraryInfo *LibInfo) {
    5149        1214 :   return new AArch64FastISel(FuncInfo, LibInfo);
    5150             : }
    5151             : 
    5152             : } // end namespace llvm
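
For context, this factory is reached through the TargetLowering hook, which is
how SelectionDAGISel obtains a FastISel instance at -O0. A hedged sketch of
the corresponding hook in AArch64ISelLowering.cpp:

  FastISel *
  AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                        const TargetLibraryInfo *libInfo) const {
    return AArch64::createFastISel(funcInfo, libInfo);
  }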

Generated by: LCOV version 1.13