LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64FastISel.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21
                 Hit    Total    Coverage
Lines:          1844     2000      92.2 %
Functions:        73       95      76.8 %

          Line data    Source code
       1             : //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines the AArch64-specific support for the FastISel class. Some
      11             : // of the target-specific code is generated by tablegen in the file
      12             : // AArch64GenFastISel.inc, which is #included here.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64CallingConvention.h"
      18             : #include "AArch64RegisterInfo.h"
      19             : #include "AArch64Subtarget.h"
      20             : #include "MCTargetDesc/AArch64AddressingModes.h"
      21             : #include "Utils/AArch64BaseInfo.h"
      22             : #include "llvm/ADT/APFloat.h"
      23             : #include "llvm/ADT/APInt.h"
      24             : #include "llvm/ADT/DenseMap.h"
      25             : #include "llvm/ADT/SmallVector.h"
      26             : #include "llvm/Analysis/BranchProbabilityInfo.h"
      27             : #include "llvm/CodeGen/CallingConvLower.h"
      28             : #include "llvm/CodeGen/FastISel.h"
      29             : #include "llvm/CodeGen/FunctionLoweringInfo.h"
      30             : #include "llvm/CodeGen/ISDOpcodes.h"
      31             : #include "llvm/CodeGen/MachineBasicBlock.h"
      32             : #include "llvm/CodeGen/MachineConstantPool.h"
      33             : #include "llvm/CodeGen/MachineFrameInfo.h"
      34             : #include "llvm/CodeGen/MachineInstr.h"
      35             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      36             : #include "llvm/CodeGen/MachineMemOperand.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/RuntimeLibcalls.h"
      39             : #include "llvm/CodeGen/ValueTypes.h"
      40             : #include "llvm/IR/Argument.h"
      41             : #include "llvm/IR/Attributes.h"
      42             : #include "llvm/IR/BasicBlock.h"
      43             : #include "llvm/IR/CallingConv.h"
      44             : #include "llvm/IR/Constant.h"
      45             : #include "llvm/IR/Constants.h"
      46             : #include "llvm/IR/DataLayout.h"
      47             : #include "llvm/IR/DerivedTypes.h"
      48             : #include "llvm/IR/Function.h"
      49             : #include "llvm/IR/GetElementPtrTypeIterator.h"
      50             : #include "llvm/IR/GlobalValue.h"
      51             : #include "llvm/IR/InstrTypes.h"
      52             : #include "llvm/IR/Instruction.h"
      53             : #include "llvm/IR/Instructions.h"
      54             : #include "llvm/IR/IntrinsicInst.h"
      55             : #include "llvm/IR/Intrinsics.h"
      56             : #include "llvm/IR/Operator.h"
      57             : #include "llvm/IR/Type.h"
      58             : #include "llvm/IR/User.h"
      59             : #include "llvm/IR/Value.h"
      60             : #include "llvm/MC/MCInstrDesc.h"
      61             : #include "llvm/MC/MCRegisterInfo.h"
      62             : #include "llvm/MC/MCSymbol.h"
      63             : #include "llvm/Support/AtomicOrdering.h"
      64             : #include "llvm/Support/Casting.h"
      65             : #include "llvm/Support/CodeGen.h"
      66             : #include "llvm/Support/Compiler.h"
      67             : #include "llvm/Support/ErrorHandling.h"
      68             : #include "llvm/Support/MachineValueType.h"
      69             : #include "llvm/Support/MathExtras.h"
      70             : #include <algorithm>
      71             : #include <cassert>
      72             : #include <cstdint>
      73             : #include <iterator>
      74             : #include <utility>
      75             : 
      76             : using namespace llvm;
      77             : 
      78             : namespace {
      79             : 
      80             : class AArch64FastISel final : public FastISel {
      81             :   class Address {
      82             :   public:
      83             :     using BaseKind = enum {
      84             :       RegBase,
      85             :       FrameIndexBase
      86             :     };
      87             : 
      88             :   private:
      89             :     BaseKind Kind = RegBase;
      90             :     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
      91             :     union {
      92             :       unsigned Reg;
      93             :       int FI;
      94             :     } Base;
      95             :     unsigned OffsetReg = 0;
      96             :     unsigned Shift = 0;
      97             :     int64_t Offset = 0;
      98             :     const GlobalValue *GV = nullptr;
      99             : 
     100             :   public:
     101          13 :     Address() { Base.Reg = 0; }
     102             : 
     103         172 :     void setKind(BaseKind K) { Kind = K; }
     104             :     BaseKind getKind() const { return Kind; }
     105          37 :     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
     106           0 :     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
     107           0 :     bool isRegBase() const { return Kind == RegBase; }
     108           0 :     bool isFIBase() const { return Kind == FrameIndexBase; }
     109             : 
     110           0 :     void setReg(unsigned Reg) {
     111             :       assert(isRegBase() && "Invalid base register access!");
     112           3 :       Base.Reg = Reg;
     113           0 :     }
     114             : 
     115           0 :     unsigned getReg() const {
     116             :       assert(isRegBase() && "Invalid base register access!");
     117           0 :       return Base.Reg;
     118             :     }
     119             : 
     120           0 :     void setOffsetReg(unsigned Reg) {
     121         525 :       OffsetReg = Reg;
     122           0 :     }
     123             : 
     124           0 :     unsigned getOffsetReg() const {
     125           0 :       return OffsetReg;
     126             :     }
     127             : 
     128           0 :     void setFI(unsigned FI) {
      129             :       assert(isFIBase() && "Invalid base frame index access!");
     130         169 :       Base.FI = FI;
     131           0 :     }
     132             : 
     133           0 :     unsigned getFI() const {
     134             :       assert(isFIBase() && "Invalid base frame index access!");
     135         174 :       return Base.FI;
     136             :     }
     137             : 
     138           8 :     void setOffset(int64_t O) { Offset = O; }
     139           0 :     int64_t getOffset() { return Offset; }
     140          43 :     void setShift(unsigned S) { Shift = S; }
     141           0 :     unsigned getShift() { return Shift; }
     142             : 
     143          84 :     void setGlobalValue(const GlobalValue *G) { GV = G; }
     144           0 :     const GlobalValue *getGlobalValue() { return GV; }
     145             :   };
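    // Usage sketch (illustrative only; FrameIdx, BaseReg, and IdxReg are
    // placeholder values, and only members declared above are used). A
    // frame-object access with a folded constant offset looks like
    //
    //   Address Addr;
    //   Addr.setKind(Address::FrameIndexBase);
    //   Addr.setFI(FrameIdx);                  // from StaticAllocaMap
    //   Addr.setOffset(16);                    // folded byte offset
    //
    // while a register base plus a shifted index, e.g. base + (idx << 3)
    // for an i64 element, looks like
    //
    //   Addr.setKind(Address::RegBase);
    //   Addr.setReg(BaseReg);
    //   Addr.setOffsetReg(IdxReg);
    //   Addr.setExtendType(AArch64_AM::LSL);
    //   Addr.setShift(3);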
     146             : 
     147             :   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
     148             :   /// make the right decision when generating code for different targets.
     149             :   const AArch64Subtarget *Subtarget;
     150             :   LLVMContext *Context;
     151             : 
     152             :   bool fastLowerArguments() override;
     153             :   bool fastLowerCall(CallLoweringInfo &CLI) override;
     154             :   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
     155             : 
     156             : private:
     157             :   // Selection routines.
     158             :   bool selectAddSub(const Instruction *I);
     159             :   bool selectLogicalOp(const Instruction *I);
     160             :   bool selectLoad(const Instruction *I);
     161             :   bool selectStore(const Instruction *I);
     162             :   bool selectBranch(const Instruction *I);
     163             :   bool selectIndirectBr(const Instruction *I);
     164             :   bool selectCmp(const Instruction *I);
     165             :   bool selectSelect(const Instruction *I);
     166             :   bool selectFPExt(const Instruction *I);
     167             :   bool selectFPTrunc(const Instruction *I);
     168             :   bool selectFPToInt(const Instruction *I, bool Signed);
     169             :   bool selectIntToFP(const Instruction *I, bool Signed);
     170             :   bool selectRem(const Instruction *I, unsigned ISDOpcode);
     171             :   bool selectRet(const Instruction *I);
     172             :   bool selectTrunc(const Instruction *I);
     173             :   bool selectIntExt(const Instruction *I);
     174             :   bool selectMul(const Instruction *I);
     175             :   bool selectShift(const Instruction *I);
     176             :   bool selectBitCast(const Instruction *I);
     177             :   bool selectFRem(const Instruction *I);
     178             :   bool selectSDiv(const Instruction *I);
     179             :   bool selectGetElementPtr(const Instruction *I);
     180             :   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
     181             : 
     182             :   // Utility helper routines.
     183             :   bool isTypeLegal(Type *Ty, MVT &VT);
     184             :   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
     185             :   bool isValueAvailable(const Value *V) const;
     186             :   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
     187             :   bool computeCallAddress(const Value *V, Address &Addr);
     188             :   bool simplifyAddress(Address &Addr, MVT VT);
     189             :   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
     190             :                             MachineMemOperand::Flags Flags,
     191             :                             unsigned ScaleFactor, MachineMemOperand *MMO);
     192             :   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
     193             :   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
     194             :                           unsigned Alignment);
     195             :   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
     196             :                          const Value *Cond);
     197             :   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
     198             :   bool optimizeSelect(const SelectInst *SI);
     199             :   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
     200             : 
     201             :   // Emit helper routines.
     202             :   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
     203             :                       const Value *RHS, bool SetFlags = false,
      204             :                       bool WantResult = true, bool IsZExt = false);
     205             :   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
     206             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     207             :                          bool SetFlags = false, bool WantResult = true);
     208             :   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
     209             :                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
     210             :                          bool WantResult = true);
     211             :   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
     212             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     213             :                          AArch64_AM::ShiftExtendType ShiftType,
     214             :                          uint64_t ShiftImm, bool SetFlags = false,
     215             :                          bool WantResult = true);
     216             :   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
     217             :                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     218             :                           AArch64_AM::ShiftExtendType ExtType,
     219             :                           uint64_t ShiftImm, bool SetFlags = false,
     220             :                          bool WantResult = true);
     221             : 
     222             :   // Emit functions.
     223             :   bool emitCompareAndBranch(const BranchInst *BI);
     224             :   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
     225             :   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
     226             :   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
     227             :   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
     228             :   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
     229             :                     MachineMemOperand *MMO = nullptr);
     230             :   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
     231             :                  MachineMemOperand *MMO = nullptr);
     232             :   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
     233             :                         MachineMemOperand *MMO = nullptr);
     234             :   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
     235             :   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
     236             :   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
     237             :                    bool SetFlags = false, bool WantResult = true,
     238             :                    bool IsZExt = false);
     239             :   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
     240             :   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
     241             :                    bool SetFlags = false, bool WantResult = true,
     242             :                    bool IsZExt = false);
     243             :   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
     244             :                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
     245             :   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
     246             :                        unsigned RHSReg, bool RHSIsKill,
     247             :                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
     248             :                        bool WantResult = true);
     249             :   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
     250             :                          const Value *RHS);
     251             :   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
     252             :                             bool LHSIsKill, uint64_t Imm);
     253             :   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
     254             :                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     255             :                             uint64_t ShiftImm);
     256             :   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
     257             :   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     258             :                       unsigned Op1, bool Op1IsKill);
     259             :   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     260             :                         unsigned Op1, bool Op1IsKill);
     261             :   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
     262             :                         unsigned Op1, bool Op1IsKill);
     263             :   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     264             :                       unsigned Op1Reg, bool Op1IsKill);
     265             :   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     266             :                       uint64_t Imm, bool IsZExt = true);
     267             :   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     268             :                       unsigned Op1Reg, bool Op1IsKill);
     269             :   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     270             :                       uint64_t Imm, bool IsZExt = true);
     271             :   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
     272             :                       unsigned Op1Reg, bool Op1IsKill);
     273             :   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
     274             :                       uint64_t Imm, bool IsZExt = false);
     275             : 
     276             :   unsigned materializeInt(const ConstantInt *CI, MVT VT);
     277             :   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
     278             :   unsigned materializeGV(const GlobalValue *GV);
     279             : 
     280             :   // Call handling routines.
     281             : private:
     282             :   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
     283             :   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
     284             :                        unsigned &NumBytes);
     285             :   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
     286             : 
     287             : public:
     288             :   // Backend specific FastISel code.
     289             :   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
     290             :   unsigned fastMaterializeConstant(const Constant *C) override;
     291             :   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
     292             : 
     293        1222 :   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
     294             :                            const TargetLibraryInfo *LibInfo)
     295        1222 :       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
     296        1222 :     Subtarget =
     297        1222 :         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
     298        1222 :     Context = &FuncInfo.Fn->getContext();
     299        1222 :   }
     300             : 
     301             :   bool fastSelectInstruction(const Instruction *I) override;
     302             : 
     303             : #include "AArch64GenFastISel.inc"
     304             : };
     305             : 
     306             : } // end anonymous namespace
     307             : 
     308             : #include "AArch64GenCallingConv.inc"
     309             : 
     310             : /// Check if the sign-/zero-extend will be a noop.
     311          82 : static bool isIntExtFree(const Instruction *I) {
     312             :   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
     313             :          "Unexpected integer extend instruction.");
     314             :   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
     315             :          "Unexpected value type.");
     316             :   bool IsZExt = isa<ZExtInst>(I);
     317             : 
     318          82 :   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
     319           0 :     if (LI->hasOneUse())
     320             :       return true;
     321             : 
     322             :   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
     323          80 :     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
     324           4 :       return true;
     325             : 
     326             :   return false;
     327             : }
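// Illustrative LLVM IR for the two free cases tested above (names are
// placeholders). A single-use extending load folds into the load itself
// (ldrb, ldrsh, and friends), and an argument whose ABI attribute matches
// the extension needs no code at all:
//
//   %v = load i8, i8* %p            ; one use: the zext below is a noop
//   %z = zext i8 %v to i32
//
//   define i32 @f(i8 zeroext %a)    ; caller already zero-extended %a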
     328             : 
     329             : /// Determine the implicit scale factor that is applied by a memory
     330             : /// operation for a given value type.
     331             : static unsigned getImplicitScaleFactor(MVT VT) {
     332             :   switch (VT.SimpleTy) {
     333             :   default:
     334             :     return 0;    // invalid
     335             :   case MVT::i1:  // fall-through
     336             :   case MVT::i8:
     337             :     return 1;
     338             :   case MVT::i16:
     339             :     return 2;
     340             :   case MVT::i32: // fall-through
     341             :   case MVT::f32:
     342             :     return 4;
     343             :   case MVT::i64: // fall-through
     344             :   case MVT::f64:
     345             :     return 8;
     346             :   }
     347             : }
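// Illustrative only: the scale factor feeds the scaled, unsigned 12-bit
// immediate form of AArch64 loads and stores. For an i32 access at byte
// offset 12, the offset is encodable because 12 / 4 == 3 fits that field.
// This mirrors the check performed in simplifyAddress below:
//
//   unsigned Scale = getImplicitScaleFactor(MVT::i32);  // == 4
//   bool Encodable =
//       Offset >= 0 && !(Offset % Scale) && isUInt<12>(Offset / Scale);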
     348             : 
     349           0 : CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
     350         183 :   if (CC == CallingConv::WebKit_JS)
     351           0 :     return CC_AArch64_WebKit_JS;
     352         176 :   if (CC == CallingConv::GHC)
     353           0 :     return CC_AArch64_GHC;
     354         176 :   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
     355             : }
     356             : 
     357          16 : unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
     358             :   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
     359             :          "Alloca should always return a pointer.");
     360             : 
     361             :   // Don't handle dynamic allocas.
     362          16 :   if (!FuncInfo.StaticAllocaMap.count(AI))
     363           0 :     return 0;
     364             : 
     365             :   DenseMap<const AllocaInst *, int>::iterator SI =
     366          16 :       FuncInfo.StaticAllocaMap.find(AI);
     367             : 
     368          32 :   if (SI != FuncInfo.StaticAllocaMap.end()) {
     369          16 :     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
     370          32 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
     371          32 :             ResultReg)
     372          16 :         .addFrameIndex(SI->second)
     373             :         .addImm(0)
     374             :         .addImm(0);
     375          16 :     return ResultReg;
     376             :   }
     377             : 
     378             :   return 0;
     379             : }
     380             : 
     381         350 : unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
     382         350 :   if (VT > MVT::i64)
     383             :     return 0;
     384             : 
     385         350 :   if (!CI->isZero())
     386         231 :     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
     387             : 
     388             :   // Create a copy from the zero register to materialize a "0" value.
     389         119 :   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
     390             :                                                    : &AArch64::GPR32RegClass;
     391         119 :   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
     392         119 :   unsigned ResultReg = createResultReg(RC);
     393         357 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
     394         238 :           ResultReg).addReg(ZeroReg, getKillRegState(true));
     395         119 :   return ResultReg;
     396             : }
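// Illustrative output: the zero path is a plain copy from the architectural
// zero register, which later passes can often coalesce away entirely:
//
//   mov w0, wzr    ; i32 zero
//   mov x0, xzr    ; i64 zero
//
// Nonzero constants take the tablegen'd fastEmit_i path instead.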
     397             : 
     398          19 : unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
     399             :   // Positive zero (+0.0) has to be materialized with a fmov from the zero
     400             :   // register, because the immediate version of fmov cannot encode zero.
     401          19 :   if (CFP->isNullValue())
     402           2 :     return fastMaterializeFloatZero(CFP);
     403             : 
     404          17 :   if (VT != MVT::f32 && VT != MVT::f64)
     405             :     return 0;
     406             : 
     407             :   const APFloat Val = CFP->getValueAPF();
     408             :   bool Is64Bit = (VT == MVT::f64);
     409             :   // This checks to see if we can use FMOV instructions to materialize
     410             :   // a constant, otherwise we have to materialize via the constant pool.
     411          34 :   if (TLI.isFPImmLegal(Val, VT)) {
     412             :     int Imm =
     413          10 :         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
     414             :     assert((Imm != -1) && "Cannot encode floating-point constant.");
     415          10 :     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
     416          10 :     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
     417             :   }
     418             : 
     419             :   // For the MachO large code model materialize the FP constant in code.
     420          14 :   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
     421           4 :     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
     422           4 :     const TargetRegisterClass *RC = Is64Bit ?
     423             :         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
     424             : 
     425           4 :     unsigned TmpReg = createResultReg(RC);
     426           8 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
     427           8 :         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
     428             : 
     429           4 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
     430           8 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
     431           8 :             TII.get(TargetOpcode::COPY), ResultReg)
     432           4 :         .addReg(TmpReg, getKillRegState(true));
     433             : 
     434           4 :     return ResultReg;
     435             :   }
     436             : 
     437             :   // Materialize via constant pool.  MachineConstantPool wants an explicit
     438             :   // alignment.
     439           3 :   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
     440           3 :   if (Align == 0)
     441           0 :     Align = DL.getTypeAllocSize(CFP->getType());
     442             : 
     443           3 :   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
     444           3 :   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
     445           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     446           6 :           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
     447             : 
     448           3 :   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
     449           3 :   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
     450           6 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
     451           3 :       .addReg(ADRPReg)
     452             :       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     453           3 :   return ResultReg;
     454             : }
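// Illustrative output for the three strategies above, materializing 1.0
// (label and register names are placeholders):
//
//   fmov d0, #1.0                    ; FMOV-encodable immediate
//
//   mov  x8, #0x3ff0000000000000     ; MachO large code model: build the
//   fmov d0, x8                      ;   bit pattern, then move it over
//
//   adrp x8, .LCPI0_0                ; otherwise: load from the
//   ldr  d0, [x8, :lo12:.LCPI0_0]    ;   constant pool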
     455             : 
     456         128 : unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
     457             :   // We can't handle thread-local variables quickly yet.
     458         128 :   if (GV->isThreadLocal())
     459             :     return 0;
     460             : 
     461             :   // MachO still uses GOT for large code-model accesses, but ELF requires
     462             :   // movz/movk sequences, which FastISel doesn't handle yet.
     463         187 :   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
     464             :     return 0;
     465             : 
     466          91 :   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
     467             : 
     468         182 :   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
     469          91 :   if (!DestEVT.isSimple())
     470             :     return 0;
     471             : 
     472          91 :   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
     473             :   unsigned ResultReg;
     474             : 
     475          91 :   if (OpFlags & AArch64II::MO_GOT) {
     476             :     // ADRP + LDRX
     477         135 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     478          90 :             ADRPReg)
     479          45 :         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
     480             : 
     481          45 :     ResultReg = createResultReg(&AArch64::GPR64RegClass);
     482          45 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
     483          90 :             ResultReg)
     484          45 :         .addReg(ADRPReg)
     485             :         .addGlobalAddress(GV, 0,
     486          45 :                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
     487             :   } else {
     488             :     // ADRP + ADDX
     489         138 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
     490          92 :             ADRPReg)
     491          46 :         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
     492             : 
     493          46 :     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
     494          46 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
     495          92 :             ResultReg)
     496          46 :         .addReg(ADRPReg)
     497             :         .addGlobalAddress(GV, 0,
     498          46 :                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
     499             :         .addImm(0);
     500             :   }
     501             :   return ResultReg;
     502             : }
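// Illustrative output for the two sequences above ("var" is a placeholder
// symbol; shown in ELF relocation syntax):
//
//   ADRP + LDRX (GOT-indirect):        ADRP + ADDX (direct):
//     adrp x8, :got:var                  adrp x8, var
//     ldr  x8, [x8, :got_lo12:var]       add  x8, x8, :lo12:var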
     503             : 
     504         537 : unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
     505         537 :   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
     506             : 
     507             :   // Only handle simple types.
     508         537 :   if (!CEVT.isSimple())
     509             :     return 0;
     510         537 :   MVT VT = CEVT.getSimpleVT();
     511             : 
     512             :   if (const auto *CI = dyn_cast<ConstantInt>(C))
     513         344 :     return materializeInt(CI, VT);
     514             :   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
     515          19 :     return materializeFP(CFP, VT);
     516             :   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
     517         121 :     return materializeGV(GV);
     518             : 
     519             :   return 0;
     520             : }
     521             : 
     522           2 : unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
     523             :   assert(CFP->isNullValue() &&
     524             :          "Floating-point constant is not a positive zero.");
     525           2 :   MVT VT;
     526           2 :   if (!isTypeLegal(CFP->getType(), VT))
     527             :     return 0;
     528             : 
     529           2 :   if (VT != MVT::f32 && VT != MVT::f64)
     530             :     return 0;
     531             : 
     532             :   bool Is64Bit = (VT == MVT::f64);
     533           2 :   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
     534           2 :   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
     535           2 :   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
     536             : }
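// Illustrative output: a single register-to-register FMOV from the zero
// register, since the immediate form of FMOV cannot encode +0.0:
//
//   fmov s0, wzr    ; float  +0.0
//   fmov d0, xzr    ; double +0.0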
     537             : 
     538             : /// Check if the multiply is by a power-of-2 constant.
     539         578 : static bool isMulPowOf2(const Value *I) {
     540             :   if (const auto *MI = dyn_cast<MulOperator>(I)) {
     541          23 :     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
     542           0 :       if (C->getValue().isPowerOf2())
     543             :         return true;
     544             :     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
     545          21 :       if (C->getValue().isPowerOf2())
     546          19 :         return true;
     547             :   }
     548             :   return false;
     549             : }
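// The check above is APInt::isPowerOf2; on a plain integer the same test is
// (illustrative, not the LLVM API):
//
//   bool IsPow2 = Val != 0 && (Val & (Val - 1)) == 0;
//
// computeAddress uses this to treat "mul i64 %idx, 8" exactly like
// "shl i64 %idx, 3", folding it into the [base, %idx, lsl #3] form.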
     550             : 
     551             : // Computes the address to get to an object.
     552        1201 : bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
     553             : {
     554             :   const User *U = nullptr;
     555             :   unsigned Opcode = Instruction::UserOp1;
     556             :   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
     557             :     // Don't walk into other basic blocks unless the object is an alloca from
     558             :     // another block, otherwise it may not have a virtual register assigned.
     559         762 :     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
     560         593 :         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     561             :       Opcode = I->getOpcode();
     562             :       U = I;
     563             :     }
     564             :   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
     565             :     Opcode = C->getOpcode();
     566             :     U = C;
     567             :   }
     568             : 
     569        1201 :   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
     570         839 :     if (Ty->getAddressSpace() > 255)
     571             :       // Fast instruction selection doesn't support the special
     572             :       // address spaces.
     573             :       return false;
     574             : 
     575        1191 :   switch (Opcode) {
     576             :   default:
     577             :     break;
     578             :   case Instruction::BitCast:
     579             :     // Look through bitcasts.
     580          17 :     return computeAddress(U->getOperand(0), Addr, Ty);
     581             : 
     582         136 :   case Instruction::IntToPtr:
     583             :     // Look past no-op inttoptrs.
     584         544 :     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
     585             :         TLI.getPointerTy(DL))
     586         136 :       return computeAddress(U->getOperand(0), Addr, Ty);
     587             :     break;
     588             : 
     589           6 :   case Instruction::PtrToInt:
     590             :     // Look past no-op ptrtoints.
     591          18 :     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
     592           6 :       return computeAddress(U->getOperand(0), Addr, Ty);
     593             :     break;
     594             : 
     595          46 :   case Instruction::GetElementPtr: {
     596          46 :     Address SavedAddr = Addr;
     597          46 :     uint64_t TmpOffset = Addr.getOffset();
     598             : 
     599             :     // Iterate through the GEP folding the constants into offsets where
     600             :     // we can.
     601         126 :     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
     602         206 :          GTI != E; ++GTI) {
     603             :       const Value *Op = GTI.getOperand();
     604          17 :       if (StructType *STy = GTI.getStructTypeOrNull()) {
     605          17 :         const StructLayout *SL = DL.getStructLayout(STy);
     606          17 :         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
     607          17 :         TmpOffset += SL->getElementOffset(Idx);
     608             :       } else {
     609          64 :         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
     610             :         while (true) {
     611             :           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
     612             :             // Constant-offset addressing.
     613          63 :             TmpOffset += CI->getSExtValue() * S;
     614          63 :             break;
     615             :           }
     616           1 :           if (canFoldAddIntoGEP(U, Op)) {
     617             :             // A compatible add with a constant operand. Fold the constant.
     618             :             ConstantInt *CI =
     619           0 :                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
     620           0 :             TmpOffset += CI->getSExtValue() * S;
     621             :             // Iterate on the other operand.
     622             :             Op = cast<AddOperator>(Op)->getOperand(0);
     623             :             continue;
     624             :           }
     625             :           // Unsupported
     626           1 :           goto unsupported_gep;
     627           0 :         }
     628             :       }
     629             :     }
     630             : 
     631             :     // Try to grab the base operand now.
     632          45 :     Addr.setOffset(TmpOffset);
     633          45 :     if (computeAddress(U->getOperand(0), Addr, Ty))
     634          40 :       return true;
     635             : 
     636             :     // We failed, restore everything and try the other options.
     637           5 :     Addr = SavedAddr;
     638             : 
     639           6 :   unsupported_gep:
     640           6 :     break;
     641             :   }
     642             :   case Instruction::Alloca: {
     643             :     const AllocaInst *AI = cast<AllocaInst>(Obj);
     644             :     DenseMap<const AllocaInst *, int>::iterator SI =
     645         169 :         FuncInfo.StaticAllocaMap.find(AI);
     646         338 :     if (SI != FuncInfo.StaticAllocaMap.end()) {
     647             :       Addr.setKind(Address::FrameIndexBase);
     648         169 :       Addr.setFI(SI->second);
     649         169 :       return true;
     650             :     }
     651           0 :     break;
     652             :   }
     653             :   case Instruction::Add: {
     654             :     // Adds of constants are common and easy enough.
     655             :     const Value *LHS = U->getOperand(0);
     656             :     const Value *RHS = U->getOperand(1);
     657             : 
     658         117 :     if (isa<ConstantInt>(LHS))
     659             :       std::swap(LHS, RHS);
     660             : 
     661             :     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
     662          29 :       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
     663         117 :       return computeAddress(LHS, Addr, Ty);
     664             :     }
     665             : 
     666          88 :     Address Backup = Addr;
     667          88 :     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
     668             :       return true;
     669           0 :     Addr = Backup;
     670             : 
     671           0 :     break;
     672             :   }
     673             :   case Instruction::Sub: {
     674             :     // Subs of constants are common and easy enough.
     675             :     const Value *LHS = U->getOperand(0);
     676             :     const Value *RHS = U->getOperand(1);
     677             : 
     678             :     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
     679          21 :       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
     680          21 :       return computeAddress(LHS, Addr, Ty);
     681             :     }
     682             :     break;
     683             :   }
     684          26 :   case Instruction::Shl: {
     685          26 :     if (Addr.getOffsetReg())
     686             :       break;
     687             : 
     688             :     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
     689             :     if (!CI)
     690             :       break;
     691             : 
     692          23 :     unsigned Val = CI->getZExtValue();
     693          23 :     if (Val < 1 || Val > 3)
     694             :       break;
     695             : 
     696             :     uint64_t NumBytes = 0;
     697          23 :     if (Ty && Ty->isSized()) {
     698          23 :       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
     699          23 :       NumBytes = NumBits / 8;
     700             :       if (!isPowerOf2_64(NumBits))
     701             :         NumBytes = 0;
     702             :     }
     703             : 
     704          23 :     if (NumBytes != (1ULL << Val))
     705             :       break;
     706             : 
     707             :     Addr.setShift(Val);
     708             :     Addr.setExtendType(AArch64_AM::LSL);
     709             : 
     710             :     const Value *Src = U->getOperand(0);
     711             :     if (const auto *I = dyn_cast<Instruction>(Src)) {
     712          17 :       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     713             :         // Fold the zext or sext when it won't become a noop.
     714             :         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
     715           9 :           if (!isIntExtFree(ZE) &&
     716           4 :               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     717             :             Addr.setExtendType(AArch64_AM::UXTW);
     718             :             Src = ZE->getOperand(0);
     719             :           }
     720             :         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
     721          15 :           if (!isIntExtFree(SE) &&
     722           7 :               SE->getOperand(0)->getType()->isIntegerTy(32)) {
     723             :             Addr.setExtendType(AArch64_AM::SXTW);
     724             :             Src = SE->getOperand(0);
     725             :           }
     726             :         }
     727             :       }
     728             :     }
     729             : 
     730             :     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
     731           5 :       if (AI->getOpcode() == Instruction::And) {
     732             :         const Value *LHS = AI->getOperand(0);
     733             :         const Value *RHS = AI->getOperand(1);
     734             : 
     735             :         if (const auto *C = dyn_cast<ConstantInt>(LHS))
     736           0 :           if (C->getValue() == 0xffffffff)
     737             :             std::swap(LHS, RHS);
     738             : 
     739             :         if (const auto *C = dyn_cast<ConstantInt>(RHS))
     740           3 :           if (C->getValue() == 0xffffffff) {
     741             :             Addr.setExtendType(AArch64_AM::UXTW);
     742           3 :             unsigned Reg = getRegForValue(LHS);
     743           3 :             if (!Reg)
     744             :               return false;
     745           3 :             bool RegIsKill = hasTrivialKill(LHS);
     746           6 :             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
     747             :                                              AArch64::sub_32);
     748             :             Addr.setOffsetReg(Reg);
     749           3 :             return true;
     750             :           }
     751             :       }
     752             : 
     753          20 :     unsigned Reg = getRegForValue(Src);
     754          20 :     if (!Reg)
     755             :       return false;
     756             :     Addr.setOffsetReg(Reg);
     757          20 :     return true;
     758             :   }
     759          13 :   case Instruction::Mul: {
     760          13 :     if (Addr.getOffsetReg())
     761             :       break;
     762             : 
     763          13 :     if (!isMulPowOf2(U))
     764             :       break;
     765             : 
     766             :     const Value *LHS = U->getOperand(0);
     767             :     const Value *RHS = U->getOperand(1);
     768             : 
     769             :     // Canonicalize power-of-2 value to the RHS.
     770             :     if (const auto *C = dyn_cast<ConstantInt>(LHS))
     771           0 :       if (C->getValue().isPowerOf2())
     772             :         std::swap(LHS, RHS);
     773             : 
      774             :     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
     775             :     const auto *C = cast<ConstantInt>(RHS);
     776             :     unsigned Val = C->getValue().logBase2();
     777          13 :     if (Val < 1 || Val > 3)
     778             :       break;
     779             : 
     780             :     uint64_t NumBytes = 0;
     781          13 :     if (Ty && Ty->isSized()) {
     782          13 :       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
     783          13 :       NumBytes = NumBits / 8;
     784             :       if (!isPowerOf2_64(NumBits))
     785             :         NumBytes = 0;
     786             :     }
     787             : 
     788          13 :     if (NumBytes != (1ULL << Val))
     789             :       break;
     790             : 
     791             :     Addr.setShift(Val);
     792             :     Addr.setExtendType(AArch64_AM::LSL);
     793             : 
     794             :     const Value *Src = LHS;
     795             :     if (const auto *I = dyn_cast<Instruction>(Src)) {
     796           9 :       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
     797             :         // Fold the zext or sext when it won't become a noop.
     798             :         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
     799           7 :           if (!isIntExtFree(ZE) &&
     800           3 :               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     801             :             Addr.setExtendType(AArch64_AM::UXTW);
     802             :             Src = ZE->getOperand(0);
     803             :           }
     804             :         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
     805           7 :           if (!isIntExtFree(SE) &&
     806           3 :               SE->getOperand(0)->getType()->isIntegerTy(32)) {
     807             :             Addr.setExtendType(AArch64_AM::SXTW);
     808             :             Src = SE->getOperand(0);
     809             :           }
     810             :         }
     811             :       }
     812             :     }
     813             : 
     814          13 :     unsigned Reg = getRegForValue(Src);
     815          13 :     if (!Reg)
     816             :       return false;
     817             :     Addr.setOffsetReg(Reg);
     818          13 :     return true;
     819             :   }
     820           3 :   case Instruction::And: {
     821           3 :     if (Addr.getOffsetReg())
     822             :       break;
     823             : 
     824           3 :     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
     825             :       break;
     826             : 
     827             :     const Value *LHS = U->getOperand(0);
     828             :     const Value *RHS = U->getOperand(1);
     829             : 
     830             :     if (const auto *C = dyn_cast<ConstantInt>(LHS))
     831           0 :       if (C->getValue() == 0xffffffff)
     832             :         std::swap(LHS, RHS);
     833             : 
     834             :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
     835           1 :       if (C->getValue() == 0xffffffff) {
     836             :         Addr.setShift(0);
     837             :         Addr.setExtendType(AArch64_AM::LSL);
     838             :         Addr.setExtendType(AArch64_AM::UXTW);
     839             : 
     840           1 :         unsigned Reg = getRegForValue(LHS);
     841           1 :         if (!Reg)
     842             :           return false;
     843           1 :         bool RegIsKill = hasTrivialKill(LHS);
     844           2 :         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
     845             :                                          AArch64::sub_32);
     846             :         Addr.setOffsetReg(Reg);
     847           1 :         return true;
     848             :       }
     849             :     break;
     850             :   }
     851          20 :   case Instruction::SExt:
     852             :   case Instruction::ZExt: {
     853          20 :     if (!Addr.getReg() || Addr.getOffsetReg())
     854             :       break;
     855             : 
     856             :     const Value *Src = nullptr;
     857             :     // Fold the zext or sext when it won't become a noop.
     858             :     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
     859           0 :       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
     860             :         Addr.setExtendType(AArch64_AM::UXTW);
     861             :         Src = ZE->getOperand(0);
     862             :       }
     863             :     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
     864          40 :       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
     865             :         Addr.setExtendType(AArch64_AM::SXTW);
     866             :         Src = SE->getOperand(0);
     867             :       }
     868             :     }
     869             : 
     870          20 :     if (!Src)
     871             :       break;
     872             : 
     873             :     Addr.setShift(0);
     874          20 :     unsigned Reg = getRegForValue(Src);
     875          20 :     if (!Reg)
     876             :       return false;
     877             :     Addr.setOffsetReg(Reg);
     878          20 :     return true;
     879             :   }
     880             :   } // end switch
     881             : 
     882         628 :   if (Addr.isRegBase() && !Addr.getReg()) {
     883         594 :     unsigned Reg = getRegForValue(Obj);
     884         594 :     if (!Reg)
     885             :       return false;
     886             :     Addr.setReg(Reg);
     887         569 :     return true;
     888             :   }
     889             : 
     890          34 :   if (!Addr.getOffsetReg()) {
     891          34 :     unsigned Reg = getRegForValue(Obj);
     892          34 :     if (!Reg)
     893             :       return false;
     894             :     Addr.setOffsetReg(Reg);
     895          34 :     return true;
     896             :   }
     897             : 
     898             :   return false;
     899             : }
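// Worked example of the GetElementPtr constant folding above, under an
// assumed layout (hypothetical struct, not from this file):
//
//   struct S { int32_t a; int32_t b[4]; };
//   getelementptr %S, %p, 0, 1, 2
//     TmpOffset  = 0
//     TmpOffset += 4;      // StructLayout::getElementOffset(1) -> field b
//     TmpOffset += 2 * 4;  // index 2 scaled by sizeof(int32_t)
//   => Addr = [%p, #12], with no add instructions emitted.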
     900             : 
     901         103 : bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
     902             :   const User *U = nullptr;
     903             :   unsigned Opcode = Instruction::UserOp1;
     904             :   bool InMBB = true;
     905             : 
     906             :   if (const auto *I = dyn_cast<Instruction>(V)) {
     907             :     Opcode = I->getOpcode();
     908             :     U = I;
     909          14 :     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
     910             :   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
     911             :     Opcode = C->getOpcode();
     912             :     U = C;
     913             :   }
     914             : 
     915         117 :   switch (Opcode) {
     916             :   default: break;
     917           2 :   case Instruction::BitCast:
     918             :     // Look past bitcasts if its operand is in the same BB.
     919           2 :     if (InMBB)
     920           2 :       return computeCallAddress(U->getOperand(0), Addr);
     921             :     break;
     922          12 :   case Instruction::IntToPtr:
     923             :     // Look past no-op inttoptrs if its operand is in the same BB.
     924          12 :     if (InMBB &&
     925          48 :         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
     926             :             TLI.getPointerTy(DL))
     927          12 :       return computeCallAddress(U->getOperand(0), Addr);
     928             :     break;
     929           0 :   case Instruction::PtrToInt:
     930             :     // Look past no-op ptrtoints if its operand is in the same BB.
     931           0 :     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
     932           0 :       return computeCallAddress(U->getOperand(0), Addr);
     933             :     break;
     934             :   }
     935             : 
     936             :   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
     937             :     Addr.setGlobalValue(GV);
     938          84 :     return true;
     939             :   }
     940             : 
     941             :   // If all else fails, try to materialize the value in a register.
     942          19 :   if (!Addr.getGlobalValue()) {
     943          19 :     Addr.setReg(getRegForValue(V));
     944          19 :     return Addr.getReg() != 0;
     945             :   }
     946             : 
     947             :   return false;
     948             : }
     949             : 
     950           0 : bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
     951           0 :   EVT evt = TLI.getValueType(DL, Ty, true);
     952             : 
     953             :   // Only handle simple types.
     954           0 :   if (evt == MVT::Other || !evt.isSimple())
     955           0 :     return false;
     956           0 :   VT = evt.getSimpleVT();
     957             : 
     958             :   // This is a legal type, but it's not something we handle in fast-isel.
     959           0 :   if (VT == MVT::f128)
     960           0 :     return false;
     961             : 
     962             :   // Handle all other legal types, i.e. a register that will directly hold this
     963             :   // value.
     964           0 :   return TLI.isTypeLegal(VT);
     965             : }
     966             : 
     967             : /// Determine if the value type is supported by FastISel.
     968             : ///
     969             : /// FastISel for AArch64 can handle more value types than are legal. This adds
      970             : /// simple value types such as i1, i8, and i16.
     971        1931 : bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
     972        1931 :   if (Ty->isVectorTy() && !IsVectorAllowed)
     973             :     return false;
     974             : 
     975        1925 :   if (isTypeLegal(Ty, VT))
     976             :     return true;
     977             : 
      978             :   // If this is a type that can be sign- or zero-extended to a basic operation
     979             :   // go ahead and accept it now.
     980         496 :   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
     981         473 :     return true;
     982             : 
     983             :   return false;
     984             : }
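                      : // A concrete case (illustrative, not from the original source): an i16
                      : // add is accepted here even though i16 is not a legal AArch64 register
                      : // type -- isTypeLegal above sets VT to MVT::i16 and returns false -- and
                      : // the widening to a 32-bit operation happens later in the emit helpers
                      : // (see the std::max(RetVT.SimpleTy, MVT::i32) in emitAddSub below).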
     985             : 
     986           0 : bool AArch64FastISel::isValueAvailable(const Value *V) const {
     987           0 :   if (!isa<Instruction>(V))
     988           0 :     return true;
     989             : 
     990             :   const auto *I = cast<Instruction>(V);
     991           0 :   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
     992             : }
     993             : 
     994         827 : bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
     995             :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
     996         603 :   if (!ScaleFactor)
     997             :     return false;
     998             : 
     999             :   bool ImmediateOffsetNeedsLowering = false;
    1000             :   bool RegisterOffsetNeedsLowering = false;
    1001         603 :   int64_t Offset = Addr.getOffset();
    1002         603 :   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    1003             :     ImmediateOffsetNeedsLowering = true;
    1004         599 :   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
    1005         105 :            !isUInt<12>(Offset / ScaleFactor))
    1006             :     ImmediateOffsetNeedsLowering = true;
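                      :   // Worked example (illustrative): for VT = MVT::i64 the scale factor is
                      :   // 8, so offset 32760 encodes directly (32760 / 8 == 4095 fits
                      :   // isUInt<12>), while 32768 / 8 == 4096 does not; likewise -260 fails
                      :   // the signed 9-bit unscaled form and must be lowered below.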
    1007             : 
    1008             :   // Cannot encode an offset register and an immediate offset in the same
    1009             :   // instruction. Fold the immediate offset into the load/store instruction and
    1010             :   // emit an additional add to take care of the offset register.
    1011         593 :   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    1012             :     RegisterOffsetNeedsLowering = true;
    1013             : 
    1014             :   // Cannot encode zero register as base.
    1015         603 :   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    1016             :     RegisterOffsetNeedsLowering = true;
    1017             : 
     1018             :   // If this is a stack pointer and the offset needs to be simplified, then put
     1019             :   // the alloca address into a register, set the base kind back to register, and
     1020             :   // continue. This should almost never happen.
    1021         603 :   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
    1022             :   {
    1023           3 :     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    1024           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    1025           6 :             ResultReg)
    1026           3 :       .addFrameIndex(Addr.getFI())
    1027             :       .addImm(0)
    1028             :       .addImm(0);
    1029             :     Addr.setKind(Address::RegBase);
    1030             :     Addr.setReg(ResultReg);
    1031             :   }
    1032             : 
    1033         603 :   if (RegisterOffsetNeedsLowering) {
    1034             :     unsigned ResultReg = 0;
    1035           7 :     if (Addr.getReg()) {
    1036           4 :       if (Addr.getExtendType() == AArch64_AM::SXTW ||
    1037             :           Addr.getExtendType() == AArch64_AM::UXTW   )
    1038           1 :         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    1039             :                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    1040             :                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
    1041           1 :                                   Addr.getShift());
    1042             :       else
    1043           3 :         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    1044             :                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    1045             :                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
    1046           3 :                                   Addr.getShift());
    1047             :     } else {
    1048           3 :       if (Addr.getExtendType() == AArch64_AM::UXTW)
    1049           0 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
    1050           0 :                                /*Op0IsKill=*/false, Addr.getShift(),
    1051             :                                /*IsZExt=*/true);
    1052           3 :       else if (Addr.getExtendType() == AArch64_AM::SXTW)
    1053           1 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
    1054           1 :                                /*Op0IsKill=*/false, Addr.getShift(),
    1055             :                                /*IsZExt=*/false);
    1056             :       else
    1057           2 :         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
    1058           2 :                                /*Op0IsKill=*/false, Addr.getShift());
    1059             :     }
    1060           7 :     if (!ResultReg)
    1061             :       return false;
    1062             : 
    1063             :     Addr.setReg(ResultReg);
    1064             :     Addr.setOffsetReg(0);
    1065             :     Addr.setShift(0);
    1066             :     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
    1067             :   }
    1068             : 
     1069             :   // Since the offset is too large for the load/store instruction, get the
    1070             :   // reg+offset into a register.
    1071         603 :   if (ImmediateOffsetNeedsLowering) {
    1072             :     unsigned ResultReg;
    1073          10 :     if (Addr.getReg())
    1074             :       // Try to fold the immediate into the add instruction.
    1075          10 :       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    1076             :     else
    1077           0 :       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
    1078             : 
    1079          10 :     if (!ResultReg)
    1080             :       return false;
    1081             :     Addr.setReg(ResultReg);
    1082             :     Addr.setOffset(0);
    1083             :   }
    1084             :   return true;
    1085             : }
    1086             : 
    1087         601 : void AArch64FastISel::addLoadStoreOperands(Address &Addr,
    1088             :                                            const MachineInstrBuilder &MIB,
    1089             :                                            MachineMemOperand::Flags Flags,
    1090             :                                            unsigned ScaleFactor,
    1091             :                                            MachineMemOperand *MMO) {
    1092         601 :   int64_t Offset = Addr.getOffset() / ScaleFactor;
    1093             :   // Frame base works a bit differently. Handle it separately.
    1094         601 :   if (Addr.isFIBase()) {
    1095         174 :     int FI = Addr.getFI();
    1096             :     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
    1097             :     // and alignment should be based on the VT.
    1098         174 :     MMO = FuncInfo.MF->getMachineMemOperand(
    1099         174 :         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
    1100         174 :         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    1101             :     // Now add the rest of the operands.
    1102             :     MIB.addFrameIndex(FI).addImm(Offset);
    1103             :   } else {
    1104             :     assert(Addr.isRegBase() && "Unexpected address kind.");
    1105         427 :     const MCInstrDesc &II = MIB->getDesc();
    1106         427 :     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    1107         427 :     Addr.setReg(
    1108         427 :       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    1109         427 :     Addr.setOffsetReg(
    1110         854 :       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    1111         427 :     if (Addr.getOffsetReg()) {
    1112             :       assert(Addr.getOffset() == 0 && "Unexpected offset");
    1113          84 :       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
    1114             :                       Addr.getExtendType() == AArch64_AM::SXTX;
    1115          84 :       MIB.addReg(Addr.getReg());
    1116          84 :       MIB.addReg(Addr.getOffsetReg());
    1117          84 :       MIB.addImm(IsSigned);
    1118          84 :       MIB.addImm(Addr.getShift() != 0);
    1119             :     } else
    1120         343 :       MIB.addReg(Addr.getReg()).addImm(Offset);
    1121             :   }
    1122             : 
    1123         601 :   if (MMO)
    1124             :     MIB.addMemOperand(MMO);
    1125         601 : }
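                      : // Illustrative result (a sketch assuming the usual AArch64 ro-operand
                      : // layout; the registers are hypothetical): for a register-offset i32
                      : // load with an SXTW extend and a non-zero shift, the operands built
                      : // above correspond to the assembly "ldr w0, [x1, w2, sxtw #2]".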
    1126             : 
    1127         308 : unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
    1128             :                                      const Value *RHS, bool SetFlags,
     1129             :                                      bool WantResult, bool IsZExt) {
    1130             :   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
    1131             :   bool NeedExtend = false;
    1132         308 :   switch (RetVT.SimpleTy) {
    1133             :   default:
    1134             :     return 0;
    1135           2 :   case MVT::i1:
    1136             :     NeedExtend = true;
    1137           2 :     break;
    1138           3 :   case MVT::i8:
    1139             :     NeedExtend = true;
    1140           3 :     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    1141             :     break;
    1142           5 :   case MVT::i16:
    1143             :     NeedExtend = true;
    1144           5 :     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    1145             :     break;
    1146             :   case MVT::i32:  // fall-through
    1147             :   case MVT::i64:
    1148             :     break;
    1149             :   }
    1150         308 :   MVT SrcVT = RetVT;
    1151         308 :   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
    1152             : 
    1153             :   // Canonicalize immediates to the RHS first.
    1154         308 :   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    1155             :     std::swap(LHS, RHS);
    1156             : 
    1157             :   // Canonicalize mul by power of 2 to the RHS.
    1158         540 :   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    1159         197 :     if (isMulPowOf2(LHS))
    1160             :       std::swap(LHS, RHS);
    1161             : 
    1162             :   // Canonicalize shift immediate to the RHS.
    1163         540 :   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    1164             :     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
    1165          79 :       if (isa<ConstantInt>(SI->getOperand(1)))
    1166           4 :         if (SI->getOpcode() == Instruction::Shl  ||
    1167           7 :             SI->getOpcode() == Instruction::LShr ||
    1168             :             SI->getOpcode() == Instruction::AShr   )
    1169             :           std::swap(LHS, RHS);
    1170             : 
    1171         308 :   unsigned LHSReg = getRegForValue(LHS);
    1172         308 :   if (!LHSReg)
    1173             :     return 0;
    1174         308 :   bool LHSIsKill = hasTrivialKill(LHS);
    1175             : 
    1176         308 :   if (NeedExtend)
    1177          10 :     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
    1178             : 
    1179             :   unsigned ResultReg = 0;
    1180             :   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    1181         113 :     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    1182          57 :     if (C->isNegative())
    1183           8 :       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
    1184             :                                 SetFlags, WantResult);
    1185             :     else
    1186          49 :       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
    1187             :                                 WantResult);
    1188             :   } else if (const auto *C = dyn_cast<Constant>(RHS))
    1189           2 :     if (C->isNullValue())
    1190           2 :       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
    1191             :                                 WantResult);
    1192             : 
    1193          59 :   if (ResultReg)
    1194             :     return ResultReg;
    1195             : 
    1196             :   // Only extend the RHS within the instruction if there is a valid extend type.
    1197         264 :   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
    1198           6 :       isValueAvailable(RHS)) {
    1199             :     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
    1200             :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
    1201           0 :         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
    1202           0 :           unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1203           0 :           if (!RHSReg)
    1204             :             return 0;
    1205           0 :           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1206           0 :           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1207             :                                RHSIsKill, ExtendType, C->getZExtValue(),
    1208           0 :                                SetFlags, WantResult);
    1209             :         }
    1210           6 :     unsigned RHSReg = getRegForValue(RHS);
    1211           6 :     if (!RHSReg)
    1212             :       return 0;
    1213           6 :     bool RHSIsKill = hasTrivialKill(RHS);
    1214           6 :     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1215           6 :                          ExtendType, 0, SetFlags, WantResult);
    1216             :   }
    1217             : 
    1218             :   // Check if the mul can be folded into the instruction.
    1219         246 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1220         208 :     if (isMulPowOf2(RHS)) {
    1221           0 :       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    1222             :       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    1223             : 
    1224             :       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
    1225           0 :         if (C->getValue().isPowerOf2())
    1226             :           std::swap(MulLHS, MulRHS);
    1227             : 
    1228             :       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    1229           0 :       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    1230           0 :       unsigned RHSReg = getRegForValue(MulLHS);
    1231           0 :       if (!RHSReg)
    1232             :         return 0;
    1233           0 :       bool RHSIsKill = hasTrivialKill(MulLHS);
    1234           0 :       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1235             :                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
    1236             :                                 WantResult);
    1237           0 :       if (ResultReg)
    1238             :         return ResultReg;
    1239             :     }
    1240             :   }
    1241             : 
    1242             :   // Check if the shift can be folded into the instruction.
    1243         246 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1244             :     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
    1245             :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
    1246             :         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
    1247             :         switch (SI->getOpcode()) {
    1248             :         default: break;
    1249             :         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
    1250             :         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
    1251             :         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
    1252             :         }
    1253             :         uint64_t ShiftVal = C->getZExtValue();
    1254          12 :         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
    1255          12 :           unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1256          12 :           if (!RHSReg)
    1257             :             return 0;
    1258          12 :           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1259          12 :           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
    1260             :                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
    1261             :                                     WantResult);
    1262          12 :           if (ResultReg)
    1263             :             return ResultReg;
    1264             :         }
    1265             :       }
    1266             :     }
    1267             :   }
    1268             : 
    1269         236 :   unsigned RHSReg = getRegForValue(RHS);
    1270         236 :   if (!RHSReg)
    1271             :     return 0;
    1272         236 :   bool RHSIsKill = hasTrivialKill(RHS);
    1273             : 
    1274         236 :   if (NeedExtend)
    1275           1 :     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
    1276             : 
    1277         236 :   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1278         236 :                        SetFlags, WantResult);
    1279             : }
    1280             : 
    1281         245 : unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1282             :                                         bool LHSIsKill, unsigned RHSReg,
    1283             :                                         bool RHSIsKill, bool SetFlags,
    1284             :                                         bool WantResult) {
    1285             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1286             : 
    1287         245 :   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
    1288         243 :       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    1289             :     return 0;
    1290             : 
    1291         243 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1292             :     return 0;
    1293             : 
    1294             :   static const unsigned OpcTable[2][2][2] = {
    1295             :     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
    1296             :       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    1297             :     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
    1298             :       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
    1299             :   };
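                      :   // Indexing sketch (illustrative): SetFlags=1, UseAdd=0, Is64Bit=1
                      :   // selects OpcTable[1][0][1] == AArch64::SUBSXrr, the flag-setting
                      :   // 64-bit subtract that emitICmp reaches via emitSub.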
    1300             :   bool Is64Bit = RetVT == MVT::i64;
    1301         243 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1302             :   const TargetRegisterClass *RC =
    1303         243 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1304             :   unsigned ResultReg;
    1305         243 :   if (WantResult)
    1306         200 :     ResultReg = createResultReg(RC);
    1307             :   else
    1308          43 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1309             : 
    1310         243 :   const MCInstrDesc &II = TII.get(Opc);
    1311         486 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1312         486 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1313         243 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1314         243 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1315         243 :       .addReg(RHSReg, getKillRegState(RHSIsKill));
    1316         243 :   return ResultReg;
    1317             : }
    1318             : 
    1319         104 : unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1320             :                                         bool LHSIsKill, uint64_t Imm,
    1321             :                                         bool SetFlags, bool WantResult) {
    1322             :   assert(LHSReg && "Invalid register number.");
    1323             : 
    1324         104 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1325             :     return 0;
    1326             : 
    1327             :   unsigned ShiftImm;
    1328         104 :   if (isUInt<12>(Imm))
    1329             :     ShiftImm = 0;
    1330          12 :   else if ((Imm & 0xfff000) == Imm) {
    1331             :     ShiftImm = 12;
    1332           4 :     Imm >>= 12;
    1333             :   } else
    1334             :     return 0;
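                      :   // Example (illustrative): Imm == 0x3000 is too wide for a plain 12-bit
                      :   // immediate, but (0x3000 & 0xfff000) == 0x3000, so it is encoded as #3
                      :   // with a 12-bit shift, i.e. "add x0, x1, #3, lsl #12".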
    1335             : 
    1336             :   static const unsigned OpcTable[2][2][2] = {
    1337             :     { { AArch64::SUBWri,  AArch64::SUBXri  },
    1338             :       { AArch64::ADDWri,  AArch64::ADDXri  }  },
    1339             :     { { AArch64::SUBSWri, AArch64::SUBSXri },
    1340             :       { AArch64::ADDSWri, AArch64::ADDSXri }  }
    1341             :   };
    1342             :   bool Is64Bit = RetVT == MVT::i64;
    1343          96 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1344             :   const TargetRegisterClass *RC;
    1345          96 :   if (SetFlags)
    1346          32 :     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1347             :   else
    1348          64 :     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    1349             :   unsigned ResultReg;
    1350          96 :   if (WantResult)
    1351          69 :     ResultReg = createResultReg(RC);
    1352             :   else
    1353          27 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1354             : 
    1355          96 :   const MCInstrDesc &II = TII.get(Opc);
    1356         192 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1357          96 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1358          96 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1359          96 :       .addImm(Imm)
    1360          96 :       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
    1361          96 :   return ResultReg;
    1362             : }
    1363             : 
    1364          28 : unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1365             :                                         bool LHSIsKill, unsigned RHSReg,
    1366             :                                         bool RHSIsKill,
    1367             :                                         AArch64_AM::ShiftExtendType ShiftType,
    1368             :                                         uint64_t ShiftImm, bool SetFlags,
    1369             :                                         bool WantResult) {
    1370             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1371             :   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
    1372             :          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
    1373             : 
    1374          28 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1375             :     return 0;
    1376             : 
    1377             :   // Don't deal with undefined shifts.
    1378          28 :   if (ShiftImm >= RetVT.getSizeInBits())
    1379             :     return 0;
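                      :   // E.g. a shift of 32 or more on an i32 operand has no valid
                      :   // shifted-register encoding (valid shift amounts are 0-31 for W
                      :   // registers and 0-63 for X registers).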
    1380             : 
    1381             :   static const unsigned OpcTable[2][2][2] = {
    1382             :     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
    1383             :       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    1384             :     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
    1385             :       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
    1386             :   };
    1387             :   bool Is64Bit = RetVT == MVT::i64;
    1388          26 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1389             :   const TargetRegisterClass *RC =
    1390          26 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1391             :   unsigned ResultReg;
    1392          26 :   if (WantResult)
    1393          17 :     ResultReg = createResultReg(RC);
    1394             :   else
    1395           9 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1396             : 
    1397          26 :   const MCInstrDesc &II = TII.get(Opc);
    1398          52 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1399          52 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1400          26 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1401          26 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1402          26 :       .addReg(RHSReg, getKillRegState(RHSIsKill))
    1403          26 :       .addImm(getShifterImm(ShiftType, ShiftImm));
    1404          26 :   return ResultReg;
    1405             : }
    1406             : 
    1407           7 : unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
    1408             :                                         bool LHSIsKill, unsigned RHSReg,
    1409             :                                         bool RHSIsKill,
    1410             :                                         AArch64_AM::ShiftExtendType ExtType,
    1411             :                                         uint64_t ShiftImm, bool SetFlags,
    1412             :                                         bool WantResult) {
    1413             :   assert(LHSReg && RHSReg && "Invalid register number.");
    1414             :   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
    1415             :          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
    1416             : 
    1417           7 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    1418             :     return 0;
    1419             : 
    1420           7 :   if (ShiftImm >= 4)
    1421             :     return 0;
    1422             : 
    1423             :   static const unsigned OpcTable[2][2][2] = {
    1424             :     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
    1425             :       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    1426             :     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
    1427             :       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
    1428             :   };
    1429             :   bool Is64Bit = RetVT == MVT::i64;
    1430           7 :   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
    1431             :   const TargetRegisterClass *RC = nullptr;
    1432           7 :   if (SetFlags)
    1433           5 :     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    1434             :   else
    1435           2 :     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
    1436             :   unsigned ResultReg;
    1437           7 :   if (WantResult)
    1438           2 :     ResultReg = createResultReg(RC);
    1439             :   else
    1440           5 :     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    1441             : 
    1442           7 :   const MCInstrDesc &II = TII.get(Opc);
    1443          14 :   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
    1444          14 :   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
    1445           7 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    1446           7 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1447           7 :       .addReg(RHSReg, getKillRegState(RHSIsKill))
    1448           7 :       .addImm(getArithExtendImm(ExtType, ShiftImm));
    1449           7 :   return ResultReg;
    1450             : }
    1451             : 
    1452         107 : bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
    1453         107 :   Type *Ty = LHS->getType();
    1454         107 :   EVT EVT = TLI.getValueType(DL, Ty, true);
    1455         107 :   if (!EVT.isSimple())
    1456             :     return false;
    1457             :   MVT VT = EVT.getSimpleVT();
    1458             : 
    1459         107 :   switch (VT.SimpleTy) {
    1460             :   default:
    1461             :     return false;
    1462          60 :   case MVT::i1:
    1463             :   case MVT::i8:
    1464             :   case MVT::i16:
    1465             :   case MVT::i32:
    1466             :   case MVT::i64:
    1467         120 :     return emitICmp(VT, LHS, RHS, IsZExt);
    1468          47 :   case MVT::f32:
    1469             :   case MVT::f64:
    1470          47 :     return emitFCmp(VT, LHS, RHS);
    1471             :   }
    1472             : }
    1473             : 
    1474             : bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
    1475             :                                bool IsZExt) {
    1476          60 :   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
    1477          60 :                  IsZExt) != 0;
    1478             : }
    1479             : 
    1480             : bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    1481             :                                   uint64_t Imm) {
    1482          11 :   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
    1483             :                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
    1484             : }
    1485             : 
    1486          47 : bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
    1487          47 :   if (RetVT != MVT::f32 && RetVT != MVT::f64)
    1488             :     return false;
    1489             : 
    1490             :   // Check to see if the 2nd operand is a constant that we can encode directly
    1491             :   // in the compare.
    1492             :   bool UseImm = false;
    1493             :   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    1494           4 :     if (CFP->isZero() && !CFP->isNegative())
    1495             :       UseImm = true;
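                      :   // Only +0.0 qualifies: AArch64's fcmp has a literal #0.0 form
                      :   // ("fcmp s0, #0.0") but no other floating-point immediates, and this
                      :   // check conservatively rejects -0.0.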
    1496             : 
    1497          47 :   unsigned LHSReg = getRegForValue(LHS);
    1498          47 :   if (!LHSReg)
    1499             :     return false;
    1500          47 :   bool LHSIsKill = hasTrivialKill(LHS);
    1501             : 
    1502          47 :   if (UseImm) {
    1503           2 :     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    1504           4 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    1505           2 :         .addReg(LHSReg, getKillRegState(LHSIsKill));
    1506           2 :     return true;
    1507             :   }
    1508             : 
    1509          45 :   unsigned RHSReg = getRegForValue(RHS);
    1510          45 :   if (!RHSReg)
    1511             :     return false;
    1512          45 :   bool RHSIsKill = hasTrivialKill(RHS);
    1513             : 
    1514          45 :   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
    1515          90 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    1516          45 :       .addReg(LHSReg, getKillRegState(LHSIsKill))
    1517          45 :       .addReg(RHSReg, getKillRegState(RHSIsKill));
    1518          45 :   return true;
    1519             : }
    1520             : 
    1521             : unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
    1522             :                                   bool SetFlags, bool WantResult, bool IsZExt) {
    1523         210 :   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
    1524             :                     IsZExt);
    1525             : }
    1526             : 
    1527             : /// This method is a wrapper to simplify add emission.
    1528             : ///
    1529             : /// First try to emit an add with an immediate operand using emitAddSub_ri. If
    1530             : /// that fails, then try to materialize the immediate into a register and use
    1531             : /// emitAddSub_rr instead.
    1532          34 : unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
    1533             :                                       int64_t Imm) {
    1534             :   unsigned ResultReg;
    1535          34 :   if (Imm < 0)
    1536           2 :     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
    1537             :   else
    1538          32 :     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
    1539             : 
    1540          34 :   if (ResultReg)
    1541             :     return ResultReg;
    1542             : 
    1543           5 :   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
    1544           5 :   if (!CReg)
    1545             :     return 0;
    1546             : 
    1547           5 :   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
    1548           5 :   return ResultReg;
    1549             : }
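                      : // Illustrative fallback: Imm == 0xffffff fits neither the plain nor the
                      : // LSL #12 12-bit immediate form, so emitAddSub_ri returns 0 and the
                      : // constant is materialized with fastEmit_i before emitAddSub_rr.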
    1550             : 
    1551             : unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
    1552             :                                   bool SetFlags, bool WantResult, bool IsZExt) {
    1553          63 :   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
    1554             :                     IsZExt);
    1555             : }
    1556             : 
    1557             : unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
    1558             :                                       bool LHSIsKill, unsigned RHSReg,
    1559             :                                       bool RHSIsKill, bool WantResult) {
    1560           4 :   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
    1561             :                        RHSIsKill, /*SetFlags=*/true, WantResult);
    1562             : }
    1563             : 
    1564             : unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
    1565             :                                       bool LHSIsKill, unsigned RHSReg,
    1566             :                                       bool RHSIsKill,
    1567             :                                       AArch64_AM::ShiftExtendType ShiftType,
    1568             :                                       uint64_t ShiftImm, bool WantResult) {
    1569           9 :   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
    1570             :                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
    1571             :                        WantResult);
    1572             : }
    1573             : 
    1574          89 : unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
    1575             :                                         const Value *LHS, const Value *RHS) {
    1576             :   // Canonicalize immediates to the RHS first.
    1577          89 :   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    1578             :     std::swap(LHS, RHS);
    1579             : 
    1580             :   // Canonicalize mul by power-of-2 to the RHS.
    1581          89 :   if (LHS->hasOneUse() && isValueAvailable(LHS))
    1582          89 :     if (isMulPowOf2(LHS))
    1583             :       std::swap(LHS, RHS);
    1584             : 
    1585             :   // Canonicalize shift immediate to the RHS.
    1586          89 :   if (LHS->hasOneUse() && isValueAvailable(LHS))
    1587             :     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
    1588           0 :       if (isa<ConstantInt>(SI->getOperand(1)))
    1589             :         std::swap(LHS, RHS);
    1590             : 
    1591          89 :   unsigned LHSReg = getRegForValue(LHS);
    1592          89 :   if (!LHSReg)
    1593             :     return 0;
    1594          89 :   bool LHSIsKill = hasTrivialKill(LHS);
    1595             : 
    1596             :   unsigned ResultReg = 0;
    1597             :   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    1598             :     uint64_t Imm = C->getZExtValue();
    1599          18 :     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
    1600             :   }
    1601          18 :   if (ResultReg)
    1602             :     return ResultReg;
    1603             : 
    1604             :   // Check if the mul can be folded into the instruction.
    1605          71 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1606          71 :     if (isMulPowOf2(RHS)) {
    1607           6 :       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
    1608             :       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
    1609             : 
    1610             :       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
    1611           0 :         if (C->getValue().isPowerOf2())
    1612             :           std::swap(MulLHS, MulRHS);
    1613             : 
    1614             :       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
    1615           6 :       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
    1616             : 
    1617           6 :       unsigned RHSReg = getRegForValue(MulLHS);
    1618           6 :       if (!RHSReg)
    1619             :         return 0;
    1620           6 :       bool RHSIsKill = hasTrivialKill(MulLHS);
    1621           6 :       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
    1622             :                                    RHSIsKill, ShiftVal);
    1623           6 :       if (ResultReg)
    1624             :         return ResultReg;
    1625             :     }
    1626             :   }
    1627             : 
    1628             :   // Check if the shift can be folded into the instruction.
    1629          65 :   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    1630             :     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
    1631          24 :       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
    1632             :         uint64_t ShiftVal = C->getZExtValue();
    1633          24 :         unsigned RHSReg = getRegForValue(SI->getOperand(0));
    1634          24 :         if (!RHSReg)
    1635             :           return 0;
    1636          24 :         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
    1637          24 :         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
    1638             :                                      RHSIsKill, ShiftVal);
    1639          24 :         if (ResultReg)
    1640             :           return ResultReg;
    1641             :       }
    1642             :   }
    1643             : 
    1644          53 :   unsigned RHSReg = getRegForValue(RHS);
    1645          53 :   if (!RHSReg)
    1646             :     return 0;
    1647          53 :   bool RHSIsKill = hasTrivialKill(RHS);
    1648             : 
    1649          61 :   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
    1650          53 :   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    1651          53 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    1652          12 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1653          12 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1654             :   }
    1655             :   return ResultReg;
    1656             : }
    1657             : 
    1658         203 : unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
    1659             :                                            unsigned LHSReg, bool LHSIsKill,
    1660             :                                            uint64_t Imm) {
    1661             :   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
    1662             :                 "ISD nodes are not consecutive!");
    1663             :   static const unsigned OpcTable[3][2] = {
    1664             :     { AArch64::ANDWri, AArch64::ANDXri },
    1665             :     { AArch64::ORRWri, AArch64::ORRXri },
    1666             :     { AArch64::EORWri, AArch64::EORXri }
    1667             :   };
    1668             :   const TargetRegisterClass *RC;
    1669             :   unsigned Opc;
    1670             :   unsigned RegSize;
    1671         203 :   switch (RetVT.SimpleTy) {
    1672             :   default:
    1673             :     return 0;
    1674         197 :   case MVT::i1:
    1675             :   case MVT::i8:
    1676             :   case MVT::i16:
    1677             :   case MVT::i32: {
    1678         197 :     unsigned Idx = ISDOpc - ISD::AND;
    1679         197 :     Opc = OpcTable[Idx][0];
    1680             :     RC = &AArch64::GPR32spRegClass;
    1681             :     RegSize = 32;
    1682         197 :     break;
    1683             :   }
    1684           6 :   case MVT::i64:
    1685           6 :     Opc = OpcTable[ISDOpc - ISD::AND][1];
    1686             :     RC = &AArch64::GPR64spRegClass;
    1687             :     RegSize = 64;
    1688           6 :     break;
    1689             :   }
    1690             : 
    1691         203 :   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    1692             :     return 0;
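                      :   // Illustrative values: 0xff and 0x0f0f0f0f0f0f0f0f are encodable
                      :   // logical immediates (rotated runs of ones in a repeating element),
                      :   // while 123 (0b1111011) is not, so the caller must materialize such
                      :   // constants into a register instead.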
    1693             : 
    1694             :   unsigned ResultReg =
    1695         203 :       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
    1696             :                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
    1697         203 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    1698           4 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1699           4 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1700             :   }
    1701             :   return ResultReg;
    1702             : }
    1703             : 
    1704          30 : unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
    1705             :                                            unsigned LHSReg, bool LHSIsKill,
    1706             :                                            unsigned RHSReg, bool RHSIsKill,
    1707             :                                            uint64_t ShiftImm) {
    1708             :   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
    1709             :                 "ISD nodes are not consecutive!");
    1710             :   static const unsigned OpcTable[3][2] = {
    1711             :     { AArch64::ANDWrs, AArch64::ANDXrs },
    1712             :     { AArch64::ORRWrs, AArch64::ORRXrs },
    1713             :     { AArch64::EORWrs, AArch64::EORXrs }
    1714             :   };
    1715             : 
    1716             :   // Don't deal with undefined shifts.
    1717          30 :   if (ShiftImm >= RetVT.getSizeInBits())
    1718             :     return 0;
    1719             : 
    1720             :   const TargetRegisterClass *RC;
    1721             :   unsigned Opc;
    1722          18 :   switch (RetVT.SimpleTy) {
    1723             :   default:
    1724             :     return 0;
    1725          12 :   case MVT::i1:
    1726             :   case MVT::i8:
    1727             :   case MVT::i16:
    1728             :   case MVT::i32:
    1729          12 :     Opc = OpcTable[ISDOpc - ISD::AND][0];
    1730             :     RC = &AArch64::GPR32RegClass;
    1731          12 :     break;
    1732           6 :   case MVT::i64:
    1733           6 :     Opc = OpcTable[ISDOpc - ISD::AND][1];
    1734             :     RC = &AArch64::GPR64RegClass;
    1735           6 :     break;
    1736             :   }
    1737             :   unsigned ResultReg =
    1738          18 :       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
    1739          18 :                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
    1740          18 :   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    1741           6 :     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    1742           6 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    1743             :   }
    1744             :   return ResultReg;
    1745             : }
    1746             : 
    1747           4 : unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    1748             :                                      uint64_t Imm) {
    1749         170 :   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
    1750             : }
    1751             : 
    1752         384 : unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
    1753             :                                    bool WantZExt, MachineMemOperand *MMO) {
    1754         768 :   if (!TLI.allowsMisalignedMemoryAccesses(VT))
    1755             :     return 0;
    1756             : 
    1757             :   // Simplify this down to something we can handle.
    1758         382 :   if (!simplifyAddress(Addr, VT))
    1759             :     return 0;
    1760             : 
    1761             :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
    1762         310 :   if (!ScaleFactor)
    1763           0 :     llvm_unreachable("Unexpected value type.");
    1764             : 
    1765             :   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
    1766             :   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
    1767             :   bool UseScaled = true;
    1768         310 :   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    1769             :     UseScaled = false;
    1770             :     ScaleFactor = 1;
    1771             :   }
    1772             : 
    1773             :   static const unsigned GPOpcTable[2][8][4] = {
    1774             :     // Sign-extend.
    1775             :     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
    1776             :         AArch64::LDURXi  },
    1777             :       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
    1778             :         AArch64::LDURXi  },
    1779             :       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
    1780             :         AArch64::LDRXui  },
    1781             :       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
    1782             :         AArch64::LDRXui  },
    1783             :       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
    1784             :         AArch64::LDRXroX },
    1785             :       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
    1786             :         AArch64::LDRXroX },
    1787             :       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
    1788             :         AArch64::LDRXroW },
    1789             :       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
    1790             :         AArch64::LDRXroW }
    1791             :     },
    1792             :     // Zero-extend.
    1793             :     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
    1794             :         AArch64::LDURXi  },
    1795             :       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
    1796             :         AArch64::LDURXi  },
    1797             :       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
    1798             :         AArch64::LDRXui  },
    1799             :       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
    1800             :         AArch64::LDRXui  },
    1801             :       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
    1802             :         AArch64::LDRXroX },
    1803             :       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
    1804             :         AArch64::LDRXroX },
    1805             :       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
    1806             :         AArch64::LDRXroW },
    1807             :       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
    1808             :         AArch64::LDRXroW }
    1809             :     }
    1810             :   };
    1811             : 
    1812             :   static const unsigned FPOpcTable[4][2] = {
    1813             :     { AArch64::LDURSi,  AArch64::LDURDi  },
    1814             :     { AArch64::LDRSui,  AArch64::LDRDui  },
    1815             :     { AArch64::LDRSroX, AArch64::LDRDroX },
    1816             :     { AArch64::LDRSroW, AArch64::LDRDroW }
    1817             :   };
    1818             : 
    1819             :   unsigned Opc;
    1820             :   const TargetRegisterClass *RC;
    1821         310 :   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
    1822         181 :                       Addr.getOffsetReg();
    1823         229 :   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
    1824         310 :   if (Addr.getExtendType() == AArch64_AM::UXTW ||
    1825             :       Addr.getExtendType() == AArch64_AM::SXTW)
    1826          39 :     Idx++;
    1827             : 
    1828             :   bool IsRet64Bit = RetVT == MVT::i64;
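                      :   // Illustrative lookup: a zero-extending i8 load with a scaled
                      :   // immediate offset has WantZExt=1, Idx=1, IsRet64Bit=0, i.e.
                      :   // GPOpcTable[1][2*1+0][0] == AArch64::LDRBBui.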
    1829         310 :   switch (VT.SimpleTy) {
    1830           0 :   default:
    1831           0 :     llvm_unreachable("Unexpected value type.");
    1832          53 :   case MVT::i1: // Intentional fall-through.
    1833             :   case MVT::i8:
    1834          53 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    1835          53 :     RC = (IsRet64Bit && !WantZExt) ?
    1836             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1837             :     break;
    1838          42 :   case MVT::i16:
    1839          42 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    1840          42 :     RC = (IsRet64Bit && !WantZExt) ?
    1841             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1842             :     break;
    1843         108 :   case MVT::i32:
    1844         108 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    1845         108 :     RC = (IsRet64Bit && !WantZExt) ?
    1846             :              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    1847             :     break;
    1848          94 :   case MVT::i64:
    1849          94 :     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    1850             :     RC = &AArch64::GPR64RegClass;
    1851          94 :     break;
    1852           3 :   case MVT::f32:
    1853           3 :     Opc = FPOpcTable[Idx][0];
    1854             :     RC = &AArch64::FPR32RegClass;
    1855           3 :     break;
    1856          10 :   case MVT::f64:
    1857          10 :     Opc = FPOpcTable[Idx][1];
    1858             :     RC = &AArch64::FPR64RegClass;
    1859          10 :     break;
    1860             :   }
    1861             : 
    1862             :   // Create the base instruction, then add the operands.
    1863         310 :   unsigned ResultReg = createResultReg(RC);
    1864         310 :   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    1865         620 :                                     TII.get(Opc), ResultReg);
    1866         310 :   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
    1867             : 
    1868             :   // Loading an i1 requires special handling.
    1869         310 :   if (VT == MVT::i1) {
    1870           3 :     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    1871             :     assert(ANDReg && "Unexpected AND instruction emission failure.");
    1872             :     ResultReg = ANDReg;
    1873             :   }
    1874             : 
    1875             :   // For zero-extending loads to 64bit we emit a 32bit load and then convert
    1876             :   // the 32bit reg to a 64bit reg.
    1877         310 :   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    1878          20 :     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    1879          20 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    1880          40 :             TII.get(AArch64::SUBREG_TO_REG), Reg64)
    1881             :         .addImm(0)
    1882          20 :         .addReg(ResultReg, getKillRegState(true))
    1883             :         .addImm(AArch64::sub_32);
    1884             :     ResultReg = Reg64;
    1885             :   }
    1886             :   return ResultReg;
    1887             : }
    1888             : 
    1889         284 : bool AArch64FastISel::selectAddSub(const Instruction *I) {
    1890         284 :   MVT VT;
    1891         284 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    1892             :     return false;
    1893             : 
    1894         568 :   if (VT.isVector())
    1895         142 :     return selectOperator(I, I->getOpcode());
    1896             : 
    1897             :   unsigned ResultReg;
    1898         213 :   switch (I->getOpcode()) {
    1899           0 :   default:
    1900           0 :     llvm_unreachable("Unexpected instruction.");
    1901         210 :   case Instruction::Add:
    1902         210 :     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    1903         210 :     break;
    1904           3 :   case Instruction::Sub:
    1905           3 :     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    1906           3 :     break;
    1907             :   }
    1908         213 :   if (!ResultReg)
    1909             :     return false;
    1910             : 
    1911         213 :   updateValueMap(I, ResultReg);
    1912         213 :   return true;
    1913             : }
    1914             : 
    1915          89 : bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
    1916          89 :   MVT VT;
    1917          89 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    1918             :     return false;
    1919             : 
    1920         178 :   if (VT.isVector())
    1921           0 :     return selectOperator(I, I->getOpcode());
    1922             : 
    1923             :   unsigned ResultReg;
    1924          89 :   switch (I->getOpcode()) {
    1925           0 :   default:
    1926           0 :     llvm_unreachable("Unexpected instruction.");
    1927          50 :   case Instruction::And:
    1928         100 :     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    1929          50 :     break;
    1930          20 :   case Instruction::Or:
    1931          40 :     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    1932          20 :     break;
    1933          19 :   case Instruction::Xor:
    1934          38 :     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    1935          19 :     break;
    1936             :   }
    1937          89 :   if (!ResultReg)
    1938             :     return false;
    1939             : 
    1940          89 :   updateValueMap(I, ResultReg);
    1941          89 :   return true;
    1942             : }
    1943             : 
    1944         375 : bool AArch64FastISel::selectLoad(const Instruction *I) {
    1945         375 :   MVT VT;
    1946             :   // Verify we have a legal type before going any further.  Currently, we handle
    1947             :   // simple types that will directly fit in a register (i32/f32/i64/f64) or
    1948             :   // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
    1949         745 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
    1950         370 :       cast<LoadInst>(I)->isAtomic())
    1951           5 :     return false;
    1952             : 
    1953         370 :   const Value *SV = I->getOperand(0);
    1954         370 :   if (TLI.supportSwiftError()) {
    1955             :     // Swifterror values can come from either a function parameter with
    1956             :     // swifterror attribute or an alloca with swifterror attribute.
    1957             :     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
    1958         103 :       if (Arg->hasSwiftErrorAttr())
    1959             :         return false;
    1960             :     }
    1961             : 
    1962             :     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
    1963          65 :       if (Alloca->isSwiftError())
    1964             :         return false;
    1965             :     }
    1966             :   }
    1967             : 
    1968             :   // See if we can handle this address.
    1969             :   Address Addr;
    1970         730 :   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    1971             :     return false;
    1972             : 
    1973             :   // Fold the following sign-/zero-extend into the load instruction.
    1974             :   bool WantZExt = true;
    1975         343 :   MVT RetVT = VT;
    1976             :   const Value *IntExtVal = nullptr;
    1977         343 :   if (I->hasOneUse()) {
    1978         249 :     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
    1979          37 :       if (isTypeSupported(ZE->getType(), RetVT))
    1980             :         IntExtVal = ZE;
    1981             :       else
    1982           0 :         RetVT = VT;
    1983             :     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
    1984          38 :       if (isTypeSupported(SE->getType(), RetVT))
    1985             :         IntExtVal = SE;
    1986             :       else
    1987           0 :         RetVT = VT;
    1988             :       WantZExt = false;
    1989             :     }
    1990             :   }
    1991             : 
    1992             :   unsigned ResultReg =
    1993         343 :       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
    1994         343 :   if (!ResultReg)
    1995             :     return false;
    1996             : 
    1997             :   // There are a few different cases we have to handle, because the load or the
    1998             :   // sign-/zero-extend might not be selected by FastISel if we fall back to
    1999             :   // SelectionDAG. There is also an ordering issue when both instructions are in
    2000             :   // different basic blocks.
    2001             :   // 1.) The load instruction is selected by FastISel, but the integer extend
    2002             :   //     is not. This usually happens when the integer extend is in a different
    2003             :   //     basic block and SelectionDAG took over for that basic block.
    2004             :   // 2.) The load instruction is selected before the integer extend. This only
    2005             :   //     happens when the integer extend is in a different basic block.
    2006             :   // 3.) The load instruction is selected by SelectionDAG and the integer extend
    2007             :   //     by FastISel. This happens if there are instructions between the load
    2008             :   //     and the integer extend that couldn't be selected by FastISel.
    2009         269 :   if (IntExtVal) {
    2010             :     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    2011             :     // could select it. Emit a copy to subreg if necessary. FastISel will remove
    2012             :     // it when it selects the integer extend.
    2013          75 :     unsigned Reg = lookUpRegForValue(IntExtVal);
    2014          75 :     auto *MI = MRI.getUniqueVRegDef(Reg);
    2015          75 :     if (!MI) {
    2016           1 :       if (RetVT == MVT::i64 && VT <= MVT::i32) {
    2017           1 :         if (WantZExt) {
    2018             :           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
    2019           1 :           std::prev(FuncInfo.InsertPt)->eraseFromParent();
    2020           1 :           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
    2021             :         } else
    2022           0 :           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
    2023             :                                                  /*IsKill=*/true,
    2024             :                                                  AArch64::sub_32);
    2025             :       }
    2026           1 :       updateValueMap(I, ResultReg);
    2027           1 :       return true;
    2028             :     }
    2029             : 
    2030             :     // The integer extend has already been emitted; delete all the instructions
    2031             :     // that have been emitted by the integer extend lowering code and use the
    2032             :     // result from the load instruction directly.
    2033         186 :     while (MI) {
    2034             :       Reg = 0;
    2035         150 :       for (auto &Opnd : MI->uses()) {
    2036         150 :         if (Opnd.isReg()) {
    2037         112 :           Reg = Opnd.getReg();
    2038         112 :           break;
    2039             :         }
    2040             :       }
    2041         112 :       MI->eraseFromParent();
    2042             :       MI = nullptr;
    2043         112 :       if (Reg)
    2044         112 :         MI = MRI.getUniqueVRegDef(Reg);
    2045             :     }
    2046          74 :     updateValueMap(IntExtVal, ResultReg);
    2047          74 :     return true;
    2048             :   }
    2049             : 
    2050         194 :   updateValueMap(I, ResultReg);
    2051         194 :   return true;
    2052             : }
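
A simplified standalone restatement (assumed) of the extend-folding decision at the top of selectLoad: when the load's only user is a zext/sext to a supported type, the load is emitted directly in the extended type, and the extend instruction itself is later erased in favor of the load's result.

    struct ExtFold {
      bool FoldIntoLoad; // emit the load directly in the extended type?
      bool WantZExt;     // zero- vs sign-extending load
    };
    static ExtFold classifyLoadUser(bool HasOneUse, bool UserIsZExt,
                                    bool UserIsSExt) {
      if (HasOneUse && UserIsZExt) return {true, true};
      if (HasOneUse && UserIsSExt) return {true, false};
      return {false, true}; // default: plain zero-extending load
    }
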
    2053             : 
    2054          16 : bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
    2055             :                                        unsigned AddrReg,
    2056             :                                        MachineMemOperand *MMO) {
    2057             :   unsigned Opc;
    2058          16 :   switch (VT.SimpleTy) {
    2059             :   default: return false;
    2060             :   case MVT::i8:  Opc = AArch64::STLRB; break;
    2061             :   case MVT::i16: Opc = AArch64::STLRH; break;
    2062             :   case MVT::i32: Opc = AArch64::STLRW; break;
    2063             :   case MVT::i64: Opc = AArch64::STLRX; break;
    2064             :   }
    2065             : 
    2066          16 :   const MCInstrDesc &II = TII.get(Opc);
    2067          16 :   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
    2068          16 :   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
    2069          16 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    2070          16 :       .addReg(SrcReg)
    2071          16 :       .addReg(AddrReg)
    2072             :       .addMemOperand(MMO);
    2073          16 :   return true;
    2074             : }
    2075             : 
    2076         445 : bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
    2077             :                                 MachineMemOperand *MMO) {
    2078         890 :   if (!TLI.allowsMisalignedMemoryAccesses(VT))
    2079             :     return false;
    2080             : 
    2081             :   // Simplify this down to something we can handle.
    2082         445 :   if (!simplifyAddress(Addr, VT))
    2083             :     return false;
    2084             : 
    2085             :   unsigned ScaleFactor = getImplicitScaleFactor(VT);
    2086         291 :   if (!ScaleFactor)
    2087           0 :     llvm_unreachable("Unexpected value type.");
    2088             : 
    2089             :   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
    2090             :   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
    2091             :   bool UseScaled = true;
    2092         291 :   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    2093             :     UseScaled = false;
    2094             :     ScaleFactor = 1;
    2095             :   }
    2096             : 
    2097             :   static const unsigned OpcTable[4][6] = {
    2098             :     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
    2099             :       AArch64::STURSi,   AArch64::STURDi },
    2100             :     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
    2101             :       AArch64::STRSui,   AArch64::STRDui },
    2102             :     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
    2103             :       AArch64::STRSroX,  AArch64::STRDroX },
    2104             :     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
    2105             :       AArch64::STRSroW,  AArch64::STRDroW }
    2106             :   };
    2107             : 
    2108             :   unsigned Opc;
    2109             :   bool VTIsi1 = false;
    2110         291 :   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
    2111         133 :                       Addr.getOffsetReg();
    2112         288 :   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
    2113         291 :   if (Addr.getExtendType() == AArch64_AM::UXTW ||
    2114             :       Addr.getExtendType() == AArch64_AM::SXTW)
    2115           0 :     Idx++;
    2116             : 
    2117         291 :   switch (VT.SimpleTy) {
    2118           0 :   default: llvm_unreachable("Unexpected value type.");
    2119           8 :   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
    2120          50 :   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
    2121          21 :   case MVT::i16: Opc = OpcTable[Idx][1]; break;
    2122         100 :   case MVT::i32: Opc = OpcTable[Idx][2]; break;
    2123         101 :   case MVT::i64: Opc = OpcTable[Idx][3]; break;
    2124           2 :   case MVT::f32: Opc = OpcTable[Idx][4]; break;
    2125          17 :   case MVT::f64: Opc = OpcTable[Idx][5]; break;
    2126             :   }
    2127             : 
    2128             :   // Storing an i1 requires special handling.
    2129         291 :   if (VTIsi1 && SrcReg != AArch64::WZR) {
    2130           7 :     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    2131             :     assert(ANDReg && "Unexpected AND instruction emission failure.");
    2132             :     SrcReg = ANDReg;
    2133             :   }
    2134             :   // Create the base instruction, then add the operands.
    2135         291 :   const MCInstrDesc &II = TII.get(Opc);
    2136         582 :   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
    2137             :   MachineInstrBuilder MIB =
    2138         291 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
    2139         291 :   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
    2140             : 
    2141         291 :   return true;
    2142             : }
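
A standalone sketch (helper name hypothetical) of the immediate-offset classification above: negative or scale-misaligned offsets must take the unscaled 9-bit signed forms (STUR*), while non-negative, size-aligned offsets can use the scaled 12-bit unsigned forms (STR*ui).

    static bool useUnscaledStore(int64_t Offset, unsigned ScaleFactor) {
      // ScaleFactor is the access size and a power of two, so the mask test
      // checks that the offset is a multiple of that size.
      return Offset < 0 || (Offset & (int64_t)(ScaleFactor - 1)) != 0;
    }
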
    2143             : 
    2144         421 : bool AArch64FastISel::selectStore(const Instruction *I) {
    2145         421 :   MVT VT;
    2146         421 :   const Value *Op0 = I->getOperand(0);
    2147             :   // Verify we have a legal type before going any further.  Currently, we handle
    2148             :   // simple types that will directly fit in a register (i32/f32/i64/f64) or
    2149             :   // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
    2150         421 :   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
    2151             :     return false;
    2152             : 
    2153             :   const Value *PtrV = I->getOperand(1);
    2154         404 :   if (TLI.supportSwiftError()) {
    2155             :     // Swifterror values can come from either a function parameter with
    2156             :     // swifterror attribute or an alloca with swifterror attribute.
    2157             :     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
    2158         228 :       if (Arg->hasSwiftErrorAttr())
    2159             :         return false;
    2160             :     }
    2161             : 
    2162             :     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
    2163          98 :       if (Alloca->isSwiftError())
    2164             :         return false;
    2165             :     }
    2166             :   }
    2167             : 
    2168             :   // Get the value to be stored into a register. Use the zero register directly
    2169             :   // when possible to avoid an unnecessary copy and a wasted register.
    2170             :   unsigned SrcReg = 0;
    2171             :   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
    2172          55 :     if (CI->isZero())
    2173          36 :       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    2174             :   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    2175           2 :     if (CF->isZero() && !CF->isNegative()) {
    2176           2 :       VT = MVT::getIntegerVT(VT.getSizeInBits());
    2177           2 :       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    2178             :     }
    2179             :   }
    2180             : 
    2181             :   if (!SrcReg)
    2182         364 :     SrcReg = getRegForValue(Op0);
    2183             : 
    2184         402 :   if (!SrcReg)
    2185             :     return false;
    2186             : 
    2187             :   auto *SI = cast<StoreInst>(I);
    2188             : 
    2189             :   // Try to emit a STLR for seq_cst/release.
    2190         396 :   if (SI->isAtomic()) {
    2191             :     AtomicOrdering Ord = SI->getOrdering();
    2192             :     // The non-atomic instructions are sufficient for relaxed stores.
    2193          24 :     if (isReleaseOrStronger(Ord)) {
    2194             :       // The STLR addressing mode only supports a base reg; pass that directly.
    2195          16 :       unsigned AddrReg = getRegForValue(PtrV);
    2196          16 :       return emitStoreRelease(VT, SrcReg, AddrReg,
    2197          16 :                               createMachineMemOperandFor(I));
    2198             :     }
    2199             :   }
    2200             : 
    2201             :   // See if we can handle this address.
    2202             :   Address Addr;
    2203         380 :   if (!computeAddress(PtrV, Addr, Op0->getType()))
    2204             :     return false;
    2205             : 
    2206         372 :   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
    2207         152 :     return false;
    2208             :   return true;
    2209             : }
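
A hedged restatement (helper hypothetical) of the zero-store shortcut above: integer zero and positive floating-point zero are stored straight from WZR/XZR; -0.0 does not qualify because its bit pattern (sign bit set) is nonzero.

    static unsigned zeroSourceReg(MVT &VT, const Value *Op0) {
      if (const auto *CI = dyn_cast<ConstantInt>(Op0))
        if (CI->isZero())
          return VT == MVT::i64 ? AArch64::XZR : AArch64::WZR;
      if (const auto *CF = dyn_cast<ConstantFP>(Op0))
        if (CF->isZero() && !CF->isNegative()) {
          VT = MVT::getIntegerVT(VT.getSizeInBits()); // store the raw bits
          return VT == MVT::i64 ? AArch64::XZR : AArch64::WZR;
        }
      return 0; // no shortcut; materialize Op0 into a register
    }
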
    2210             : 
    2211             : static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
    2212             :   switch (Pred) {
    2213             :   case CmpInst::FCMP_ONE:
    2214             :   case CmpInst::FCMP_UEQ:
    2215             :   default:
    2216             :     // AL is our "false" for now. The other two need more compares.
    2217             :     return AArch64CC::AL;
    2218             :   case CmpInst::ICMP_EQ:
    2219             :   case CmpInst::FCMP_OEQ:
    2220             :     return AArch64CC::EQ;
    2221             :   case CmpInst::ICMP_SGT:
    2222             :   case CmpInst::FCMP_OGT:
    2223             :     return AArch64CC::GT;
    2224             :   case CmpInst::ICMP_SGE:
    2225             :   case CmpInst::FCMP_OGE:
    2226             :     return AArch64CC::GE;
    2227             :   case CmpInst::ICMP_UGT:
    2228             :   case CmpInst::FCMP_UGT:
    2229             :     return AArch64CC::HI;
    2230             :   case CmpInst::FCMP_OLT:
    2231             :     return AArch64CC::MI;
    2232             :   case CmpInst::ICMP_ULE:
    2233             :   case CmpInst::FCMP_OLE:
    2234             :     return AArch64CC::LS;
    2235             :   case CmpInst::FCMP_ORD:
    2236             :     return AArch64CC::VC;
    2237             :   case CmpInst::FCMP_UNO:
    2238             :     return AArch64CC::VS;
    2239             :   case CmpInst::FCMP_UGE:
    2240             :     return AArch64CC::PL;
    2241             :   case CmpInst::ICMP_SLT:
    2242             :   case CmpInst::FCMP_ULT:
    2243             :     return AArch64CC::LT;
    2244             :   case CmpInst::ICMP_SLE:
    2245             :   case CmpInst::FCMP_ULE:
    2246             :     return AArch64CC::LE;
    2247             :   case CmpInst::FCMP_UNE:
    2248             :   case CmpInst::ICMP_NE:
    2249             :     return AArch64CC::NE;
    2250             :   case CmpInst::ICMP_UGE:
    2251             :     return AArch64CC::HS;
    2252             :   case CmpInst::ICMP_ULT:
    2253             :     return AArch64CC::LO;
    2254             :   }
    2255             : }
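
getCompareCC returns AL for FCMP_ONE and FCMP_UEQ because neither maps onto a single AArch64 condition; the callers below split them into a primary and an extra code. A minimal sketch of that expansion (struct and helper hypothetical):

    struct CCPair { AArch64CC::CondCode CC, ExtraCC; };
    static CCPair expandFPPredicate(CmpInst::Predicate Pred) {
      switch (Pred) {
      case CmpInst::FCMP_UEQ: return {AArch64CC::VS, AArch64CC::EQ};
      case CmpInst::FCMP_ONE: return {AArch64CC::GT, AArch64CC::MI};
      default:                return {getCompareCC(Pred), AArch64CC::AL};
      }
    }
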
    2256             : 
    2257             : /// Try to emit a combined compare-and-branch instruction.
    2258          88 : bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
    2259             :   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
    2260             :   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
    2261          88 :   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2262             : 
    2263             :   const Value *LHS = CI->getOperand(0);
    2264             :   const Value *RHS = CI->getOperand(1);
    2265             : 
    2266          88 :   MVT VT;
    2267          88 :   if (!isTypeSupported(LHS->getType(), VT))
    2268             :     return false;
    2269             : 
    2270          88 :   unsigned BW = VT.getSizeInBits();
    2271          88 :   if (BW > 64)
    2272             :     return false;
    2273             : 
    2274          88 :   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2275          88 :   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    2276             : 
    2277             :   // Try to take advantage of fallthrough opportunities.
    2278          88 :   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2279             :     std::swap(TBB, FBB);
    2280          49 :     Predicate = CmpInst::getInversePredicate(Predicate);
    2281             :   }
    2282             : 
    2283             :   int TestBit = -1;
    2284             :   bool IsCmpNE;
    2285             :   switch (Predicate) {
    2286             :   default:
    2287             :     return false;
    2288          44 :   case CmpInst::ICMP_EQ:
    2289             :   case CmpInst::ICMP_NE:
    2290          44 :     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
    2291             :       std::swap(LHS, RHS);
    2292             : 
    2293          44 :     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
    2294           3 :       return false;
    2295             : 
    2296             :     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
    2297          13 :       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
    2298             :         const Value *AndLHS = AI->getOperand(0);
    2299             :         const Value *AndRHS = AI->getOperand(1);
    2300             : 
    2301             :         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
    2302           0 :           if (C->getValue().isPowerOf2())
    2303             :             std::swap(AndLHS, AndRHS);
    2304             : 
    2305             :         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
    2306          12 :           if (C->getValue().isPowerOf2()) {
    2307          12 :             TestBit = C->getValue().logBase2();
    2308             :             LHS = AndLHS;
    2309             :           }
    2310             :       }
    2311             : 
    2312          41 :     if (VT == MVT::i1)
    2313             :       TestBit = 0;
    2314             : 
    2315          41 :     IsCmpNE = Predicate == CmpInst::ICMP_NE;
    2316          41 :     break;
    2317          13 :   case CmpInst::ICMP_SLT:
    2318             :   case CmpInst::ICMP_SGE:
    2319          13 :     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
    2320           7 :       return false;
    2321             : 
    2322           6 :     TestBit = BW - 1;
    2323           6 :     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    2324           6 :     break;
    2325          10 :   case CmpInst::ICMP_SGT:
    2326             :   case CmpInst::ICMP_SLE:
    2327          10 :     if (!isa<ConstantInt>(RHS))
    2328             :       return false;
    2329             : 
    2330           8 :     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
    2331             :       return false;
    2332             : 
    2333           8 :     TestBit = BW - 1;
    2334           8 :     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    2335           8 :     break;
    2336             :   } // end switch
    2337             : 
    2338             :   static const unsigned OpcTable[2][2][2] = {
    2339             :     { {AArch64::CBZW,  AArch64::CBZX },
    2340             :       {AArch64::CBNZW, AArch64::CBNZX} },
    2341             :     { {AArch64::TBZW,  AArch64::TBZX },
    2342             :       {AArch64::TBNZW, AArch64::TBNZX} }
    2343             :   };
    2344             : 
    2345          55 :   bool IsBitTest = TestBit != -1;
    2346          55 :   bool Is64Bit = BW == 64;
    2347          55 :   if (TestBit < 32 && TestBit >= 0)
    2348             :     Is64Bit = false;
    2349             : 
    2350          55 :   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
    2351          55 :   const MCInstrDesc &II = TII.get(Opc);
    2352             : 
    2353          55 :   unsigned SrcReg = getRegForValue(LHS);
    2354          55 :   if (!SrcReg)
    2355             :     return false;
    2356          55 :   bool SrcIsKill = hasTrivialKill(LHS);
    2357             : 
    2358          55 :   if (BW == 64 && !Is64Bit)
    2359           4 :     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
    2360             :                                         AArch64::sub_32);
    2361             : 
    2362          55 :   if ((BW < 32) && !IsBitTest)
    2363           8 :     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
    2364             : 
    2365             :   // Emit the combined compare and branch instruction.
    2366         110 :   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
    2367             :   MachineInstrBuilder MIB =
    2368         110 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    2369          55 :           .addReg(SrcReg, getKillRegState(SrcIsKill));
    2370          55 :   if (IsBitTest)
    2371          27 :     MIB.addImm(TestBit);
    2372             :   MIB.addMBB(TBB);
    2373             : 
    2374          55 :   finishCondBranch(BI->getParent(), TBB, FBB);
    2375          55 :   return true;
    2376             : }
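
A simplified standalone model (assumed) of the eligibility rules above: a compare against zero becomes CBZ/CBNZ, while a power-of-two mask test or a sign-bit check (ICMP_SLT/SGE against 0, ICMP_SGT/SLE against -1) sets TestBit and becomes TBZ/TBNZ. For instance, `icmp slt i32 %x, 0` followed by a branch lowers to `tbnz w0, #31, <target>`, assuming %x lives in w0.

    // Returns the bit to test, or -1 when a plain CBZ/CBNZ must be used.
    // MaskRHS abstracts the constant mask of a preceding 'and', if any.
    static int testBitFor(CmpInst::Predicate Pred, unsigned BW,
                          const APInt *MaskRHS) {
      switch (Pred) {
      case CmpInst::ICMP_EQ:
      case CmpInst::ICMP_NE:  // (x & (1 << n)) ==/!= 0
        return MaskRHS && MaskRHS->isPowerOf2()
                   ? (int)MaskRHS->logBase2() : -1;
      case CmpInst::ICMP_SLT:
      case CmpInst::ICMP_SGE: // x < 0 / x >= 0: test the sign bit
      case CmpInst::ICMP_SGT:
      case CmpInst::ICMP_SLE: // x > -1 / x <= -1: also the sign bit
        return (int)BW - 1;
      default:
        return -1;
      }
    }
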
    2377             : 
    2378         272 : bool AArch64FastISel::selectBranch(const Instruction *I) {
    2379             :   const BranchInst *BI = cast<BranchInst>(I);
    2380         272 :   if (BI->isUnconditional()) {
    2381         150 :     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2382         300 :     fastEmitBranch(MSucc, BI->getDebugLoc());
    2383         150 :     return true;
    2384             :   }
    2385             : 
    2386         122 :   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    2387         122 :   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    2388             : 
    2389             :   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    2390          89 :     if (CI->hasOneUse() && isValueAvailable(CI)) {
    2391             :       // Try to optimize or fold the cmp.
    2392          88 :       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2393          88 :       switch (Predicate) {
    2394             :       default:
    2395             :         break;
    2396           0 :       case CmpInst::FCMP_FALSE:
    2397           0 :         fastEmitBranch(FBB, DbgLoc);
    2398           0 :         return true;
    2399           0 :       case CmpInst::FCMP_TRUE:
    2400           0 :         fastEmitBranch(TBB, DbgLoc);
    2401           0 :         return true;
    2402             :       }
    2403             : 
    2404             :       // Try to emit a combined compare-and-branch first.
    2405          88 :       if (emitCompareAndBranch(BI))
    2406             :         return true;
    2407             : 
    2408             :       // Try to take advantage of fallthrough opportunities.
    2409          33 :       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2410             :         std::swap(TBB, FBB);
    2411           6 :         Predicate = CmpInst::getInversePredicate(Predicate);
    2412             :       }
    2413             : 
    2414             :       // Emit the cmp.
    2415          66 :       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    2416             :         return false;
    2417             : 
    2418             :       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
    2419             :       // instruction.
    2420             :       AArch64CC::CondCode CC = getCompareCC(Predicate);
    2421             :       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
    2422          33 :       switch (Predicate) {
    2423             :       default:
    2424             :         break;
    2425             :       case CmpInst::FCMP_UEQ:
    2426             :         ExtraCC = AArch64CC::EQ;
    2427             :         CC = AArch64CC::VS;
    2428             :         break;
    2429             :       case CmpInst::FCMP_ONE:
    2430             :         ExtraCC = AArch64CC::MI;
    2431             :         CC = AArch64CC::GT;
    2432             :         break;
    2433             :       }
    2434             :       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2435             : 
    2436             :       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
    2437             :       if (ExtraCC != AArch64CC::AL) {
    2438           4 :         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2439           2 :             .addImm(ExtraCC)
    2440             :             .addMBB(TBB);
    2441             :       }
    2442             : 
    2443             :       // Emit the branch.
    2444          66 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2445          33 :           .addImm(CC)
    2446             :           .addMBB(TBB);
    2447             : 
    2448          33 :       finishCondBranch(BI->getParent(), TBB, FBB);
    2449          33 :       return true;
    2450             :     }
    2451             :   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    2452             :     uint64_t Imm = CI->getZExtValue();
    2453           2 :     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    2454           4 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
    2455             :         .addMBB(Target);
    2456             : 
    2457             :     // Obtain the branch probability and add the target to the successor list.
    2458           2 :     if (FuncInfo.BPI) {
    2459             :       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
    2460           0 :           BI->getParent(), Target->getBasicBlock());
    2461           0 :       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
    2462             :     } else
    2463           2 :       FuncInfo.MBB->addSuccessorWithoutProb(Target);
    2464           2 :     return true;
    2465             :   } else {
    2466          31 :     AArch64CC::CondCode CC = AArch64CC::NE;
    2467          31 :     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
    2468             :       // Artificially request the condition; otherwise the intrinsic might be
    2469             :       // optimized away completely.
    2470          28 :       unsigned CondReg = getRegForValue(BI->getCondition());
    2471          14 :       if (!CondReg)
    2472          14 :         return false;
    2473             : 
    2474             :       // Emit the branch.
    2475          28 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    2476          14 :         .addImm(CC)
    2477             :         .addMBB(TBB);
    2478             : 
    2479          14 :       finishCondBranch(BI->getParent(), TBB, FBB);
    2480          14 :       return true;
    2481             :     }
    2482             :   }
    2483             : 
    2484          36 :   unsigned CondReg = getRegForValue(BI->getCondition());
    2485          18 :   if (CondReg == 0)
    2486             :     return false;
    2487          18 :   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
    2488             : 
    2489             :   // i1 conditions come in as i32 values; test the lowest bit with tb(n)z.
    2490             :   unsigned Opcode = AArch64::TBNZW;
    2491          18 :   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    2492             :     std::swap(TBB, FBB);
    2493             :     Opcode = AArch64::TBZW;
    2494             :   }
    2495             : 
    2496          18 :   const MCInstrDesc &II = TII.get(Opcode);
    2497             :   unsigned ConstrainedCondReg
    2498          36 :     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
    2499          18 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    2500          18 :       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
    2501             :       .addImm(0)
    2502             :       .addMBB(TBB);
    2503             : 
    2504          18 :   finishCondBranch(BI->getParent(), TBB, FBB);
    2505          18 :   return true;
    2506             : }
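
A hedged restatement of the fallback at the end of selectBranch: an arbitrary i1 condition arrives in a W register with only bit 0 meaningful, so it is branched on with TBNZ on bit 0, inverted to TBZ after swapping successors when the true block is the layout successor so the common path falls through.

    static unsigned boolBranchOpcode(bool TrueIsLayoutSuccessor) {
      return TrueIsLayoutSuccessor ? AArch64::TBZW : AArch64::TBNZW;
    }
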
    2507             : 
    2508           0 : bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
    2509             :   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
    2510           0 :   unsigned AddrReg = getRegForValue(BI->getOperand(0));
    2511           0 :   if (AddrReg == 0)
    2512             :     return false;
    2513             : 
    2514             :   // Emit the indirect branch.
    2515           0 :   const MCInstrDesc &II = TII.get(AArch64::BR);
    2516           0 :   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
    2517           0 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
    2518             : 
    2519             :   // Make sure the CFG is up-to-date.
    2520           0 :   for (auto *Succ : BI->successors())
    2521           0 :     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
    2522             : 
    2523           0 :   return true;
    2524             : }
    2525             : 
    2526          57 : bool AArch64FastISel::selectCmp(const Instruction *I) {
    2527             :   const CmpInst *CI = cast<CmpInst>(I);
    2528             : 
    2529             :   // Vectors of i1 are weird: bail out.
    2530         114 :   if (CI->getType()->isVectorTy())
    2531             :     return false;
    2532             : 
    2533             :   // Try to optimize or fold the cmp.
    2534          51 :   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    2535             :   unsigned ResultReg = 0;
    2536          51 :   switch (Predicate) {
    2537             :   default:
    2538             :     break;
    2539           1 :   case CmpInst::FCMP_FALSE:
    2540           1 :     ResultReg = createResultReg(&AArch64::GPR32RegClass);
    2541           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    2542           2 :             TII.get(TargetOpcode::COPY), ResultReg)
    2543           1 :         .addReg(AArch64::WZR, getKillRegState(true));
    2544           1 :     break;
    2545             :   case CmpInst::FCMP_TRUE:
    2546           1 :     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    2547           1 :     break;
    2548             :   }
    2549             : 
    2550           2 :   if (ResultReg) {
    2551           2 :     updateValueMap(I, ResultReg);
    2552           2 :     return true;
    2553             :   }
    2554             : 
    2555             :   // Emit the cmp.
    2556          98 :   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    2557             :     return false;
    2558             : 
    2559          49 :   ResultReg = createResultReg(&AArch64::GPR32RegClass);
    2560             : 
    2561             :   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
    2562             :   // condition codes are inverted, because they are used by CSINC.
    2563             :   static unsigned CondCodeTable[2][2] = {
    2564             :     { AArch64CC::NE, AArch64CC::VC },
    2565             :     { AArch64CC::PL, AArch64CC::LE }
    2566             :   };
    2567             :   unsigned *CondCodes = nullptr;
    2568          49 :   switch (Predicate) {
    2569             :   default:
    2570             :     break;
    2571             :   case CmpInst::FCMP_UEQ:
    2572             :     CondCodes = &CondCodeTable[0][0];
    2573             :     break;
    2574           1 :   case CmpInst::FCMP_ONE:
    2575             :     CondCodes = &CondCodeTable[1][0];
    2576           1 :     break;
    2577             :   }
    2578             : 
    2579             :   if (CondCodes) {
    2580           2 :     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    2581           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2582           4 :             TmpReg1)
    2583           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2584           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2585           2 :         .addImm(CondCodes[0]);
    2586           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2587           4 :             ResultReg)
    2588           2 :         .addReg(TmpReg1, getKillRegState(true))
    2589           2 :         .addReg(AArch64::WZR, getKillRegState(true))
    2590           2 :         .addImm(CondCodes[1]);
    2591             : 
    2592           2 :     updateValueMap(I, ResultReg);
    2593           2 :     return true;
    2594             :   }
    2595             : 
    2596             :   // Now set a register based on the comparison.
    2597             :   AArch64CC::CondCode CC = getCompareCC(Predicate);
    2598             :   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2599             :   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
    2600         141 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
    2601          94 :           ResultReg)
    2602          47 :       .addReg(AArch64::WZR, getKillRegState(true))
    2603          47 :       .addReg(AArch64::WZR, getKillRegState(true))
    2604          47 :       .addImm(invertedCC);
    2605             : 
    2606          47 :   updateValueMap(I, ResultReg);
    2607          47 :   return true;
    2608             : }
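
The inverted code above is what makes the CSINC materialize the predicate: with both sources tied to WZR, CSINC yields 0 when its condition holds and 1 otherwise, so passing the inverted condition produces 1 exactly when the original predicate is true (this is the CSET alias). A minimal model (assumed):

    static unsigned csincWzrWzr(bool CondHolds) {
      // CSINC Wd, WZR, WZR, cc  ==>  Wd = cc ? WZR : WZR + 1
      return CondHolds ? 0u : 1u;
    }
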
    2609             : 
    2610             : /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
    2611             : /// value.
    2612          53 : bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
    2613          53 :   if (!SI->getType()->isIntegerTy(1))
    2614             :     return false;
    2615             : 
    2616             :   const Value *Src1Val, *Src2Val;
    2617             :   unsigned Opc = 0;
    2618             :   bool NeedExtraOp = false;
    2619             :   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
    2620           3 :     if (CI->isOne()) {
    2621             :       Src1Val = SI->getCondition();
    2622             :       Src2Val = SI->getFalseValue();
    2623             :       Opc = AArch64::ORRWrr;
    2624             :     } else {
    2625             :       assert(CI->isZero());
    2626             :       Src1Val = SI->getFalseValue();
    2627             :       Src2Val = SI->getCondition();
    2628             :       Opc = AArch64::BICWrr;
    2629             :     }
    2630             :   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
    2631           2 :     if (CI->isOne()) {
    2632             :       Src1Val = SI->getCondition();
    2633             :       Src2Val = SI->getTrueValue();
    2634             :       Opc = AArch64::ORRWrr;
    2635             :       NeedExtraOp = true;
    2636             :     } else {
    2637             :       assert(CI->isZero());
    2638             :       Src1Val = SI->getCondition();
    2639             :       Src2Val = SI->getTrueValue();
    2640             :       Opc = AArch64::ANDWrr;
    2641             :     }
    2642             :   }
    2643             : 
    2644           6 :   if (!Opc)
    2645             :     return false;
    2646             : 
    2647           5 :   unsigned Src1Reg = getRegForValue(Src1Val);
    2648           5 :   if (!Src1Reg)
    2649             :     return false;
    2650           5 :   bool Src1IsKill = hasTrivialKill(Src1Val);
    2651             : 
    2652           5 :   unsigned Src2Reg = getRegForValue(Src2Val);
    2653           5 :   if (!Src2Reg)
    2654             :     return false;
    2655           5 :   bool Src2IsKill = hasTrivialKill(Src2Val);
    2656             : 
    2657           5 :   if (NeedExtraOp) {
    2658           2 :     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
    2659             :     Src1IsKill = true;
    2660             :   }
    2661           5 :   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
    2662             :                                        Src1IsKill, Src2Reg, Src2IsKill);
    2663           5 :   updateValueMap(SI, ResultReg);
    2664           5 :   return true;
    2665             : }
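
A hedged restatement (helper hypothetical) of the four i1-select folds above, with every operand held as 0/1 in a W register:

    static unsigned foldI1Select(unsigned C, unsigned T, unsigned F) {
      if (T == 1) return C | F;          // select c, 1, f: ORRWrr
      if (T == 0) return F & ~C & 1u;    // select c, 0, f: BICWrr
      if (F == 1) return (C ^ 1u) | T;   // select c, t, 1: EOR #1 + ORRWrr
      return C & T;                      // select c, t, 0: ANDWrr
    }
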
    2666             : 
    2667          53 : bool AArch64FastISel::selectSelect(const Instruction *I) {
    2668             :   assert(isa<SelectInst>(I) && "Expected a select instruction.");
    2669          53 :   MVT VT;
    2670          53 :   if (!isTypeSupported(I->getType(), VT))
    2671             :     return false;
    2672             : 
    2673             :   unsigned Opc;
    2674             :   const TargetRegisterClass *RC;
    2675          53 :   switch (VT.SimpleTy) {
    2676             :   default:
    2677             :     return false;
    2678             :   case MVT::i1:
    2679             :   case MVT::i8:
    2680             :   case MVT::i16:
    2681             :   case MVT::i32:
    2682             :     Opc = AArch64::CSELWr;
    2683             :     RC = &AArch64::GPR32RegClass;
    2684             :     break;
    2685           8 :   case MVT::i64:
    2686             :     Opc = AArch64::CSELXr;
    2687             :     RC = &AArch64::GPR64RegClass;
    2688           8 :     break;
    2689          26 :   case MVT::f32:
    2690             :     Opc = AArch64::FCSELSrrr;
    2691             :     RC = &AArch64::FPR32RegClass;
    2692          26 :     break;
    2693           1 :   case MVT::f64:
    2694             :     Opc = AArch64::FCSELDrrr;
    2695             :     RC = &AArch64::FPR64RegClass;
    2696           1 :     break;
    2697             :   }
    2698             : 
    2699             :   const SelectInst *SI = cast<SelectInst>(I);
    2700             :   const Value *Cond = SI->getCondition();
    2701          53 :   AArch64CC::CondCode CC = AArch64CC::NE;
    2702             :   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
    2703             : 
    2704          53 :   if (optimizeSelect(SI))
    2705             :     return true;
    2706             : 
    2707             :   // Try to pick up the flags, so we don't have to emit another compare.
    2708          48 :   if (foldXALUIntrinsic(CC, I, Cond)) {
    2709             :     // Artificially request the condition to force emission of the XALU intrinsic.
    2710          12 :     unsigned CondReg = getRegForValue(Cond);
    2711          12 :     if (!CondReg)
    2712             :       return false;
    2713          55 :   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
    2714          27 :              isValueAvailable(Cond)) {
    2715             :     const auto *Cmp = cast<CmpInst>(Cond);
    2716             :     // Try to optimize or fold the cmp.
    2717          27 :     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
    2718             :     const Value *FoldSelect = nullptr;
    2719          27 :     switch (Predicate) {
    2720             :     default:
    2721             :       break;
    2722             :     case CmpInst::FCMP_FALSE:
    2723             :       FoldSelect = SI->getFalseValue();
    2724           1 :       break;
    2725             :     case CmpInst::FCMP_TRUE:
    2726             :       FoldSelect = SI->getTrueValue();
    2727           1 :       break;
    2728             :     }
    2729             : 
    2730           2 :     if (FoldSelect) {
    2731           2 :       unsigned SrcReg = getRegForValue(FoldSelect);
    2732           2 :       if (!SrcReg)
    2733             :         return false;
    2734           2 :       unsigned UseReg = lookUpRegForValue(SI);
    2735           2 :       if (UseReg)
    2736           2 :         MRI.clearKillFlags(UseReg);
    2737             : 
    2738           2 :       updateValueMap(I, SrcReg);
    2739           2 :       return true;
    2740             :     }
    2741             : 
    2742             :     // Emit the cmp.
    2743          50 :     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
    2744             :       return false;
    2745             : 
    2746             :     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
    2747          25 :     CC = getCompareCC(Predicate);
    2748          25 :     switch (Predicate) {
    2749             :     default:
    2750             :       break;
    2751           1 :     case CmpInst::FCMP_UEQ:
    2752             :       ExtraCC = AArch64CC::EQ;
    2753           1 :       CC = AArch64CC::VS;
    2754           1 :       break;
    2755           1 :     case CmpInst::FCMP_ONE:
    2756             :       ExtraCC = AArch64CC::MI;
    2757           1 :       CC = AArch64CC::GT;
    2758           1 :       break;
    2759             :     }
    2760             :     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
    2761             :   } else {
    2762           9 :     unsigned CondReg = getRegForValue(Cond);
    2763           9 :     if (!CondReg)
    2764             :       return false;
    2765           9 :     bool CondIsKill = hasTrivialKill(Cond);
    2766             : 
    2767           9 :     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
    2768           9 :     CondReg = constrainOperandRegClass(II, CondReg, 1);
    2769             : 
    2770             :     // Emit a TST instruction (ANDS wzr, reg, #imm).
    2771           9 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
    2772           9 :             AArch64::WZR)
    2773           9 :         .addReg(CondReg, getKillRegState(CondIsKill))
    2774           9 :         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    2775             :   }
    2776             : 
    2777          92 :   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
    2778          46 :   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
    2779             : 
    2780          46 :   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
    2781          46 :   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
    2782             : 
    2783          46 :   if (!Src1Reg || !Src2Reg)
    2784             :     return false;
    2785             : 
    2786          46 :   if (ExtraCC != AArch64CC::AL) {
    2787           2 :     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
    2788             :                                Src2IsKill, ExtraCC);
    2789             :     Src2IsKill = true;
    2790             :   }
    2791          46 :   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
    2792             :                                         Src2IsKill, CC);
    2793          46 :   updateValueMap(I, ResultReg);
    2794          46 :   return true;
    2795             : }
    2796             : 
    2797           5 : bool AArch64FastISel::selectFPExt(const Instruction *I) {
    2798           5 :   Value *V = I->getOperand(0);
    2799          10 :   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
    2800             :     return false;
    2801             : 
    2802           3 :   unsigned Op = getRegForValue(V);
    2803           3 :   if (Op == 0)
    2804             :     return false;
    2805             : 
    2806           3 :   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
    2807           9 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
    2808           6 :           ResultReg).addReg(Op);
    2809           3 :   updateValueMap(I, ResultReg);
    2810           3 :   return true;
    2811             : }
    2812             : 
    2813           2 : bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
    2814           2 :   Value *V = I->getOperand(0);
    2815           4 :   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
    2816             :     return false;
    2817             : 
    2818           1 :   unsigned Op = getRegForValue(V);
    2819           1 :   if (Op == 0)
    2820             :     return false;
    2821             : 
    2822           1 :   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
    2823           3 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
    2824           2 :           ResultReg).addReg(Op);
    2825           1 :   updateValueMap(I, ResultReg);
    2826           1 :   return true;
    2827             : }
    2828             : 
    2829             : // FPToUI and FPToSI
    2830           7 : bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
    2831           7 :   MVT DestVT;
    2832           7 :   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    2833             :     return false;
    2834             : 
    2835          12 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    2836           6 :   if (SrcReg == 0)
    2837             :     return false;
    2838             : 
    2839          12 :   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
    2840             :   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
    2841           4 :     return false;
    2842             : 
    2843             :   unsigned Opc;
    2844             :   if (SrcVT == MVT::f64) {
    2845           1 :     if (Signed)
    2846           0 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    2847             :     else
    2848           1 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
    2849             :   } else {
    2850           1 :     if (Signed)
    2851           0 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    2852             :     else
    2853           1 :       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
    2854             :   }
    2855           2 :   unsigned ResultReg = createResultReg(
    2856           2 :       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
    2857           4 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    2858           2 :       .addReg(SrcReg);
    2859           2 :   updateValueMap(I, ResultReg);
    2860           2 :   return true;
    2861             : }
    2862             : 
    2863          22 : bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
    2864          22 :   MVT DestVT;
    2865          22 :   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    2866             :     return false;
    2867             :   // Let regular ISel handle FP16.
    2868          20 :   if (DestVT == MVT::f16)
    2869             :     return false;
    2870             : 
    2871             :   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
    2872             :          "Unexpected value type.");
    2873             : 
    2874          20 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    2875          10 :   if (!SrcReg)
    2876             :     return false;
    2877          10 :   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
    2878             : 
    2879          20 :   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
    2880             : 
    2881             :   // Handle sign-extension.
    2882             :   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    2883             :     SrcReg =
    2884          12 :         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    2885           6 :     if (!SrcReg)
    2886             :       return false;
    2887             :     SrcIsKill = true;
    2888             :   }
    2889             : 
    2890             :   unsigned Opc;
    2891             :   if (SrcVT == MVT::i64) {
    2892           2 :     if (Signed)
    2893           0 :       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    2894             :     else
    2895           2 :       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
    2896             :   } else {
    2897           8 :     if (Signed)
    2898           3 :       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    2899             :     else
    2900           5 :       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
    2901             :   }
    2902             : 
    2903          10 :   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
    2904             :                                       SrcIsKill);
    2905          10 :   updateValueMap(I, ResultReg);
    2906          10 :   return true;
    2907             : }
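
A hedged sketch (helper hypothetical) of the source-widening rule above: i1/i8/i16 have no direct SCVTF/UCVTF form, so they are first extended to i32, sign- or zero-extending according to the signedness of the conversion.

    static MVT intToFPSourceVT(MVT SrcVT, bool &NeedsExt) {
      NeedsExt = SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16;
      return NeedsExt ? MVT(MVT::i32) : SrcVT;
    }
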
    2908             : 
    2909        1222 : bool AArch64FastISel::fastLowerArguments() {
    2910        1222 :   if (!FuncInfo.CanLowerReturn)
    2911             :     return false;
    2912             : 
    2913        1222 :   const Function *F = FuncInfo.Fn;
    2914        1222 :   if (F->isVarArg())
    2915             :     return false;
    2916             : 
    2917             :   CallingConv::ID CC = F->getCallingConv();
    2918        1221 :   if (CC != CallingConv::C && CC != CallingConv::Swift)
    2919             :     return false;
    2920             : 
    2921        2426 :   if (Subtarget->hasCustomCallingConv())
    2922             :     return false;
    2923             : 
    2924             :   // Only handle simple cases of up to 8 GPR and FPR each.
    2925             :   unsigned GPRCnt = 0;
    2926             :   unsigned FPRCnt = 0;
    2927        3119 :   for (auto const &Arg : F->args()) {
    2928        4028 :     if (Arg.hasAttribute(Attribute::ByVal) ||
    2929        4028 :         Arg.hasAttribute(Attribute::InReg) ||
    2930        4026 :         Arg.hasAttribute(Attribute::StructRet) ||
    2931        4017 :         Arg.hasAttribute(Attribute::SwiftSelf) ||
    2932        6017 :         Arg.hasAttribute(Attribute::SwiftError) ||
    2933        1998 :         Arg.hasAttribute(Attribute::Nest))
    2934         107 :       return false;
    2935             : 
    2936        1998 :     Type *ArgTy = Arg.getType();
    2937        1998 :     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
    2938             :       return false;
    2939             : 
    2940        1994 :     EVT ArgVT = TLI.getValueType(DL, ArgTy);
    2941        1994 :     if (!ArgVT.isSimple())
    2942             :       return false;
    2943             : 
    2944             :     MVT VT = ArgVT.getSimpleVT().SimpleTy;
    2945        1994 :     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
    2946             :       return false;
    2947             : 
    2948        1994 :     if (VT.isVector() &&
    2949         117 :         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
    2950             :       return false;
    2951             : 
    2952        1923 :     if (VT >= MVT::i1 && VT <= MVT::i64)
    2953        1608 :       ++GPRCnt;
    2954         315 :     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
    2955          37 :              VT.is128BitVector())
    2956         305 :       ++FPRCnt;
    2957             :     else
    2958             :       return false;
    2959             : 
    2960        1913 :     if (GPRCnt > 8 || FPRCnt > 8)
    2961             :       return false;
    2962             :   }
    2963             : 
    2964             :   static const MCPhysReg Registers[6][8] = {
    2965             :     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
    2966             :       AArch64::W5, AArch64::W6, AArch64::W7 },
    2967             :     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
    2968             :       AArch64::X5, AArch64::X6, AArch64::X7 },
    2969             :     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
    2970             :       AArch64::H5, AArch64::H6, AArch64::H7 },
    2971             :     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
    2972             :       AArch64::S5, AArch64::S6, AArch64::S7 },
    2973             :     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
    2974             :       AArch64::D5, AArch64::D6, AArch64::D7 },
    2975             :     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
    2976             :       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
    2977             :   };
    2978             : 
    2979             :   unsigned GPRIdx = 0;
    2980             :   unsigned FPRIdx = 0;
    2981        2943 :   for (auto const &Arg : F->args()) {
    2982        1838 :     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    2983             :     unsigned SrcReg;
    2984             :     const TargetRegisterClass *RC;
    2985        1838 :     if (VT >= MVT::i1 && VT <= MVT::i32) {
    2986         646 :       SrcReg = Registers[0][GPRIdx++];
    2987             :       RC = &AArch64::GPR32RegClass;
    2988             :       VT = MVT::i32;
    2989        1192 :     } else if (VT == MVT::i64) {
    2990         896 :       SrcReg = Registers[1][GPRIdx++];
    2991             :       RC = &AArch64::GPR64RegClass;
    2992         296 :     } else if (VT == MVT::f16) {
    2993           2 :       SrcReg = Registers[2][FPRIdx++];
    2994             :       RC = &AArch64::FPR16RegClass;
    2995         294 :     } else if (VT == MVT::f32) {
    2996         188 :       SrcReg = Registers[3][FPRIdx++];
    2997             :       RC = &AArch64::FPR32RegClass;
    2998         106 :     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
    2999          79 :       SrcReg = Registers[4][FPRIdx++];
    3000             :       RC = &AArch64::FPR64RegClass;
    3001          27 :     } else if (VT.is128BitVector()) {
    3002          27 :       SrcReg = Registers[5][FPRIdx++];
    3003             :       RC = &AArch64::FPR128RegClass;
    3004             :     } else
    3005           0 :       llvm_unreachable("Unexpected value type.");
    3006             : 
    3007        1838 :     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    3008             :     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    3009             :     // Without this, EmitLiveInCopies may eliminate the livein if its only
    3010             :     // use is a bitcast (which isn't turned into an instruction).
    3011        1838 :     unsigned ResultReg = createResultReg(RC);
    3012        3676 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3013        3676 :             TII.get(TargetOpcode::COPY), ResultReg)
    3014        1838 :         .addReg(DstReg, getKillRegState(true));
    3015        1838 :     updateValueMap(&Arg, ResultReg);
    3016             :   }
    3017             :   return true;
    3018             : }
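                     : 
                     : // Illustrative example (not part of the source): under AAPCS64 a function
                     : //   define i32 @f(i32 %a, i64 %b, float %c)
                     : // is lowered by the two passes above as %a -> W0, %b -> X1 (GPR slots 0
                     : // and 1) and %c -> S0 (FPR slot 0), with each livein copied into a fresh
                     : // virtual register before use.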
    3019             : 
    3020         119 : bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
    3021             :                                       SmallVectorImpl<MVT> &OutVTs,
    3022             :                                       unsigned &NumBytes) {
    3023         119 :   CallingConv::ID CC = CLI.CallConv;
    3024             :   SmallVector<CCValAssign, 16> ArgLocs;
    3025         238 :   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
    3026         119 :   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
    3027             : 
    3028             :   // Get a count of how many bytes are to be pushed on the stack.
    3029         119 :   NumBytes = CCInfo.getNextStackOffset();
    3030             : 
    3031             :   // Issue CALLSEQ_START
    3032         119 :   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
    3033         238 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    3034         119 :     .addImm(NumBytes).addImm(0);
    3035             : 
    3036             :   // Process the args.
    3037        1430 :   for (CCValAssign &VA : ArgLocs) {
    3038        2630 :     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    3039        1315 :     MVT ArgVT = OutVTs[VA.getValNo()];
    3040             : 
    3041        1315 :     unsigned ArgReg = getRegForValue(ArgVal);
    3042        1315 :     if (!ArgReg)
    3043           4 :       return false;
    3044             : 
    3045             :     // Handle arg promotion: SExt, ZExt, AExt.
    3046        1313 :     switch (VA.getLocInfo()) {
    3047             :     case CCValAssign::Full:
    3048             :       break;
    3049          15 :     case CCValAssign::SExt: {
    3050          15 :       MVT DestVT = VA.getLocVT();
    3051          15 :       MVT SrcVT = ArgVT;
    3052          15 :       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
    3053          15 :       if (!ArgReg)
    3054           0 :         return false;
    3055          15 :       break;
    3056             :     }
    3057          84 :     case CCValAssign::AExt:
    3058             :     // Intentional fall-through.
    3059             :     case CCValAssign::ZExt: {
    3060          84 :       MVT DestVT = VA.getLocVT();
    3061          84 :       MVT SrcVT = ArgVT;
    3062          84 :       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
    3063          84 :       if (!ArgReg)
    3064           0 :         return false;
    3065          84 :       break;
    3066             :     }
    3067           0 :     default:
    3068           0 :       llvm_unreachable("Unknown arg promotion!");
    3069             :     }
    3070             : 
    3071             :     // Now copy/store arg to correct locations.
    3072        1313 :     if (VA.isRegLoc() && !VA.needsCustom()) {
    3073         498 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3074         498 :               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
    3075         249 :       CLI.OutRegs.push_back(VA.getLocReg());
    3076        1064 :     } else if (VA.needsCustom()) {
    3077             :       // FIXME: Handle custom args.
    3078             :       return false;
    3079             :     } else {
    3080             :       assert(VA.isMemLoc() && "Assuming store on stack.");
    3081             : 
    3082             :       // Don't emit stores for undef values.
    3083        1064 :       if (isa<UndefValue>(ArgVal))
    3084        1032 :         continue;
    3085             : 
    3086             :       // Need to store on the stack.
    3087          32 :       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
    3088             : 
    3089             :       unsigned BEAlign = 0;
    3090          32 :       if (ArgSize < 8 && !Subtarget->isLittleEndian())
    3091           2 :         BEAlign = 8 - ArgSize;
    3092             : 
    3093             :       Address Addr;
    3094             :       Addr.setKind(Address::RegBase);
    3095             :       Addr.setReg(AArch64::SP);
    3096          32 :       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
    3097             : 
    3098          32 :       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
    3099          32 :       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    3100          32 :           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
    3101             :           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
    3102             : 
    3103          32 :       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
    3104           2 :         return false;
    3105             :     }
    3106             :   }
    3107             :   return true;
    3108             : }
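                     : 
                     : // Rough shape of the output for a call with nine i64 arguments (register
                     : // names assumed): the first eight are copied into X0..X7 and the ninth is
                     : // stored into the outgoing-argument area, e.g.
                     : //   str x8, [sp]          ; VA.getLocMemOffset() == 0
                     : // all bracketed by the CALLSEQ_START/CALLSEQ_END stack adjustments.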
    3109             : 
    3110         115 : bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
    3111             :                                  unsigned NumBytes) {
    3112         115 :   CallingConv::ID CC = CLI.CallConv;
    3113             : 
    3114             :   // Issue CALLSEQ_END
    3115         115 :   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
    3116         230 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
    3117         115 :     .addImm(NumBytes).addImm(0);
    3118             : 
    3119             :   // Now the return value.
    3120         115 :   if (RetVT != MVT::isVoid) {
    3121             :     SmallVector<CCValAssign, 16> RVLocs;
    3122          64 :     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    3123          64 :     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
    3124             : 
    3125             :     // Only handle a single return value.
    3126          64 :     if (RVLocs.size() != 1)
    3127          10 :       return false;
    3128             : 
    3129             :     // Copy all of the result registers out of their specified physreg.
    3130             :     MVT CopyVT = RVLocs[0].getValVT();
    3131             : 
    3132             :     // TODO: Handle big-endian results
    3133          64 :     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
    3134             :       return false;
    3135             : 
    3136          54 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    3137         108 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3138         108 :             TII.get(TargetOpcode::COPY), ResultReg)
    3139          54 :         .addReg(RVLocs[0].getLocReg());
    3140         108 :     CLI.InRegs.push_back(RVLocs[0].getLocReg());
    3141             : 
    3142          54 :     CLI.ResultReg = ResultReg;
    3143          54 :     CLI.NumResultRegs = 1;
    3144             :   }
    3145             : 
    3146             :   return true;
    3147             : }
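                     : 
                     : // For example, assuming an i32 return value under AAPCS: the single
                     : // CCValAssign names W0, so the code above copies W0 into a fresh virtual
                     : // register and records it in CLI.ResultReg.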
    3148             : 
    3149         228 : bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
    3150         228 :   CallingConv::ID CC  = CLI.CallConv;
    3151         228 :   bool IsTailCall     = CLI.IsTailCall;
    3152         228 :   bool IsVarArg       = CLI.IsVarArg;
    3153         228 :   const Value *Callee = CLI.Callee;
    3154         228 :   MCSymbol *Symbol = CLI.Symbol;
    3155             : 
    3156         228 :   if (!Callee && !Symbol)
    3157             :     return false;
    3158             : 
    3159             :   // Allow SelectionDAG isel to handle tail calls.
    3160         228 :   if (IsTailCall)
    3161             :     return false;
    3162             : 
    3163         204 :   CodeModel::Model CM = TM.getCodeModel();
    3164             :   // Only support the small-addressing and large code models.
    3165         204 :   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    3166             :     return false;
    3167             : 
    3168             :   // FIXME: Add large code model support for ELF.
    3169         204 :   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    3170             :     return false;
    3171             : 
    3172             :   // Let SDISel handle vararg functions.
    3173         204 :   if (IsVarArg)
    3174             :     return false;
    3175             : 
    3176             :   // FIXME: Only handle *simple* calls for now.
    3177         204 :   MVT RetVT;
    3178         408 :   if (CLI.RetTy->isVoidTy())
    3179          55 :     RetVT = MVT::isVoid;
    3180         149 :   else if (!isTypeLegal(CLI.RetTy, RetVT))
    3181             :     return false;
    3182             : 
    3183        1581 :   for (auto Flag : CLI.OutFlags)
    3184        1393 :     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
    3185        2786 :         Flag.isSwiftSelf() || Flag.isSwiftError())
    3186           5 :       return false;
    3187             : 
    3188             :   // Set up the argument vectors.
    3189             :   SmallVector<MVT, 16> OutVTs;
    3190         188 :   OutVTs.reserve(CLI.OutVals.size());
    3191             : 
    3192        1506 :   for (auto *Val : CLI.OutVals) {
    3193        1387 :     MVT VT;
    3194        1387 :     if (!isTypeLegal(Val->getType(), VT) &&
    3195         108 :         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
    3196          69 :       return false;
    3197             : 
    3198             :     // We don't handle vector parameters yet.
    3199        2756 :     if (VT.isVector() || VT.getSizeInBits() > 64)
    3200             :       return false;
    3201             : 
    3202        1318 :     OutVTs.push_back(VT);
    3203             :   }
    3204             : 
    3205             :   Address Addr;
    3206         119 :   if (Callee && !computeCallAddress(Callee, Addr))
    3207             :     return false;
    3208             : 
    3209             :   // Handle the arguments now that we've gotten them.
    3210             :   unsigned NumBytes;
    3211         119 :   if (!processCallArgs(CLI, OutVTs, NumBytes))
    3212             :     return false;
    3213             : 
    3214         115 :   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    3215         115 :   if (RegInfo->isAnyArgRegReserved(*MF))
    3216           2 :     RegInfo->emitReservedArgRegCallError(*MF);
    3217             : 
    3218             :   // Issue the call.
    3219         115 :   MachineInstrBuilder MIB;
    3220         115 :   if (Subtarget->useSmallAddressing()) {
    3221         180 :     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
    3222          99 :     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
    3223          99 :     if (Symbol)
    3224             :       MIB.addSym(Symbol, 0);
    3225          83 :     else if (Addr.getGlobalValue())
    3226             :       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    3227          18 :     else if (Addr.getReg()) {
    3228          18 :       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
    3229          18 :       MIB.addReg(Reg);
    3230             :     } else
    3231             :       return false;
    3232             :   } else {
    3233             :     unsigned CallReg = 0;
    3234          16 :     if (Symbol) {
    3235           8 :       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
    3236          24 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    3237          16 :               ADRPReg)
    3238             :           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
    3239             : 
    3240           8 :       CallReg = createResultReg(&AArch64::GPR64RegClass);
    3241           8 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3242          16 :               TII.get(AArch64::LDRXui), CallReg)
    3243           8 :           .addReg(ADRPReg)
    3244             :           .addSym(Symbol,
    3245             :                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    3246           8 :     } else if (Addr.getGlobalValue())
    3247           7 :       CallReg = materializeGV(Addr.getGlobalValue());
    3248           1 :     else if (Addr.getReg())
    3249             :       CallReg = Addr.getReg();
    3250             : 
    3251          16 :     if (!CallReg)
    3252           0 :       return false;
    3253             : 
    3254          16 :     const MCInstrDesc &II = TII.get(AArch64::BLR);
    3255          16 :     CallReg = constrainOperandRegClass(II, CallReg, 0);
    3256          16 :     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
    3257             :   }
    3258             : 
    3259             :   // Add implicit physical register uses to the call.
    3260         347 :   for (auto Reg : CLI.OutRegs)
    3261         232 :     MIB.addReg(Reg, RegState::Implicit);
    3262             : 
    3263             :   // Add a register mask with the call-preserved registers.
    3264             :   // Proper defs for return values will be added by setPhysRegsDeadExcept().
    3265         115 :   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
    3266             : 
    3267         115 :   CLI.Call = MIB;
    3268             : 
    3269             :   // Finish off the call including any return values.
    3270         115 :   return finishCall(CLI, RetVT, NumBytes);
    3271             : }
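                     : 
                     : // The emitted call, roughly (symbol names illustrative): with small
                     : // addressing a direct call is "bl _callee" and an indirect one "blr xN";
                     : // under the large code model on MachO the address is materialized first:
                     : //   adrp x8, _callee@GOTPAGE
                     : //   ldr  x8, [x8, _callee@GOTPAGEOFF]
                     : //   blr  x8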
    3272             : 
    3273           0 : bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
    3274           0 :   if (Alignment)
    3275          29 :     return Len / Alignment <= 4;
    3276             :   else
    3277           3 :     return Len < 32;
    3278             : }
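                     : 
                     : // For example, Len=16 with Alignment=4 gives 16/4 == 4 <= 4 and is
                     : // considered small, while Len=40 with Alignment=8 gives 5 and is not.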
    3279             : 
    3280          13 : bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
    3281             :                                          uint64_t Len, unsigned Alignment) {
    3282             :   // Make sure we don't bloat code by inlining very large memcpys.
    3283          13 :   if (!isMemCpySmall(Len, Alignment))
    3284             :     return false;
    3285             : 
    3286             :   int64_t UnscaledOffset = 0;
    3287             :   Address OrigDest = Dest;
    3288             :   Address OrigSrc = Src;
    3289             : 
    3290          54 :   while (Len) {
    3291          41 :     MVT VT;
    3292          41 :     if (!Alignment || Alignment >= 8) {
    3293          30 :       if (Len >= 8)
    3294          27 :         VT = MVT::i64;
    3295           3 :       else if (Len >= 4)
    3296           0 :         VT = MVT::i32;
    3297           3 :       else if (Len >= 2)
    3298           0 :         VT = MVT::i16;
    3299             :       else {
    3300           3 :         VT = MVT::i8;
    3301             :       }
    3302             :     } else {
    3303             :       // Bound based on alignment.
    3304          11 :       if (Len >= 4 && Alignment == 4)
    3305           2 :         VT = MVT::i32;
    3306           9 :       else if (Len >= 2 && Alignment == 2)
    3307           3 :         VT = MVT::i16;
    3308             :       else {
    3309           6 :         VT = MVT::i8;
    3310             :       }
    3311             :     }
    3312             : 
    3313          41 :     unsigned ResultReg = emitLoad(VT, VT, Src);
    3314          41 :     if (!ResultReg)
    3315           0 :       return false;
    3316             : 
    3317          41 :     if (!emitStore(VT, ResultReg, Dest))
    3318             :       return false;
    3319             : 
    3320          41 :     int64_t Size = VT.getSizeInBits() / 8;
    3321          41 :     Len -= Size;
    3322          41 :     UnscaledOffset += Size;
    3323             : 
    3324             :     // We need to recompute the unscaled offset for each iteration.
    3325          41 :     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    3326          41 :     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
    3327             :   }
    3328             : 
    3329             :   return true;
    3330             : }
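                     : 
                     : // A 16-byte copy with 8-byte alignment therefore unrolls into two
                     : // load/store pairs (registers assumed):
                     : //   ldr x8, [src]      ; str x8, [dst]
                     : //   ldr x9, [src, #8]  ; str x9, [dst, #8]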
    3331             : 
    3332             : /// Check if it is possible to fold the condition from the XALU intrinsic
    3333             : /// into the user. The condition code will only be updated on success.
    3334          79 : bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
    3335             :                                         const Instruction *I,
    3336             :                                         const Value *Cond) {
    3337             :   if (!isa<ExtractValueInst>(Cond))
    3338             :     return false;
    3339             : 
    3340             :   const auto *EV = cast<ExtractValueInst>(Cond);
    3341             :   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    3342             :     return false;
    3343             : 
    3344             :   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
    3345          26 :   MVT RetVT;
    3346             :   const Function *Callee = II->getCalledFunction();
    3347             :   Type *RetTy =
    3348          26 :   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
    3349          26 :   if (!isTypeLegal(RetTy, RetVT))
    3350             :     return false;
    3351             : 
    3352          26 :   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    3353             :     return false;
    3354             : 
    3355          26 :   const Value *LHS = II->getArgOperand(0);
    3356             :   const Value *RHS = II->getArgOperand(1);
    3357             : 
    3358             :   // Canonicalize immediate to the RHS.
    3359          26 :   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
    3360             :       isCommutativeIntrinsic(II))
    3361             :     std::swap(LHS, RHS);
    3362             : 
    3363             :   // Simplify multiplies.
    3364             :   Intrinsic::ID IID = II->getIntrinsicID();
    3365          26 :   switch (IID) {
    3366             :   default:
    3367             :     break;
    3368           5 :   case Intrinsic::smul_with_overflow:
    3369             :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3370           1 :       if (C->getValue() == 2)
    3371             :         IID = Intrinsic::sadd_with_overflow;
    3372             :     break;
    3373           5 :   case Intrinsic::umul_with_overflow:
    3374             :     if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3375           1 :       if (C->getValue() == 2)
    3376             :         IID = Intrinsic::uadd_with_overflow;
    3377             :     break;
    3378             :   }
    3379             : 
    3380             :   AArch64CC::CondCode TmpCC;
    3381          24 :   switch (IID) {
    3382             :   default:
    3383             :     return false;
    3384             :   case Intrinsic::sadd_with_overflow:
    3385             :   case Intrinsic::ssub_with_overflow:
    3386             :     TmpCC = AArch64CC::VS;
    3387             :     break;
    3388             :   case Intrinsic::uadd_with_overflow:
    3389             :     TmpCC = AArch64CC::HS;
    3390             :     break;
    3391           4 :   case Intrinsic::usub_with_overflow:
    3392             :     TmpCC = AArch64CC::LO;
    3393           4 :     break;
    3394           8 :   case Intrinsic::smul_with_overflow:
    3395             :   case Intrinsic::umul_with_overflow:
    3396             :     TmpCC = AArch64CC::NE;
    3397           8 :     break;
    3398             :   }
    3399             : 
    3400             :   // Check if both instructions are in the same basic block.
    3401          26 :   if (!isValueAvailable(II))
    3402             :     return false;
    3403             : 
    3404             :   // Make sure nothing is in the way.
    3405             :   BasicBlock::const_iterator Start(I);
    3406             :   BasicBlock::const_iterator End(II);
    3407          66 :   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    3408             :     // We only expect extractvalue instructions between the intrinsic and the
    3409             :     // instruction to be selected.
    3410          40 :     if (!isa<ExtractValueInst>(Itr))
    3411             :       return false;
    3412             : 
    3413             :     // Check that the extractvalue operand comes from the intrinsic.
    3414             :     const auto *EVI = cast<ExtractValueInst>(Itr);
    3415          40 :     if (EVI->getAggregateOperand() != II)
    3416             :       return false;
    3417             :   }
    3418             : 
    3419          26 :   CC = TmpCC;
    3420          26 :   return true;
    3421             : }
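                     : 
                     : // The fold matches IR of the following shape (illustrative):
                     : //   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
                     : //   %ov  = extractvalue { i32, i1 } %res, 1
                     : //   br i1 %ov, label %overflow, label %cont
                     : // so the branch can reuse the flags set by the ADDS via AArch64CC::VS.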
    3422             : 
    3423          93 : bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
    3424             :   // FIXME: Handle more intrinsics.
    3425          93 :   switch (II->getIntrinsicID()) {
    3426             :   default: return false;
    3427           2 :   case Intrinsic::frameaddress: {
    3428           2 :     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    3429             :     MFI.setFrameAddressIsTaken(true);
    3430             : 
    3431           2 :     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    3432           2 :     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    3433           4 :     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    3434           4 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3435           4 :             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    3436             :     // Recursively load frame address
    3437             :     // ldr x0, [fp]
    3438             :     // ldr x0, [x0]
    3439             :     // ldr x0, [x0]
    3440             :     // ...
    3441             :     unsigned DestReg;
    3442           4 :     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    3443           4 :     while (Depth--) {
    3444           2 :       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
    3445             :                                 SrcReg, /*IsKill=*/true, 0);
    3446             :       assert(DestReg && "Unexpected LDR instruction emission failure.");
    3447             :       SrcReg = DestReg;
    3448             :     }
    3449             : 
    3450           2 :     updateValueMap(II, SrcReg);
    3451           2 :     return true;
    3452             :   }
    3453          20 :   case Intrinsic::memcpy:
    3454             :   case Intrinsic::memmove: {
    3455             :     const auto *MTI = cast<MemTransferInst>(II);
    3456             :     // Don't handle volatile.
    3457          20 :     if (MTI->isVolatile())
    3458             :       return false;
    3459             : 
    3460             :     // Disable inlining for memmove before calls to computeAddress.  Otherwise,
    3461             :     // we would emit dead code because we don't currently handle memmoves.
    3462             :     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    3463          20 :     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
    3464             :       // Small memcpys are common enough that we want to do them without a call
    3465             :       // if possible.
    3466             :       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
    3467             :       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
    3468          19 :                                     MTI->getSourceAlignment());
    3469          19 :       if (isMemCpySmall(Len, Alignment)) {
    3470             :         Address Dest, Src;
    3471          26 :         if (!computeAddress(MTI->getRawDest(), Dest) ||
    3472          13 :             !computeAddress(MTI->getRawSource(), Src))
    3473          13 :           return false;
    3474          13 :         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
    3475             :           return true;
    3476             :       }
    3477             :     }
    3478             : 
    3479           7 :     if (!MTI->getLength()->getType()->isIntegerTy(64))
    3480             :       return false;
    3481             : 
    3482          14 :     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
    3483             :       // Fast instruction selection doesn't support the special
    3484             :       // address spaces.
    3485             :       return false;
    3486             : 
    3487           7 :     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    3488           7 :     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
    3489             :   }
    3490           1 :   case Intrinsic::memset: {
    3491             :     const MemSetInst *MSI = cast<MemSetInst>(II);
    3492             :     // Don't handle volatile.
    3493           1 :     if (MSI->isVolatile())
    3494             :       return false;
    3495             : 
    3496           1 :     if (!MSI->getLength()->getType()->isIntegerTy(64))
    3497             :       return false;
    3498             : 
    3499           1 :     if (MSI->getDestAddressSpace() > 255)
    3500             :       // Fast instruction selection doesn't support the special
    3501             :       // address spaces.
    3502             :       return false;
    3503             : 
    3504           1 :     return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
    3505             :   }
    3506          12 :   case Intrinsic::sin:
    3507             :   case Intrinsic::cos:
    3508             :   case Intrinsic::pow: {
    3509          12 :     MVT RetVT;
    3510          12 :     if (!isTypeLegal(II->getType(), RetVT))
    3511             :       return false;
    3512             : 
    3513          12 :     if (RetVT != MVT::f32 && RetVT != MVT::f64)
    3514             :       return false;
    3515             : 
    3516             :     static const RTLIB::Libcall LibCallTable[3][2] = {
    3517             :       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
    3518             :       { RTLIB::COS_F32, RTLIB::COS_F64 },
    3519             :       { RTLIB::POW_F32, RTLIB::POW_F64 }
    3520             :     };
    3521             :     RTLIB::Libcall LC;
    3522             :     bool Is64Bit = RetVT == MVT::f64;
    3523             :     switch (II->getIntrinsicID()) {
    3524           0 :     default:
    3525           0 :       llvm_unreachable("Unexpected intrinsic.");
    3526           4 :     case Intrinsic::sin:
    3527           4 :       LC = LibCallTable[0][Is64Bit];
    3528           4 :       break;
    3529           4 :     case Intrinsic::cos:
    3530           4 :       LC = LibCallTable[1][Is64Bit];
    3531           4 :       break;
    3532           4 :     case Intrinsic::pow:
    3533           4 :       LC = LibCallTable[2][Is64Bit];
    3534           4 :       break;
    3535             :     }
    3536             : 
    3537             :     ArgListTy Args;
    3538          12 :     Args.reserve(II->getNumArgOperands());
    3539             : 
    3540             :     // Populate the argument list.
    3541          28 :     for (auto &Arg : II->arg_operands()) {
    3542             :       ArgListEntry Entry;
    3543          16 :       Entry.Val = Arg;
    3544          16 :       Entry.Ty = Arg->getType();
    3545          16 :       Args.push_back(Entry);
    3546             :     }
    3547             : 
    3548          24 :     CallLoweringInfo CLI;
    3549          12 :     MCContext &Ctx = MF->getContext();
    3550             :     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
    3551          36 :                   TLI.getLibcallName(LC), std::move(Args));
    3552          12 :     if (!lowerCallTo(CLI))
    3553             :       return false;
    3554          12 :     updateValueMap(II, CLI.ResultReg);
    3555          12 :     return true;
    3556             :   }
    3557           2 :   case Intrinsic::fabs: {
    3558           2 :     MVT VT;
    3559           2 :     if (!isTypeLegal(II->getType(), VT))
    3560             :       return false;
    3561             : 
    3562             :     unsigned Opc;
    3563           2 :     switch (VT.SimpleTy) {
    3564             :     default:
    3565             :       return false;
    3566             :     case MVT::f32:
    3567             :       Opc = AArch64::FABSSr;
    3568             :       break;
    3569           1 :     case MVT::f64:
    3570             :       Opc = AArch64::FABSDr;
    3571           1 :       break;
    3572             :     }
    3573           2 :     unsigned SrcReg = getRegForValue(II->getOperand(0));
    3574           2 :     if (!SrcReg)
    3575             :       return false;
    3576           2 :     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
    3577           2 :     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    3578           4 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    3579           2 :       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
    3580           2 :     updateValueMap(II, ResultReg);
    3581           2 :     return true;
    3582             :   }
    3583           1 :   case Intrinsic::trap:
    3584           2 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
    3585             :         .addImm(1);
    3586           1 :     return true;
    3587             : 
    3588             :   case Intrinsic::sqrt: {
    3589             :     Type *RetTy = II->getCalledFunction()->getReturnType();
    3590             : 
    3591           2 :     MVT VT;
    3592           2 :     if (!isTypeLegal(RetTy, VT))
    3593             :       return false;
    3594             : 
    3595           2 :     unsigned Op0Reg = getRegForValue(II->getOperand(0));
    3596           2 :     if (!Op0Reg)
    3597             :       return false;
    3598           2 :     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
    3599             : 
    3600           2 :     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    3601           2 :     if (!ResultReg)
    3602             :       return false;
    3603             : 
    3604           2 :     updateValueMap(II, ResultReg);
    3605           2 :     return true;
    3606             :   }
    3607             :   case Intrinsic::sadd_with_overflow:
    3608             :   case Intrinsic::uadd_with_overflow:
    3609             :   case Intrinsic::ssub_with_overflow:
    3610             :   case Intrinsic::usub_with_overflow:
    3611             :   case Intrinsic::smul_with_overflow:
    3612             :   case Intrinsic::umul_with_overflow: {
    3613             :     // This implements the basic lowering of the XALU with overflow intrinsics.
    3614             :     const Function *Callee = II->getCalledFunction();
    3615             :     auto *Ty = cast<StructType>(Callee->getReturnType());
    3616          48 :     Type *RetTy = Ty->getTypeAtIndex(0U);
    3617             : 
    3618          48 :     MVT VT;
    3619          48 :     if (!isTypeLegal(RetTy, VT))
    3620             :       return false;
    3621             : 
    3622          48 :     if (VT != MVT::i32 && VT != MVT::i64)
    3623             :       return false;
    3624             : 
    3625          48 :     const Value *LHS = II->getArgOperand(0);
    3626             :     const Value *RHS = II->getArgOperand(1);
    3627             :     // Canonicalize immediate to the RHS.
    3628          48 :     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
    3629             :         isCommutativeIntrinsic(II))
    3630             :       std::swap(LHS, RHS);
    3631             : 
    3632             :     // Simplify multiplies.
    3633             :     Intrinsic::ID IID = II->getIntrinsicID();
    3634             :     switch (IID) {
    3635             :     default:
    3636             :       break;
    3637           8 :     case Intrinsic::smul_with_overflow:
    3638             :       if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3639           2 :         if (C->getValue() == 2) {
    3640             :           IID = Intrinsic::sadd_with_overflow;
    3641             :           RHS = LHS;
    3642             :         }
    3643             :       break;
    3644           9 :     case Intrinsic::umul_with_overflow:
    3645             :       if (const auto *C = dyn_cast<ConstantInt>(RHS))
    3646           3 :         if (C->getValue() == 2) {
    3647             :           IID = Intrinsic::uadd_with_overflow;
    3648             :           RHS = LHS;
    3649             :         }
    3650             :       break;
    3651             :     }
    3652             : 
    3653             :     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    3654             :     AArch64CC::CondCode CC = AArch64CC::Invalid;
    3655             :     switch (IID) {
    3656           0 :     default: llvm_unreachable("Unexpected intrinsic!");
    3657          14 :     case Intrinsic::sadd_with_overflow:
    3658          14 :       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
    3659             :       CC = AArch64CC::VS;
    3660          14 :       break;
    3661           8 :     case Intrinsic::uadd_with_overflow:
    3662           8 :       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
    3663             :       CC = AArch64CC::HS;
    3664           8 :       break;
    3665           7 :     case Intrinsic::ssub_with_overflow:
    3666           7 :       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
    3667             :       CC = AArch64CC::VS;
    3668           7 :       break;
    3669           6 :     case Intrinsic::usub_with_overflow:
    3670           6 :       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
    3671             :       CC = AArch64CC::LO;
    3672           6 :       break;
    3673           6 :     case Intrinsic::smul_with_overflow: {
    3674             :       CC = AArch64CC::NE;
    3675           6 :       unsigned LHSReg = getRegForValue(LHS);
    3676           6 :       if (!LHSReg)
    3677             :         return false;
    3678           6 :       bool LHSIsKill = hasTrivialKill(LHS);
    3679             : 
    3680           6 :       unsigned RHSReg = getRegForValue(RHS);
    3681           6 :       if (!RHSReg)
    3682             :         return false;
    3683           6 :       bool RHSIsKill = hasTrivialKill(RHS);
    3684             : 
    3685           6 :       if (VT == MVT::i32) {
    3686           3 :         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    3687           3 :         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
    3688             :                                        /*IsKill=*/false, 32);
    3689           3 :         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
    3690             :                                             AArch64::sub_32);
    3691           3 :         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
    3692             :                                               AArch64::sub_32);
    3693           3 :         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
    3694             :                     AArch64_AM::ASR, 31, /*WantResult=*/false);
    3695             :       } else {
    3696             :         assert(VT == MVT::i64 && "Unexpected value type.");
    3697             :         // LHSReg and RHSReg cannot be killed by this Mul, since they are
    3698             :         // reused in the next instruction.
    3699           3 :         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
    3700             :                             /*IsKill=*/false);
    3701           3 :         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
    3702             :                                         RHSReg, RHSIsKill);
    3703           3 :         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
    3704             :                     AArch64_AM::ASR, 63, /*WantResult=*/false);
    3705             :       }
    3706             :       break;
    3707             :     }
    3708           7 :     case Intrinsic::umul_with_overflow: {
    3709             :       CC = AArch64CC::NE;
    3710           7 :       unsigned LHSReg = getRegForValue(LHS);
    3711           7 :       if (!LHSReg)
    3712             :         return false;
    3713           7 :       bool LHSIsKill = hasTrivialKill(LHS);
    3714             : 
    3715           7 :       unsigned RHSReg = getRegForValue(RHS);
    3716           7 :       if (!RHSReg)
    3717             :         return false;
    3718           7 :       bool RHSIsKill = hasTrivialKill(RHS);
    3719             : 
    3720           7 :       if (VT == MVT::i32) {
    3721           3 :         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
    3722           3 :         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
    3723             :                     /*IsKill=*/false, AArch64_AM::LSR, 32,
    3724             :                     /*WantResult=*/false);
    3725           3 :         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
    3726             :                                             AArch64::sub_32);
    3727             :       } else {
    3728             :         assert(VT == MVT::i64 && "Unexpected value type.");
    3729             :         // LHSReg and RHSReg cannot be killed by this Mul, since they are
    3730             :         // reused in the next instruction.
    3731           4 :         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
    3732             :                             /*IsKill=*/false);
    3733           4 :         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
    3734             :                                         RHSReg, RHSIsKill);
    3735           4 :         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
    3736             :                     /*IsKill=*/false, /*WantResult=*/false);
    3737             :       }
    3738             :       break;
    3739             :     }
    3740             :     }
    3741             : 
    3742          48 :     if (MulReg) {
    3743          13 :       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
    3744          13 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3745          26 :               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    3746             :     }
    3747             : 
    3748          48 :     if (!ResultReg1)
    3749             :       return false;
    3750             : 
    3751          96 :     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
    3752             :                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
    3753             :                                   /*IsKill=*/true, getInvertedCondCode(CC));
    3754             :     (void)ResultReg2;
    3755             :     assert((ResultReg1 + 1) == ResultReg2 &&
    3756             :            "Nonconsecutive result registers.");
    3757          48 :     updateValueMap(II, ResultReg1, 2);
    3758          48 :     return true;
    3759             :   }
    3760             :   }
    3761             :   return false;
    3762             : }
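                     : 
                     : // For reference, the i32 smul.with.overflow case above emits roughly
                     : // (register names assumed):
                     : //   smull x8, w0, w1        ; 64-bit product
                     : //   lsr   x9, x8, #32       ; high half
                     : //   cmp   w9, w8, asr #31   ; overflow iff high half != sign of low half
                     : //   cset  w10, ne           ; second result via the inverted CSINC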
    3763             : 
    3764        1302 : bool AArch64FastISel::selectRet(const Instruction *I) {
    3765             :   const ReturnInst *Ret = cast<ReturnInst>(I);
    3766        1302 :   const Function &F = *I->getParent()->getParent();
    3767             : 
    3768        1302 :   if (!FuncInfo.CanLowerReturn)
    3769             :     return false;
    3770             : 
    3771        1302 :   if (F.isVarArg())
    3772             :     return false;
    3773             : 
    3774        2602 :   if (TLI.supportSwiftError() &&
    3775        1301 :       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    3776          11 :     return false;
    3777             : 
    3778        1290 :   if (TLI.supportSplitCSR(FuncInfo.MF))
    3779             :     return false;
    3780             : 
    3781             :   // Build a list of return value registers.
    3782             :   SmallVector<unsigned, 4> RetRegs;
    3783             : 
    3784        1287 :   if (Ret->getNumOperands() > 0) {
    3785             :     CallingConv::ID CC = F.getCallingConv();
    3786             :     SmallVector<ISD::OutputArg, 4> Outs;
    3787        1900 :     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
    3788             : 
    3789             :     // Analyze operands of the call, assigning locations to each operand.
    3790             :     SmallVector<CCValAssign, 16> ValLocs;
    3791         950 :     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    3792         950 :     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
    3793             :                                                      : RetCC_AArch64_AAPCS;
    3794         950 :     CCInfo.AnalyzeReturn(Outs, RetCC);
    3795             : 
    3796             :     // Only handle a single return value for now.
    3797         950 :     if (ValLocs.size() != 1)
    3798         101 :       return false;
    3799             : 
    3800             :     CCValAssign &VA = ValLocs[0];
    3801             :     const Value *RV = Ret->getOperand(0);
    3802             : 
    3803             :     // Don't bother handling odd stuff for now.
    3804         943 :     if ((VA.getLocInfo() != CCValAssign::Full) &&
    3805             :         (VA.getLocInfo() != CCValAssign::BCvt))
    3806             :       return false;
    3807             : 
    3808             :     // Only handle register returns for now.
    3809         943 :     if (!VA.isRegLoc())
    3810             :       return false;
    3811             : 
    3812         943 :     unsigned Reg = getRegForValue(RV);
    3813         943 :     if (Reg == 0)
    3814             :       return false;
    3815             : 
    3816         939 :     unsigned SrcReg = Reg + VA.getValNo();
    3817         939 :     unsigned DestReg = VA.getLocReg();
    3818             :     // Avoid a cross-class copy. This is very unlikely.
    3819        1878 :     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
    3820             :       return false;
    3821             : 
    3822         939 :     EVT RVEVT = TLI.getValueType(DL, RV->getType());
    3823         939 :     if (!RVEVT.isSimple())
    3824             :       return false;
    3825             : 
    3826             :     // Vectors (of > 1 lane) in big endian need tricky handling.
    3827        1034 :     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
    3828          89 :         !Subtarget->isLittleEndian())
    3829             :       return false;
    3830             : 
    3831             :     MVT RVVT = RVEVT.getSimpleVT();
    3832         879 :     if (RVVT == MVT::f128)
    3833             :       return false;
    3834             : 
    3835             :     MVT DestVT = VA.getValVT();
    3836             :     // Special handling for extended integers.
    3837         871 :     if (RVVT != DestVT) {
    3838         173 :       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
    3839             :         return false;
    3840             : 
    3841         173 :       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
    3842             :         return false;
    3843             : 
    3844         151 :       bool IsZExt = Outs[0].Flags.isZExt();
    3845         151 :       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
    3846         151 :       if (SrcReg == 0)
    3847             :         return false;
    3848             :     }
    3849             : 
    3850             :     // Make the copy.
    3851        1698 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3852        1698 :             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
    3853             : 
    3854             :     // Add register to return instruction.
    3855         849 :     RetRegs.push_back(VA.getLocReg());
    3856             :   }
    3857             : 
    3858        1186 :   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3859        2372 :                                     TII.get(AArch64::RET_ReallyLR));
    3860        2035 :   for (unsigned RetReg : RetRegs)
    3861         849 :     MIB.addReg(RetReg, RegState::Implicit);
    3862             :   return true;
    3863             : }
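                     : 
                     : // Example, assuming a zeroext i8 return value: the i8 is widened with
                     : // emitIntExt, copied into W0, and RET_ReallyLR carries W0 as an implicit
                     : // use so the value stays live up to the return.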
    3864             : 
    3865          14 : bool AArch64FastISel::selectTrunc(const Instruction *I) {
    3866          14 :   Type *DestTy = I->getType();
    3867          14 :   Value *Op = I->getOperand(0);
    3868          14 :   Type *SrcTy = Op->getType();
    3869             : 
    3870          14 :   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
    3871          14 :   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
    3872          14 :   if (!SrcEVT.isSimple())
    3873             :     return false;
    3874          14 :   if (!DestEVT.isSimple())
    3875             :     return false;
    3876             : 
    3877             :   MVT SrcVT = SrcEVT.getSimpleVT();
    3878             :   MVT DestVT = DestEVT.getSimpleVT();
    3879             : 
    3880          14 :   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
    3881             :       SrcVT != MVT::i8)
    3882             :     return false;
    3883          12 :   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
    3884             :       DestVT != MVT::i1)
    3885             :     return false;
    3886             : 
    3887          12 :   unsigned SrcReg = getRegForValue(Op);
    3888          12 :   if (!SrcReg)
    3889             :     return false;
    3890          12 :   bool SrcIsKill = hasTrivialKill(Op);
    3891             : 
    3892             :   // If we're truncating from i64 to a smaller non-legal type then generate an
    3893             :   // AND. Otherwise, we know the high bits are undefined and a truncate only
    3894             :   // generates a COPY. We cannot also mark the source register as the result
    3895             :   // register, because this can incorrectly transfer the kill flag onto the
    3896             :   // source register.
    3897             :   unsigned ResultReg;
    3898          12 :   if (SrcVT == MVT::i64) {
    3899             :     uint64_t Mask = 0;
    3900             :     switch (DestVT.SimpleTy) {
    3901             :     default:
    3902             :       // Trunc i64 to i32 is handled by the target-independent fast-isel.
    3903             :       return false;
    3904             :     case MVT::i1:
    3905             :       Mask = 0x1;
    3906             :       break;
    3907             :     case MVT::i8:
    3908             :       Mask = 0xff;
    3909             :       break;
    3910             :     case MVT::i16:
    3911             :       Mask = 0xffff;
    3912             :       break;
    3913             :     }
    3914             :     // Issue an extract_subreg to get the lower 32 bits.
    3915          12 :     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
    3916             :                                                 AArch64::sub_32);
    3917             :     // Create the AND instruction which performs the actual truncation.
    3918           6 :     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    3919             :     assert(ResultReg && "Unexpected AND instruction emission failure.");
    3920             :   } else {
    3921           6 :     ResultReg = createResultReg(&AArch64::GPR32RegClass);
    3922           6 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3923          12 :             TII.get(TargetOpcode::COPY), ResultReg)
    3924           6 :         .addReg(SrcReg, getKillRegState(SrcIsKill));
    3925             :   }
    3926             : 
    3927          12 :   updateValueMap(I, ResultReg);
    3928          12 :   return true;
    3929             : }
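                     : 
                     : // E.g. "trunc i64 %x to i8" becomes, with registers assumed, an
                     : // extract of the low 32 bits via sub_32 followed by
                     : //   and w9, w8, #0xff
                     : // whereas truncating from i32 or narrower needs only a COPY.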
    3930             : 
    3931         141 : unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
    3932             :   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
    3933             :           DestVT == MVT::i64) &&
    3934             :          "Unexpected value type.");
    3935             :   // Handle i8 and i16 as i32.
    3936         141 :   if (DestVT == MVT::i8 || DestVT == MVT::i16)
    3937             :     DestVT = MVT::i32;
    3938             : 
    3939         141 :   if (IsZExt) {
    3940         132 :     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    3941             :     assert(ResultReg && "Unexpected AND instruction emission failure.");
    3942         132 :     if (DestVT == MVT::i64) {
    3943             :       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
    3944             :       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
    3945           0 :       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    3946           0 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    3947           0 :               TII.get(AArch64::SUBREG_TO_REG), Reg64)
    3948             :           .addImm(0)
    3949           0 :           .addReg(ResultReg)
    3950             :           .addImm(AArch64::sub_32);
    3951             :       ResultReg = Reg64;
    3952             :     }
    3953         132 :     return ResultReg;
    3954             :   } else {
    3955           9 :     if (DestVT == MVT::i64) {
    3956             :       // FIXME: We're sign-extending i1 to i64.
    3957             :       return 0;
    3958             :     }
    3959           9 :     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
    3960           9 :                             /*TODO:IsKill=*/false, 0, 0);
    3961             :   }
    3962             : }
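                     : 
                     : // Concretely (registers assumed): zero-extending an i1 is
                     : //   and  w8, w8, #0x1
                     : // and sign-extending it is the SBFMWri above with immr=0, imms=0, i.e.
                     : //   sbfx w8, w8, #0, #1
                     : // which replicates bit 0 into every bit of the result.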
    3963             : 
    3964          18 : unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3965             :                                       unsigned Op1, bool Op1IsKill) {
    3966             :   unsigned Opc, ZReg;
    3967          18 :   switch (RetVT.SimpleTy) {
    3968             :   default: return 0;
    3969             :   case MVT::i8:
    3970             :   case MVT::i16:
    3971             :   case MVT::i32:
    3972             :     RetVT = MVT::i32;
    3973           5 :     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
    3974             :   case MVT::i64:
    3975             :     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
    3976             :   }
    3977             : 
    3978             :   const TargetRegisterClass *RC =
    3979          18 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    3980          18 :   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
    3981          18 :                           ZReg, /*IsKill=*/true);
    3982             : }
    3983             : 
    3984             : unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3985             :                                         unsigned Op1, bool Op1IsKill) {
    3986             :   if (RetVT != MVT::i64)
    3987             :     return 0;
    3988             : 
    3989           3 :   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
    3990             :                           Op0, Op0IsKill, Op1, Op1IsKill,
    3991             :                           AArch64::XZR, /*IsKill=*/true);
    3992             : }
    3993             : 
    3994             : unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    3995             :                                         unsigned Op1, bool Op1IsKill) {
    3996             :   if (RetVT != MVT::i64)
    3997             :     return 0;
    3998             : 
    3999           3 :   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
    4000             :                           Op0, Op0IsKill, Op1, Op1IsKill,
    4001             :                           AArch64::XZR, /*IsKill=*/true);
    4002             : }
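                     : 
                     : // All three multiply helpers use the multiply-add forms with a zero
                     : // addend, e.g. (registers assumed):
                     : //   madd   w0, w1, w2, wzr   ; emitMul_rr, 32-bit
                     : //   smaddl x0, w1, w2, xzr   ; emitSMULL_rr
                     : //   umaddl x0, w1, w2, xzr   ; emitUMULL_rr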
    4003             : 
    4004           4 : unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    4005             :                                      unsigned Op1Reg, bool Op1IsKill) {
    4006             :   unsigned Opc = 0;
    4007             :   bool NeedTrunc = false;
    4008             :   uint64_t Mask = 0;
    4009             :   switch (RetVT.SimpleTy) {
    4010             :   default: return 0;
    4011             :   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
    4012             :   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
    4013             :   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
    4014             :   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
    4015             :   }
    4016             : 
    4017             :   const TargetRegisterClass *RC =
    4018           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4019           4 :   if (NeedTrunc) {
    4020           2 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4021             :     Op1IsKill = true;
    4022             :   }
    4023           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4024             :                                        Op1IsKill);
    4025           4 :   if (NeedTrunc)
    4026           2 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4027             :   return ResultReg;
    4028             : }
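                     :
                     : // Illustrative expansion (hypothetical registers): for an i8 shift the code
                     : // above masks the shift amount, shifts in 32 bits, then re-truncates:
                     : //   and  w1, w1, #0xff   // emitAnd_ri on the shift amount
                     : //   lslv w0, w0, w1      // variable left shift
                     : //   and  w0, w0, #0xff   // emitAnd_ri on the result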
    4029             : 
    4030          53 : unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4031             :                                      bool Op0IsKill, uint64_t Shift,
    4032             :                                      bool IsZExt) {
    4033             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4034             :          "Unexpected source/return type pair.");
    4035             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4036             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4037             :          "Unexpected source value type.");
    4038             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4039             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4040             : 
    4041          53 :   bool Is64Bit = (RetVT == MVT::i64);
    4042          53 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4043          53 :   unsigned DstBits = RetVT.getSizeInBits();
    4044          53 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4045             :   const TargetRegisterClass *RC =
    4046          53 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4047             : 
    4048             :   // Just emit a copy for "zero" shifts.
    4049          53 :   if (Shift == 0) {
    4050           2 :     if (RetVT == SrcVT) {
    4051           1 :       unsigned ResultReg = createResultReg(RC);
    4052           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4053           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4054           1 :           .addReg(Op0, getKillRegState(Op0IsKill));
    4055           1 :       return ResultReg;
    4056             :     } else
    4057           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4058             :   }
    4059             : 
    4060             :   // Don't deal with undefined shifts.
    4061          51 :   if (Shift >= DstBits)
    4062             :     return 0;
    4063             : 
    4064             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4065             :   // {S|U}BFM Wd, Wn, #r, #s
    4066             :   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
    4067             : 
    4068             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4069             :   // %2 = shl i16 %1, 4
    4070             :   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
    4071             :   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
    4072             :   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
    4073             :   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
    4074             : 
    4075             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4076             :   // %2 = shl i16 %1, 8
    4077             :   // Wd<32+7-24,32-24> = Wn<7:0>
    4078             :   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
    4079             :   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
    4080             :   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
    4081             : 
    4082             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4083             :   // %2 = shl i16 %1, 12
    4084             :   // Wd<32+3-20,32-20> = Wn<3:0>
    4085             :   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
    4086             :   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
    4087             :   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
    4088             : 
    4089          37 :   unsigned ImmR = RegSize - Shift;
    4090             :   // Limit the width to the length of the source type.
    4091          37 :   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
    4092             :   static const unsigned OpcTable[2][2] = {
    4093             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4094             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4095             :   };
    4096          37 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4097          37 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4098          20 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4099          10 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4100          20 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4101             :         .addImm(0)
    4102          10 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4103             :         .addImm(AArch64::sub_32);
    4104             :     Op0 = TmpReg;
    4105             :     Op0IsKill = true;
    4106             :   }
    4107          37 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4108             : }
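                     :
                     : // Worked example (not part of the source): "shl i32 (zext i8 %x to i32), 4"
                     : // reaches the tail above with Shift=4 and SrcBits=8, so ImmR=28 and ImmS=7:
                     : //   ubfm w0, w0, #28, #7   // alias: ubfiz w0, w0, #4, #8
                     : // which places the eight source bits at position 4 and zeroes the rest,
                     : // folding the zero-extend and the shift into a single instruction.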
    4109             : 
    4110           4 : unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    4111             :                                      unsigned Op1Reg, bool Op1IsKill) {
    4112             :   unsigned Opc = 0;
    4113             :   bool NeedTrunc = false;
    4114             :   uint64_t Mask = 0;
    4115             :   switch (RetVT.SimpleTy) {
    4116             :   default: return 0;
    4117             :   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
    4118             :   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
    4119             :   case MVT::i32: Opc = AArch64::LSRVWr; break;
    4120             :   case MVT::i64: Opc = AArch64::LSRVXr; break;
    4121             :   }
    4122             : 
    4123             :   const TargetRegisterClass *RC =
    4124           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4125           4 :   if (NeedTrunc) {
    4126           2 :     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
    4127           2 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4128             :     Op0IsKill = Op1IsKill = true;
    4129             :   }
    4130           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4131             :                                        Op1IsKill);
    4132           4 :   if (NeedTrunc)
    4133           2 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4134             :   return ResultReg;
    4135             : }
    4136             : 
    4137          27 : unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4138             :                                      bool Op0IsKill, uint64_t Shift,
    4139             :                                      bool IsZExt) {
    4140             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4141             :          "Unexpected source/return type pair.");
    4142             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4143             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4144             :          "Unexpected source value type.");
    4145             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4146             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4147             : 
    4148          27 :   bool Is64Bit = (RetVT == MVT::i64);
    4149          27 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4150          27 :   unsigned DstBits = RetVT.getSizeInBits();
    4151          27 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4152             :   const TargetRegisterClass *RC =
    4153          27 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4154             : 
    4155             :   // Just emit a copy for "zero" shifts.
    4156          27 :   if (Shift == 0) {
    4157           2 :     if (RetVT == SrcVT) {
    4158           1 :       unsigned ResultReg = createResultReg(RC);
    4159           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4160           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4161           1 :       .addReg(Op0, getKillRegState(Op0IsKill));
    4162           1 :       return ResultReg;
    4163             :     } else
    4164           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4165             :   }
    4166             : 
    4167             :   // Don't deal with undefined shifts.
    4168          25 :   if (Shift >= DstBits)
    4169             :     return 0;
    4170             : 
    4171             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4172             :   // {S|U}BFM Wd, Wn, #r, #s
    4173             :   // Wd<s-r:0> = Wn<s:r> when r <= s
    4174             : 
    4175             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4176             :   // %2 = lshr i16 %1, 4
    4177             :   // Wd<7-4:0> = Wn<7:4>
    4178             :   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
    4179             :   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
    4180             :   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
    4181             : 
    4182             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4183             :   // %2 = lshr i16 %1, 8
    4184             :   // Wd<7-7:0> = Wn<7:7>
    4185             :   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
    4186             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4187             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4188             : 
    4189             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4190             :   // %2 = lshr i16 %1, 12
    4191             :   // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
    4192             :   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
    4193             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4194             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4195             : 
    4196          25 :   if (Shift >= SrcBits && IsZExt)
    4197           6 :     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
    4198             : 
    4199             :   // It is not possible to fold a sign-extend into the LShr instruction. In this
    4200             :   // case emit a sign-extend.
    4201          22 :   if (!IsZExt) {
    4202           4 :     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4203           4 :     if (!Op0)
    4204             :       return 0;
    4205             :     Op0IsKill = true;
    4206           4 :     SrcVT = RetVT;
    4207           4 :     SrcBits = SrcVT.getSizeInBits();
    4208             :     IsZExt = true;
    4209             :   }
    4210             : 
    4211          22 :   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
    4212             :   unsigned ImmS = SrcBits - 1;
    4213             :   static const unsigned OpcTable[2][2] = {
    4214             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4215             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4216             :   };
    4217          22 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4218          22 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4219           0 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4220           0 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4221           0 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4222             :         .addImm(0)
    4223           0 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4224             :         .addImm(AArch64::sub_32);
    4225             :     Op0 = TmpReg;
    4226             :     Op0IsKill = true;
    4227             :   }
    4228          22 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4229             : }
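                     :
                     : // Worked example (not part of the source): "lshr i16 (sext i8 %x), 4" takes
                     : // the explicit sign-extend path above and then shifts as a zero-extend:
                     : //   sbfm w0, w0, #0, #7    // alias sxtb: materialize the sign bits first
                     : //   ubfm w0, w0, #4, #15   // alias ubfx w0, w0, #4, #12: the logical shift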
    4230             : 
    4231           4 : unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    4232             :                                      unsigned Op1Reg, bool Op1IsKill) {
    4233             :   unsigned Opc = 0;
    4234             :   bool NeedTrunc = false;
    4235             :   uint64_t Mask = 0;
    4236             :   switch (RetVT.SimpleTy) {
    4237             :   default: return 0;
    4238             :   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
    4239             :   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
    4240             :   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
    4241             :   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
    4242             :   }
    4243             : 
    4244             :   const TargetRegisterClass *RC =
    4245           4 :       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4246           4 :   if (NeedTrunc) {
    4247           2 :     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
    4248           2 :     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    4249             :     Op0IsKill = Op1IsKill = true;
    4250             :   }
    4251           4 :   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
    4252             :                                        Op1IsKill);
    4253           4 :   if (NeedTrunc)
    4254           2 :     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
    4255             :   return ResultReg;
    4256             : }
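                     :
                     : // Illustrative expansion (hypothetical registers): unlike LSL/LSR, the i8
                     : // path above sign-extends the value so ASR shifts in the true sign bit:
                     : //   sxtb w0, w0          // emitIntExt of the value being shifted
                     : //   and  w1, w1, #0xff   // mask the shift amount
                     : //   asrv w0, w0, w1
                     : //   and  w0, w0, #0xff   // re-truncate the result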
    4257             : 
    4258          28 : unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
    4259             :                                      bool Op0IsKill, uint64_t Shift,
    4260             :                                      bool IsZExt) {
    4261             :   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
    4262             :          "Unexpected source/return type pair.");
    4263             :   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
    4264             :           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
    4265             :          "Unexpected source value type.");
    4266             :   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
    4267             :           RetVT == MVT::i64) && "Unexpected return value type.");
    4268             : 
    4269          28 :   bool Is64Bit = (RetVT == MVT::i64);
    4270          28 :   unsigned RegSize = Is64Bit ? 64 : 32;
    4271          28 :   unsigned DstBits = RetVT.getSizeInBits();
    4272          28 :   unsigned SrcBits = SrcVT.getSizeInBits();
    4273             :   const TargetRegisterClass *RC =
    4274          28 :       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4275             : 
    4276             :   // Just emit a copy for "zero" shifts.
    4277          28 :   if (Shift == 0) {
    4278           2 :     if (RetVT == SrcVT) {
    4279           1 :       unsigned ResultReg = createResultReg(RC);
    4280           2 :       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4281           2 :               TII.get(TargetOpcode::COPY), ResultReg)
    4282           1 :       .addReg(Op0, getKillRegState(Op0IsKill));
    4283           1 :       return ResultReg;
    4284             :     } else
    4285           1 :       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    4286             :   }
    4287             : 
    4288             :   // Don't deal with undefined shifts.
    4289          26 :   if (Shift >= DstBits)
    4290             :     return 0;
    4291             : 
    4292             :   // For immediate shifts we can fold the zero-/sign-extension into the shift.
    4293             :   // {S|U}BFM Wd, Wn, #r, #s
    4294             :   // Wd<s-r:0> = Wn<s:r> when r <= s
    4295             : 
    4296             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4297             :   // %2 = ashr i16 %1, 4
    4298             :   // Wd<7-4:0> = Wn<7:4>
    4299             :   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
    4300             :   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
    4301             :   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
    4302             : 
    4303             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4304             :   // %2 = ashr i16 %1, 8
    4305             :   // Wd<7-7:0> = Wn<7:7>
    4306             :   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
    4307             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4308             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4309             : 
    4310             :   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
    4311             :   // %2 = ashr i16 %1, 12
    4312             :   // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
    4313             :   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
    4314             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext | zext
    4315             :   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
    4316             : 
    4317          26 :   if (Shift >= SrcBits && IsZExt)
    4318           6 :     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
    4319             : 
    4320          23 :   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
    4321             :   unsigned ImmS = SrcBits - 1;
    4322             :   static const unsigned OpcTable[2][2] = {
    4323             :     {AArch64::SBFMWri, AArch64::SBFMXri},
    4324             :     {AArch64::UBFMWri, AArch64::UBFMXri}
    4325             :   };
    4326          23 :   unsigned Opc = OpcTable[IsZExt][Is64Bit];
    4327          23 :   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    4328           2 :     unsigned TmpReg = MRI.createVirtualRegister(RC);
    4329           1 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4330           2 :             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
    4331             :         .addImm(0)
    4332           1 :         .addReg(Op0, getKillRegState(Op0IsKill))
    4333             :         .addImm(AArch64::sub_32);
    4334             :     Op0 = TmpReg;
    4335             :     Op0IsKill = true;
    4336             :   }
    4337          23 :   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
    4338             : }
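                     :
                     : // Worked example (not part of the source): "ashr i16 (sext i8 %x), 4"
                     : // reaches the tail above with ImmR=4 and ImmS=7:
                     : //   sbfm w0, w0, #4, #7   // alias sbfx w0, w0, #4, #4
                     : // so a single SBFM performs both the sign-extension and the shift -- the
                     : // fold that emitLSR_ri had to give up on for sign-extended sources.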
    4339             : 
    4340         411 : unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
    4341             :                                      bool IsZExt) {
    4342             :   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
    4343             : 
    4344             :   // FastISel does not have plumbing to deal with extensions where the SrcVT or
    4345             :   // DestVT are odd things, so test to make sure that they are both types we can
    4346             :   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
    4347             :   // bail out to SelectionDAG.
    4348         410 :   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
    4349         821 :        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
    4350         270 :       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
    4351          28 :        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
    4352             :     return 0;
    4353             : 
    4354             :   unsigned Opc;
    4355             :   unsigned Imm = 0;
    4356             : 
    4357         411 :   switch (SrcVT.SimpleTy) {
    4358             :   default:
    4359             :     return 0;
    4360         141 :   case MVT::i1:
    4361         141 :     return emiti1Ext(SrcReg, DestVT, IsZExt);
    4362             :   case MVT::i8:
    4363         144 :     if (DestVT == MVT::i64)
    4364          15 :       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4365             :     else
    4366         129 :       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    4367             :     Imm = 7;
    4368             :     break;
    4369             :   case MVT::i16:
    4370          98 :     if (DestVT == MVT::i64)
    4371          15 :       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4372             :     else
    4373          83 :       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    4374             :     Imm = 15;
    4375             :     break;
    4376          28 :   case MVT::i32:
    4377             :     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    4378          28 :     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    4379             :     Imm = 31;
    4380             :     break;
    4381             :   }
    4382             : 
    4383             :   // Handle i8 and i16 as i32.
    4384         270 :   if (DestVT == MVT::i8 || DestVT == MVT::i16)
    4385             :     DestVT = MVT::i32;
    4386         267 :   else if (DestVT == MVT::i64) {
    4387         116 :     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    4388          58 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4389         116 :             TII.get(AArch64::SUBREG_TO_REG), Src64)
    4390             :         .addImm(0)
    4391          58 :         .addReg(SrcReg)
    4392             :         .addImm(AArch64::sub_32);
    4393             :     SrcReg = Src64;
    4394             :   }
    4395             : 
    4396             :   const TargetRegisterClass *RC =
    4397         270 :       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4398         270 :   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
    4399             : }
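                     :
                     : // Illustrative aliases (not part of the source): the (0, Imm) bitfield moves
                     : // emitted above are the familiar extension mnemonics, e.g.:
                     : //   ubfm w0, w0, #0, #7    // uxtb w0, w0  (zext i8)
                     : //   sbfm x0, x0, #0, #15   // sxth x0, w0  (sext i16 -> i64)
                     : //   sbfm x0, x0, #0, #31   // sxtw x0, w0  (sext i32 -> i64)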
    4400             : 
    4401             : static bool isZExtLoad(const MachineInstr *LI) {
    4402           2 :   switch (LI->getOpcode()) {
    4403             :   default:
    4404             :     return false;
    4405             :   case AArch64::LDURBBi:
    4406             :   case AArch64::LDURHHi:
    4407             :   case AArch64::LDURWi:
    4408             :   case AArch64::LDRBBui:
    4409             :   case AArch64::LDRHHui:
    4410             :   case AArch64::LDRWui:
    4411             :   case AArch64::LDRBBroX:
    4412             :   case AArch64::LDRHHroX:
    4413             :   case AArch64::LDRWroX:
    4414             :   case AArch64::LDRBBroW:
    4415             :   case AArch64::LDRHHroW:
    4416             :   case AArch64::LDRWroW:
    4417             :     return true;
    4418             :   }
    4419             : }
    4420             : 
    4421             : static bool isSExtLoad(const MachineInstr *LI) {
    4422           0 :   switch (LI->getOpcode()) {
    4423             :   default:
    4424             :     return false;
    4425             :   case AArch64::LDURSBWi:
    4426             :   case AArch64::LDURSHWi:
    4427             :   case AArch64::LDURSBXi:
    4428             :   case AArch64::LDURSHXi:
    4429             :   case AArch64::LDURSWi:
    4430             :   case AArch64::LDRSBWui:
    4431             :   case AArch64::LDRSHWui:
    4432             :   case AArch64::LDRSBXui:
    4433             :   case AArch64::LDRSHXui:
    4434             :   case AArch64::LDRSWui:
    4435             :   case AArch64::LDRSBWroX:
    4436             :   case AArch64::LDRSHWroX:
    4437             :   case AArch64::LDRSBXroX:
    4438             :   case AArch64::LDRSHXroX:
    4439             :   case AArch64::LDRSWroX:
    4440             :   case AArch64::LDRSBWroW:
    4441             :   case AArch64::LDRSHWroW:
    4442             :   case AArch64::LDRSBXroW:
    4443             :   case AArch64::LDRSHXroW:
    4444             :   case AArch64::LDRSWroW:
    4445             :     return true;
    4446             :   }
    4447             : }
    4448             : 
    4449         193 : bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
    4450             :                                          MVT SrcVT) {
    4451         193 :   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
    4452          85 :   if (!LI || !LI->hasOneUse())
    4453             :     return false;
    4454             : 
    4455             :   // Check if the load instruction has already been selected.
    4456          85 :   unsigned Reg = lookUpRegForValue(LI);
    4457          85 :   if (!Reg)
    4458             :     return false;
    4459             : 
    4460           1 :   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
    4461           1 :   if (!MI)
    4462             :     return false;
    4463             : 
    4464             :   // Check if the correct load instruction has been emitted - SelectionDAG might
    4465             :   // have emitted a zero-extending load, but we need a sign-extending load.
    4466             :   bool IsZExt = isa<ZExtInst>(I);
    4467             :   const auto *LoadMI = MI;
    4468           2 :   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
    4469           0 :       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    4470           0 :     unsigned LoadReg = MI->getOperand(1).getReg();
    4471           0 :     LoadMI = MRI.getUniqueVRegDef(LoadReg);
    4472             :     assert(LoadMI && "Expected valid instruction");
    4473             :   }
    4474           1 :   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    4475             :     return false;
    4476             : 
    4477             :   // Nothing to be done.
    4478           1 :   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    4479           0 :     updateValueMap(I, Reg);
    4480           0 :     return true;
    4481             :   }
    4482             : 
    4483           1 :   if (IsZExt) {
    4484           1 :     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    4485           1 :     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4486           2 :             TII.get(AArch64::SUBREG_TO_REG), Reg64)
    4487             :         .addImm(0)
    4488           1 :         .addReg(Reg, getKillRegState(true))
    4489             :         .addImm(AArch64::sub_32);
    4490             :     Reg = Reg64;
    4491             :   } else {
    4492             :     assert((MI->getOpcode() == TargetOpcode::COPY &&
    4493             :             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
    4494             :            "Expected copy instruction");
    4495           0 :     Reg = MI->getOperand(1).getReg();
    4496           0 :     MI->eraseFromParent();
    4497             :   }
    4498           1 :   updateValueMap(I, Reg);
    4499           1 :   return true;
    4500             : }
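                     :
                     : // Illustrative outcome (hypothetical IR): given
                     : //   %v = load i8, i8* %p          ; selected as "ldrb w8, [x0]"
                     : //   %e = zext i8 %v to i64
                     : // the ldrb already zeroed bits 63:8, so the zext above costs nothing:
                     : // SUBREG_TO_REG only re-labels w8 as the low half of an x register.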
    4501             : 
    4502         200 : bool AArch64FastISel::selectIntExt(const Instruction *I) {
    4503             :   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
    4504             :          "Unexpected integer extend instruction.");
    4505         200 :   MVT RetVT;
    4506         200 :   MVT SrcVT;
    4507         200 :   if (!isTypeSupported(I->getType(), RetVT))
    4508             :     return false;
    4509             : 
    4510         386 :   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    4511             :     return false;
    4512             : 
    4513             :   // Try to optimize already sign-/zero-extended values from load instructions.
    4514         193 :   if (optimizeIntExtLoad(I, RetVT, SrcVT))
    4515             :     return true;
    4516             : 
    4517         384 :   unsigned SrcReg = getRegForValue(I->getOperand(0));
    4518         192 :   if (!SrcReg)
    4519             :     return false;
    4520         192 :   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
    4521             : 
    4522             :   // Try to optimize already sign-/zero-extended values from function arguments.
    4523             :   bool IsZExt = isa<ZExtInst>(I);
    4524             :   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    4525          78 :     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
    4526          66 :       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
    4527          13 :         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    4528          13 :         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    4529          26 :                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
    4530             :             .addImm(0)
    4531          13 :             .addReg(SrcReg, getKillRegState(SrcIsKill))
    4532             :             .addImm(AArch64::sub_32);
    4533             :         SrcReg = ResultReg;
    4534             :       }
    4535             :       // Conservatively clear all kill flags from all uses, because we are
    4536             :       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
    4537             :       // level. The result of the instruction at IR level might have been
    4538             :       // trivially dead, which is now no longer true.
    4539          66 :       unsigned UseReg = lookUpRegForValue(I);
    4540          66 :       if (UseReg)
    4541          66 :         MRI.clearKillFlags(UseReg);
    4542             : 
    4543          66 :       updateValueMap(I, SrcReg);
    4544          66 :       return true;
    4545             :     }
    4546             :   }
    4547             : 
    4548         126 :   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
    4549         126 :   if (!ResultReg)
    4550             :     return false;
    4551             : 
    4552         126 :   updateValueMap(I, ResultReg);
    4553         126 :   return true;
    4554             : }
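                     :
                     : // Illustrative note (not part of the source): for arguments carrying the
                     : // zeroext/signext attribute the caller has already widened the value, so
                     : // the path above folds the IR-level extend to, at most, a SUBREG_TO_REG,
                     : // which emits no machine instruction.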
    4555             : 
    4556           8 : bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
    4557           8 :   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
    4558           8 :   if (!DestEVT.isSimple())
    4559             :     return false;
    4560             : 
    4561             :   MVT DestVT = DestEVT.getSimpleVT();
    4562           8 :   if (DestVT != MVT::i64 && DestVT != MVT::i32)
    4563             :     return false;
    4564             : 
    4565             :   unsigned DivOpc;
    4566             :   bool Is64bit = (DestVT == MVT::i64);
    4567           8 :   switch (ISDOpcode) {
    4568             :   default:
    4569             :     return false;
    4570           4 :   case ISD::SREM:
    4571           4 :     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    4572             :     break;
    4573           4 :   case ISD::UREM:
    4574           4 :     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    4575             :     break;
    4576             :   }
    4577           8 :   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
    4578          16 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4579           8 :   if (!Src0Reg)
    4580             :     return false;
    4581           8 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4582             : 
    4583           8 :   unsigned Src1Reg = getRegForValue(I->getOperand(1));
    4584           8 :   if (!Src1Reg)
    4585             :     return false;
    4586           8 :   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
    4587             : 
    4588             :   const TargetRegisterClass *RC =
    4589           8 :       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    4590           8 :   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
    4591             :                                      Src1Reg, /*IsKill=*/false);
    4592             :   assert(QuotReg && "Unexpected DIV instruction emission failure.");
    4593             :   // The remainder is computed as numerator - (quotient * denominator) using the
    4594             :   // MSUB instruction.
    4595           8 :   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
    4596             :                                         Src1Reg, Src1IsKill, Src0Reg,
    4597             :                                         Src0IsKill);
    4598           8 :   updateValueMap(I, ResultReg);
    4599           8 :   return true;
    4600             : }
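                     :
                     : // Illustrative expansion (hypothetical registers): "srem i32 %a, %b" becomes
                     : //   sdiv w8, w0, w1       // quotient
                     : //   msub w0, w8, w1, w0   // w0 = w0 - w8 * w1, the remainder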
    4601             : 
    4602          10 : bool AArch64FastISel::selectMul(const Instruction *I) {
    4603          10 :   MVT VT;
    4604          10 :   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    4605             :     return false;
    4606             : 
    4607          20 :   if (VT.isVector())
    4608           0 :     return selectBinaryOp(I, ISD::MUL);
    4609             : 
    4610          10 :   const Value *Src0 = I->getOperand(0);
    4611             :   const Value *Src1 = I->getOperand(1);
    4612             :   if (const auto *C = dyn_cast<ConstantInt>(Src0))
    4613           0 :     if (C->getValue().isPowerOf2())
    4614             :       std::swap(Src0, Src1);
    4615             : 
    4616             :   // Try to simplify to a shift instruction.
    4617             :   if (const auto *C = dyn_cast<ConstantInt>(Src1))
    4618           4 :     if (C->getValue().isPowerOf2()) {
    4619           2 :       uint64_t ShiftVal = C->getValue().logBase2();
    4620           2 :       MVT SrcVT = VT;
    4621             :       bool IsZExt = true;
    4622             :       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
    4623           0 :         if (!isIntExtFree(ZExt)) {
    4624           0 :           MVT VT;
    4625           0 :           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
    4626           0 :             SrcVT = VT;
    4627             :             IsZExt = true;
    4628             :             Src0 = ZExt->getOperand(0);
    4629             :           }
    4630             :         }
    4631             :       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
    4632           0 :         if (!isIntExtFree(SExt)) {
    4633           0 :           MVT VT;
    4634           0 :           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
    4635           0 :             SrcVT = VT;
    4636             :             IsZExt = false;
    4637             :             Src0 = SExt->getOperand(0);
    4638             :           }
    4639             :         }
    4640             :       }
    4641             : 
    4642           2 :       unsigned Src0Reg = getRegForValue(Src0);
    4643           2 :       if (!Src0Reg)
    4644           2 :         return false;
    4645           2 :       bool Src0IsKill = hasTrivialKill(Src0);
    4646             : 
    4647             :       unsigned ResultReg =
    4648           2 :           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
    4649             : 
    4650           2 :       if (ResultReg) {
    4651           2 :         updateValueMap(I, ResultReg);
    4652           2 :         return true;
    4653             :       }
    4654             :     }
    4655             : 
    4656          16 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4657           8 :   if (!Src0Reg)
    4658             :     return false;
    4659           8 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4660             : 
    4661           8 :   unsigned Src1Reg = getRegForValue(I->getOperand(1));
    4662           8 :   if (!Src1Reg)
    4663             :     return false;
    4664           8 :   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
    4665             : 
    4666           8 :   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
    4667             : 
    4668           8 :   if (!ResultReg)
    4669             :     return false;
    4670             : 
    4671           8 :   updateValueMap(I, ResultReg);
    4672           8 :   return true;
    4673             : }
    4674             : 
    4675         102 : bool AArch64FastISel::selectShift(const Instruction *I) {
    4676         102 :   MVT RetVT;
    4677         102 :   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    4678             :     return false;
    4679             : 
    4680         204 :   if (RetVT.isVector())
    4681           0 :     return selectOperator(I, I->getOpcode());
    4682             : 
    4683         102 :   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    4684             :     unsigned ResultReg = 0;
    4685             :     uint64_t ShiftVal = C->getZExtValue();
    4686          90 :     MVT SrcVT = RetVT;
    4687          90 :     bool IsZExt = I->getOpcode() != Instruction::AShr;
    4688             :     const Value *Op0 = I->getOperand(0);
    4689             :     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
    4690          22 :       if (!isIntExtFree(ZExt)) {
    4691          22 :         MVT TmpVT;
    4692          44 :         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
    4693          22 :           SrcVT = TmpVT;
    4694             :           IsZExt = true;
    4695             :           Op0 = ZExt->getOperand(0);
    4696             :         }
    4697             :       }
    4698             :     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
    4699          19 :       if (!isIntExtFree(SExt)) {
    4700          19 :         MVT TmpVT;
    4701          38 :         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
    4702          19 :           SrcVT = TmpVT;
    4703             :           IsZExt = false;
    4704             :           Op0 = SExt->getOperand(0);
    4705             :         }
    4706             :       }
    4707             :     }
    4708             : 
    4709          90 :     unsigned Op0Reg = getRegForValue(Op0);
    4710          90 :     if (!Op0Reg)
    4711             :       return false;
    4712          90 :     bool Op0IsKill = hasTrivialKill(Op0);
    4713             : 
    4714          90 :     switch (I->getOpcode()) {
    4715           0 :     default: llvm_unreachable("Unexpected instruction.");
    4716          48 :     case Instruction::Shl:
    4717          48 :       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4718          48 :       break;
    4719          18 :     case Instruction::AShr:
    4720          18 :       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4721          18 :       break;
    4722          24 :     case Instruction::LShr:
    4723          24 :       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
    4724          24 :       break;
    4725             :     }
    4726          90 :     if (!ResultReg)
    4727             :       return false;
    4728             : 
    4729          76 :     updateValueMap(I, ResultReg);
    4730          76 :     return true;
    4731             :   }
    4732             : 
    4733          24 :   unsigned Op0Reg = getRegForValue(I->getOperand(0));
    4734          12 :   if (!Op0Reg)
    4735             :     return false;
    4736          12 :   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
    4737             : 
    4738          12 :   unsigned Op1Reg = getRegForValue(I->getOperand(1));
    4739          12 :   if (!Op1Reg)
    4740             :     return false;
    4741          12 :   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
    4742             : 
    4743             :   unsigned ResultReg = 0;
    4744          12 :   switch (I->getOpcode()) {
    4745           0 :   default: llvm_unreachable("Unexpected instruction.");
    4746           4 :   case Instruction::Shl:
    4747           4 :     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4748           4 :     break;
    4749           4 :   case Instruction::AShr:
    4750           4 :     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4751           4 :     break;
    4752           4 :   case Instruction::LShr:
    4753           4 :     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    4754           4 :     break;
    4755             :   }
    4756             : 
    4757          12 :   if (!ResultReg)
    4758             :     return false;
    4759             : 
    4760          12 :   updateValueMap(I, ResultReg);
    4761          12 :   return true;
    4762             : }
    4763             : 
    4764          23 : bool AArch64FastISel::selectBitCast(const Instruction *I) {
    4765          23 :   MVT RetVT, SrcVT;
    4766             : 
    4767          46 :   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    4768             :     return false;
    4769          23 :   if (!isTypeLegal(I->getType(), RetVT))
    4770             :     return false;
    4771             : 
    4772             :   unsigned Opc;
    4773          23 :   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    4774             :     Opc = AArch64::FMOVWSr;
    4775          22 :   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    4776             :     Opc = AArch64::FMOVXDr;
    4777          19 :   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    4778             :     Opc = AArch64::FMOVSWr;
    4779          18 :   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    4780             :     Opc = AArch64::FMOVDXr;
    4781             :   else
    4782             :     return false;
    4783             : 
    4784             :   const TargetRegisterClass *RC = nullptr;
    4785           8 :   switch (RetVT.SimpleTy) {
    4786           0 :   default: llvm_unreachable("Unexpected value type.");
    4787             :   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
    4788           3 :   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
    4789           1 :   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
    4790           3 :   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
    4791             :   }
    4792          16 :   unsigned Op0Reg = getRegForValue(I->getOperand(0));
    4793           8 :   if (!Op0Reg)
    4794             :     return false;
    4795           8 :   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
    4796           8 :   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
    4797             : 
    4798           8 :   if (!ResultReg)
    4799             :     return false;
    4800             : 
    4801           8 :   updateValueMap(I, ResultReg);
    4802           8 :   return true;
    4803             : }
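                     :
                     : // Illustrative note (not part of the source): same-width int<->FP bitcasts
                     : // are pure register-file moves, e.g. "bitcast i32 %x to float" becomes
                     : //   fmov s0, w0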
    4804             : 
    4805           4 : bool AArch64FastISel::selectFRem(const Instruction *I) {
    4806           4 :   MVT RetVT;
    4807           4 :   if (!isTypeLegal(I->getType(), RetVT))
    4808             :     return false;
    4809             : 
    4810             :   RTLIB::Libcall LC;
    4811           4 :   switch (RetVT.SimpleTy) {
    4812             :   default:
    4813             :     return false;
    4814             :   case MVT::f32:
    4815             :     LC = RTLIB::REM_F32;
    4816             :     break;
    4817           2 :   case MVT::f64:
    4818             :     LC = RTLIB::REM_F64;
    4819           2 :     break;
    4820             :   }
    4821             : 
    4822             :   ArgListTy Args;
    4823           4 :   Args.reserve(I->getNumOperands());
    4824             : 
    4825             :   // Populate the argument list.
    4826          16 :   for (auto &Arg : I->operands()) {
    4827             :     ArgListEntry Entry;
    4828           8 :     Entry.Val = Arg;
    4829           8 :     Entry.Ty = Arg->getType();
    4830           8 :     Args.push_back(Entry);
    4831             :   }
    4832             : 
    4833           8 :   CallLoweringInfo CLI;
    4834           4 :   MCContext &Ctx = MF->getContext();
    4835             :   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
    4836          12 :                 TLI.getLibcallName(LC), std::move(Args));
    4837           4 :   if (!lowerCallTo(CLI))
    4838             :     return false;
    4839           4 :   updateValueMap(I, CLI.ResultReg);
    4840           4 :   return true;
    4841             : }
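                     :
                     : // Illustrative note (not part of the source): AArch64 has no frem
                     : // instruction, so "frem float %a, %b" above turns into a libcall to fmodf
                     : // (fmod for double), with the result returned in s0/d0.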
    4842             : 
    4843          14 : bool AArch64FastISel::selectSDiv(const Instruction *I) {
    4844          14 :   MVT VT;
    4845          14 :   if (!isTypeLegal(I->getType(), VT))
    4846             :     return false;
    4847             : 
    4848          28 :   if (!isa<ConstantInt>(I->getOperand(1)))
    4849           0 :     return selectBinaryOp(I, ISD::SDIV);
    4850             : 
    4851             :   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
    4852          28 :   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
    4853          26 :       !(C.isPowerOf2() || (-C).isPowerOf2()))
    4854           0 :     return selectBinaryOp(I, ISD::SDIV);
    4855             : 
    4856          14 :   unsigned Lg2 = C.countTrailingZeros();
    4857          28 :   unsigned Src0Reg = getRegForValue(I->getOperand(0));
    4858          14 :   if (!Src0Reg)
    4859             :     return false;
    4860          14 :   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
    4861             : 
    4862          14 :   if (cast<BinaryOperator>(I)->isExact()) {
    4863           3 :     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    4864           3 :     if (!ResultReg)
    4865             :       return false;
    4866           3 :     updateValueMap(I, ResultReg);
    4867           3 :     return true;
    4868             :   }
    4869             : 
    4870          11 :   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
    4871          11 :   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
    4872          11 :   if (!AddReg)
    4873             :     return false;
    4874             : 
    4875             :   // (Src0 < 0) ? Pow2 - 1 : 0;
    4876          11 :   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    4877             :     return false;
    4878             : 
    4879             :   unsigned SelectOpc;
    4880             :   const TargetRegisterClass *RC;
    4881          11 :   if (VT == MVT::i64) {
    4882             :     SelectOpc = AArch64::CSELXr;
    4883             :     RC = &AArch64::GPR64RegClass;
    4884             :   } else {
    4885             :     SelectOpc = AArch64::CSELWr;
    4886             :     RC = &AArch64::GPR32RegClass;
    4887             :   }
    4888             :   unsigned SelectReg =
    4889          11 :       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
    4890             :                        Src0IsKill, AArch64CC::LT);
    4891          11 :   if (!SelectReg)
    4892             :     return false;
    4893             : 
    4894             :   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
    4895             :   // negate the result.
    4896          11 :   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    4897             :   unsigned ResultReg;
    4898          11 :   if (C.isNegative())
    4899           4 :     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
    4900             :                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
    4901             :   else
    4902           7 :     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
    4903             : 
    4904          11 :   if (!ResultReg)
    4905             :     return false;
    4906             : 
    4907          11 :   updateValueMap(I, ResultReg);
    4908          11 :   return true;
    4909             : }
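                     :
                     : // Illustrative expansion (hypothetical registers): "sdiv i32 %x, 8" emits
                     : //   add  w8, w0, #7       // bias: x + (2^3 - 1)
                     : //   cmp  w0, #0
                     : //   csel w8, w8, w0, lt   // use the biased value only for negative x
                     : //   asr  w0, w8, #3       // round-toward-zero divide by 8
                     : // The exact-sdiv and negative-divisor cases take the shorter tails above.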
    4910             : 
    4911             : /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
    4912             : /// have to duplicate it for AArch64, because otherwise we would fail during the
    4913             : /// sign-extend emission.
    4914           3 : std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
    4915           3 :   unsigned IdxN = getRegForValue(Idx);
    4916           3 :   if (IdxN == 0)
    4917             :     // Unhandled operand. Halt "fast" selection and bail.
    4918           0 :     return std::pair<unsigned, bool>(0, false);
    4919             : 
    4920           3 :   bool IdxNIsKill = hasTrivialKill(Idx);
    4921             : 
    4922             :   // If the index is smaller or larger than intptr_t, truncate or extend it.
    4923           3 :   MVT PtrVT = TLI.getPointerTy(DL);
    4924           3 :   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
    4925           3 :   if (IdxVT.bitsLT(PtrVT)) {
    4926           1 :     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
    4927             :     IdxNIsKill = true;
    4928           2 :   } else if (IdxVT.bitsGT(PtrVT))
    4929           0 :     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
    4930           3 :   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
    4931             : }
    4932             : 
    4933             : /// This is mostly a copy of the existing FastISel GEP code, but we have to
    4934             : /// duplicate it for AArch64, because otherwise we would bail out even for
    4935             : /// simple cases. This is because the standard fastEmit functions don't cover
    4936             : /// MUL at all and ADD is lowered very inefficiently.
    4937          16 : bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
    4938          32 :   unsigned N = getRegForValue(I->getOperand(0));
    4939          16 :   if (!N)
    4940             :     return false;
    4941          16 :   bool NIsKill = hasTrivialKill(I->getOperand(0));
    4942             : 
    4943             :   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
    4944             :   // into a single N = N + TotalOffset.
    4945             :   uint64_t TotalOffs = 0;
    4946          16 :   MVT VT = TLI.getPointerTy(DL);
    4947          34 :   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
    4948          52 :        GTI != E; ++GTI) {
    4949             :     const Value *Idx = GTI.getOperand();
    4950           2 :     if (auto *StTy = GTI.getStructTypeOrNull()) {
    4951           2 :       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
    4952             :       // N = N + Offset
    4953           2 :       if (Field)
    4954           2 :         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    4955             :     } else {
    4956          16 :       Type *Ty = GTI.getIndexedType();
    4957             : 
    4958             :       // If this is a constant subscript, handle it quickly.
    4959             :       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
    4960          13 :         if (CI->isZero())
    4961          13 :           continue;
    4962             :         // N = N + Offset
    4963          11 :         TotalOffs +=
    4964          11 :             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
    4965          11 :         continue;
    4966             :       }
    4967           3 :       if (TotalOffs) {
    4968           0 :         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    4969           0 :         if (!N)
    4970           0 :           return false;
    4971             :         NIsKill = true;
    4972             :         TotalOffs = 0;
    4973             :       }
    4974             : 
    4975             :       // N = N + Idx * ElementSize;
    4976           3 :       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
    4977           3 :       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
    4978           3 :       unsigned IdxN = Pair.first;
    4979           3 :       bool IdxNIsKill = Pair.second;
    4980           3 :       if (!IdxN)
    4981             :         return false;
    4982             : 
    4983           3 :       if (ElementSize != 1) {
    4984           3 :         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
    4985           3 :         if (!C)
    4986             :           return false;
    4987           3 :         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
    4988           3 :         if (!IdxN)
    4989             :           return false;
    4990             :         IdxNIsKill = true;
    4991             :       }
    4992           3 :       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
    4993           3 :       if (!N)
    4994             :         return false;
    4995             :     }
    4996             :   }
    4997          16 :   if (TotalOffs) {
    4998          13 :     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    4999          13 :     if (!N)
    5000             :       return false;
    5001             :   }
    5002          16 :   updateValueMap(I, N);
    5003          16 :   return true;
    5004             : }
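                     :
                     : // Illustrative trace (hypothetical IR): for
                     : //   %p = getelementptr i32, i32* %base, i64 %i
                     : // no constant offset accumulates, so the loop above emits roughly
                     : //   mov  x8, #4            // fastEmit_i: sizeof(i32)
                     : //   madd x9, x1, x8, xzr   // emitMul_rr: %i * 4
                     : //   add  x0, x0, x9        // fastEmit_rr(ISD::ADD)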
    5005             : 
    5006           3 : bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
    5007             :   assert(TM.getOptLevel() == CodeGenOpt::None &&
    5008             :          "cmpxchg survived AtomicExpand at optlevel > -O0");
    5009             : 
    5010           3 :   auto *RetPairTy = cast<StructType>(I->getType());
    5011           3 :   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
    5012             :   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
    5013             :          "cmpxchg has a non-i1 status result");
    5014             : 
    5015           3 :   MVT VT;
    5016           3 :   if (!isTypeLegal(RetTy, VT))
    5017             :     return false;
    5018             : 
    5019             :   const TargetRegisterClass *ResRC;
    5020             :   unsigned Opc, CmpOpc;
    5021             :   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
    5022             :   // extractvalue selection doesn't support that.
    5023           3 :   if (VT == MVT::i32) {
    5024             :     Opc = AArch64::CMP_SWAP_32;
    5025             :     CmpOpc = AArch64::SUBSWrs;
    5026             :     ResRC = &AArch64::GPR32RegClass;
    5027           1 :   } else if (VT == MVT::i64) {
    5028             :     Opc = AArch64::CMP_SWAP_64;
    5029             :     CmpOpc = AArch64::SUBSXrs;
    5030             :     ResRC = &AArch64::GPR64RegClass;
    5031             :   } else {
    5032             :     return false;
    5033             :   }
    5034             : 
    5035           3 :   const MCInstrDesc &II = TII.get(Opc);
    5036             : 
    5037           3 :   const unsigned AddrReg = constrainOperandRegClass(
    5038             :       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
    5039           3 :   const unsigned DesiredReg = constrainOperandRegClass(
    5040           3 :       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
    5041           3 :   const unsigned NewReg = constrainOperandRegClass(
    5042           3 :       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
    5043             : 
    5044           3 :   const unsigned ResultReg1 = createResultReg(ResRC);
    5045           3 :   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
    5046           3 :   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
    5047             : 
    5048             :   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
    5049           3 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    5050             :       .addDef(ResultReg1)
    5051             :       .addDef(ScratchReg)
    5052             :       .addUse(AddrReg)
    5053             :       .addUse(DesiredReg)
    5054             :       .addUse(NewReg);
    5055             : 
    5056           6 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
    5057           3 :       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
    5058             :       .addUse(ResultReg1)
    5059             :       .addUse(DesiredReg)
    5060             :       .addImm(0);
    5061             : 
    5062           6 :   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
    5063             :       .addDef(ResultReg2)
    5064             :       .addUse(AArch64::WZR)
    5065             :       .addUse(AArch64::WZR)
    5066             :       .addImm(AArch64CC::NE);
    5067             : 
    5068             :   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
    5069           3 :   updateValueMap(I, ResultReg1, 2);
    5070           3 :   return true;
    5071             : }
    5072             : 
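// ----------------------------------------------------------------------
// Editor's sketch (not part of the listing above): the CMP_SWAP_* pseudo
// plus the SUBS/CSINC pair emitted in selectAtomicCmpXchg produce the two
// cmpxchg results: the value observed in memory (ResultReg1) and an i1
// success flag (ResultReg2; CSINC yields 1 exactly when the observed
// value equaled the desired one). A standalone analogy of those semantics
// via std::atomic; the helper name is hypothetical:

#include <atomic>
#include <cstdint>
#include <utility>

static std::pair<uint64_t, bool>
cmpXchgResults(std::atomic<uint64_t> &Mem, uint64_t Desired, uint64_t New) {
  uint64_t Observed = Desired;
  // On failure compare_exchange_strong writes the value it saw back into
  // 'Observed'; on success 'Observed' already equals 'Desired'. Either
  // way the pair mirrors the {old value, i1 status} struct that cmpxchg
  // returns and that updateValueMap(I, ResultReg1, 2) records above.
  bool Success = Mem.compare_exchange_strong(Observed, New);
  return {Observed, Success};
}
// ----------------------------------------------------------------------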
    5073        3902 : bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
    5074        3902 :   switch (I->getOpcode()) {
    5075             :   default:
    5076             :     break;
    5077         284 :   case Instruction::Add:
    5078             :   case Instruction::Sub:
    5079         284 :     return selectAddSub(I);
    5080          10 :   case Instruction::Mul:
    5081          10 :     return selectMul(I);
    5082          14 :   case Instruction::SDiv:
    5083          14 :     return selectSDiv(I);
    5084           4 :   case Instruction::SRem:
    5085           4 :     if (!selectBinaryOp(I, ISD::SREM))
    5086           4 :       return selectRem(I, ISD::SREM);
    5087             :     return true;
    5088           4 :   case Instruction::URem:
    5089           4 :     if (!selectBinaryOp(I, ISD::UREM))
    5090           4 :       return selectRem(I, ISD::UREM);
    5091             :     return true;
    5092         102 :   case Instruction::Shl:
    5093             :   case Instruction::LShr:
    5094             :   case Instruction::AShr:
    5095         102 :     return selectShift(I);
    5096          89 :   case Instruction::And:
    5097             :   case Instruction::Or:
    5098             :   case Instruction::Xor:
    5099          89 :     return selectLogicalOp(I);
    5100         272 :   case Instruction::Br:
    5101         272 :     return selectBranch(I);
    5102           0 :   case Instruction::IndirectBr:
    5103           0 :     return selectIndirectBr(I);
    5104          52 :   case Instruction::BitCast:
    5105          52 :     if (!FastISel::selectBitCast(I))
    5106          23 :       return selectBitCast(I);
    5107             :     return true;
    5108           4 :   case Instruction::FPToSI:
    5109           4 :     if (!selectCast(I, ISD::FP_TO_SINT))
    5110           1 :       return selectFPToInt(I, /*Signed=*/true);
    5111             :     return true;
    5112           6 :   case Instruction::FPToUI:
    5113           6 :     return selectFPToInt(I, /*Signed=*/false);
    5114         200 :   case Instruction::ZExt:
    5115             :   case Instruction::SExt:
    5116         200 :     return selectIntExt(I);
    5117          26 :   case Instruction::Trunc:
    5118          26 :     if (!selectCast(I, ISD::TRUNCATE))
    5119          14 :       return selectTrunc(I);
    5120             :     return true;
    5121           5 :   case Instruction::FPExt:
    5122           5 :     return selectFPExt(I);
    5123           2 :   case Instruction::FPTrunc:
    5124           2 :     return selectFPTrunc(I);
    5125          17 :   case Instruction::SIToFP:
    5126          17 :     if (!selectCast(I, ISD::SINT_TO_FP))
    5127           9 :       return selectIntToFP(I, /*Signed=*/true);
    5128             :     return true;
    5129          13 :   case Instruction::UIToFP:
    5130          13 :     return selectIntToFP(I, /*Signed=*/false);
    5131         375 :   case Instruction::Load:
    5132         375 :     return selectLoad(I);
    5133         421 :   case Instruction::Store:
    5134         421 :     return selectStore(I);
    5135          57 :   case Instruction::FCmp:
    5136             :   case Instruction::ICmp:
    5137          57 :     return selectCmp(I);
    5138          53 :   case Instruction::Select:
    5139          53 :     return selectSelect(I);
    5140        1302 :   case Instruction::Ret:
    5141        1302 :     return selectRet(I);
    5142           4 :   case Instruction::FRem:
    5143           4 :     return selectFRem(I);
    5144          16 :   case Instruction::GetElementPtr:
    5145          16 :     return selectGetElementPtr(I);
    5146             :   case Instruction::AtomicCmpXchg:
    5147           3 :     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
    5148             :   }
    5149             : 
    5150             :   // Silence warnings.
    5151             :   (void)&CC_AArch64_DarwinPCS_VarArg;
    5152             :   (void)&CC_AArch64_Win64_VarArg;
    5153             : 
    5154             :   // Fall back to target-independent instruction selection.
    5155         567 :   return selectOperator(I, I->getOpcode());
    5156             : }
    5157             : 
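// ----------------------------------------------------------------------
// Editor's sketch (not part of the listing above): fastSelectInstruction
// layers its selectors. Several opcodes try a generic path first
// (selectBinaryOp/selectCast) before a hand-written fallback, and any
// opcode left unhandled drops through to the target-independent
// selectOperator. The control-flow shape, with hypothetical names:

#include <functional>
#include <initializer_list>

using Selector = std::function<bool()>;

// Runs candidate selectors in priority order; the first one that reports
// success wins, mirroring the "if (!selectX(...)) return selectY(...)"
// chains and the final selectOperator fallback above.
static bool selectFirstOf(std::initializer_list<Selector> Selectors) {
  for (const Selector &S : Selectors)
    if (S())
      return true;
  return false; // No selector could handle the instruction.
}
// ----------------------------------------------------------------------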
    5158             : namespace llvm {
    5159             : 
    5160        1222 : FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
    5161             :                                   const TargetLibraryInfo *LibInfo) {
    5162        1222 :   return new AArch64FastISel(FuncInfo, LibInfo);
    5163             : }
    5164             : 
    5165             : } // end namespace llvm

Generated by: LCOV version 1.13