1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/CodeGen/FastISel.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
80  class Address {
81  public:
82  using BaseKind = enum {
83  RegBase,
84  FrameIndexBase
85  };
86 
87  private:
88  BaseKind Kind = RegBase;
89  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90  union {
91  unsigned Reg;
92  int FI;
93  } Base;
94  unsigned OffsetReg = 0;
95  unsigned Shift = 0;
96  int64_t Offset = 0;
97  const GlobalValue *GV = nullptr;
98 
99  public:
100  Address() { Base.Reg = 0; }
101 
102  void setKind(BaseKind K) { Kind = K; }
103  BaseKind getKind() const { return Kind; }
104  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106  bool isRegBase() const { return Kind == RegBase; }
107  bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109  void setReg(unsigned Reg) {
110  assert(isRegBase() && "Invalid base register access!");
111  Base.Reg = Reg;
112  }
113 
114  unsigned getReg() const {
115  assert(isRegBase() && "Invalid base register access!");
116  return Base.Reg;
117  }
118 
119  void setOffsetReg(unsigned Reg) {
120  OffsetReg = Reg;
121  }
122 
123  unsigned getOffsetReg() const {
124  return OffsetReg;
125  }
126 
127  void setFI(unsigned FI) {
128  assert(isFIBase() && "Invalid base frame index access!");
129  Base.FI = FI;
130  }
131 
132  unsigned getFI() const {
133  assert(isFIBase() && "Invalid base frame index access!");
134  return Base.FI;
135  }
136 
137  void setOffset(int64_t O) { Offset = O; }
138  int64_t getOffset() { return Offset; }
139  void setShift(unsigned S) { Shift = S; }
140  unsigned getShift() { return Shift; }
141 
142  void setGlobalValue(const GlobalValue *G) { GV = G; }
143  const GlobalValue *getGlobalValue() { return GV; }
144  };
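// The Address helper above describes the two memory-operand shapes this
// FastISel produces: a register base with an optional (possibly shifted and
// sign-/zero-extended) offset register, or a frame-index base, plus a
// constant byte offset and, for call lowering, an optional GlobalValue.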
145 
146  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147  /// make the right decision when generating code for different targets.
148  const AArch64Subtarget *Subtarget;
149  LLVMContext *Context;
150 
151  bool fastLowerArguments() override;
152  bool fastLowerCall(CallLoweringInfo &CLI) override;
153  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156  // Selection routines.
157  bool selectAddSub(const Instruction *I);
158  bool selectLogicalOp(const Instruction *I);
159  bool selectLoad(const Instruction *I);
160  bool selectStore(const Instruction *I);
161  bool selectBranch(const Instruction *I);
162  bool selectIndirectBr(const Instruction *I);
163  bool selectCmp(const Instruction *I);
164  bool selectSelect(const Instruction *I);
165  bool selectFPExt(const Instruction *I);
166  bool selectFPTrunc(const Instruction *I);
167  bool selectFPToInt(const Instruction *I, bool Signed);
168  bool selectIntToFP(const Instruction *I, bool Signed);
169  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170  bool selectRet(const Instruction *I);
171  bool selectTrunc(const Instruction *I);
172  bool selectIntExt(const Instruction *I);
173  bool selectMul(const Instruction *I);
174  bool selectShift(const Instruction *I);
175  bool selectBitCast(const Instruction *I);
176  bool selectFRem(const Instruction *I);
177  bool selectSDiv(const Instruction *I);
178  bool selectGetElementPtr(const Instruction *I);
179  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181  // Utility helper routines.
182  bool isTypeLegal(Type *Ty, MVT &VT);
183  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184  bool isValueAvailable(const Value *V) const;
185  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186  bool computeCallAddress(const Value *V, Address &Addr);
187  bool simplifyAddress(Address &Addr, MVT VT);
188  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189  MachineMemOperand::Flags Flags,
190  unsigned ScaleFactor, MachineMemOperand *MMO);
191  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193  unsigned Alignment);
194  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195  const Value *Cond);
196  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197  bool optimizeSelect(const SelectInst *SI);
198  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
199 
200  // Emit helper routines.
201  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202  const Value *RHS, bool SetFlags = false,
203  bool WantResult = true, bool IsZExt = false);
204  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206  bool SetFlags = false, bool WantResult = true);
207  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209  bool WantResult = true);
210  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212  AArch64_AM::ShiftExtendType ShiftType,
213  uint64_t ShiftImm, bool SetFlags = false,
214  bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
217  AArch64_AM::ShiftExtendType ExtType,
218  uint64_t ShiftImm, bool SetFlags = false,
219  bool WantResult = true);
220 
221  // Emit functions.
222  bool emitCompareAndBranch(const BranchInst *BI);
223  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228  MachineMemOperand *MMO = nullptr);
229  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230  MachineMemOperand *MMO = nullptr);
231  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232  MachineMemOperand *MMO = nullptr);
233  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236  bool SetFlags = false, bool WantResult = true,
237  bool IsZExt = false);
238  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240  bool SetFlags = false, bool WantResult = true,
241  bool IsZExt = false);
242  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245  unsigned RHSReg, bool RHSIsKill,
246  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247  bool WantResult = true);
248  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249  const Value *RHS);
250  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251  bool LHSIsKill, uint64_t Imm);
252  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254  uint64_t ShiftImm);
255  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257  unsigned Op1, bool Op1IsKill);
258  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259  unsigned Op1, bool Op1IsKill);
260  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261  unsigned Op1, bool Op1IsKill);
262  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263  unsigned Op1Reg, bool Op1IsKill);
264  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265  uint64_t Imm, bool IsZExt = true);
266  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267  unsigned Op1Reg, bool Op1IsKill);
268  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269  uint64_t Imm, bool IsZExt = true);
270  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271  unsigned Op1Reg, bool Op1IsKill);
272  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273  uint64_t Imm, bool IsZExt = false);
274 
275  unsigned materializeInt(const ConstantInt *CI, MVT VT);
276  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277  unsigned materializeGV(const GlobalValue *GV);
278 
279  // Call handling routines.
280 private:
281  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283  unsigned &NumBytes);
284  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
285 
286 public:
287  // Backend specific FastISel code.
288  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289  unsigned fastMaterializeConstant(const Constant *C) override;
290  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
291 
292  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293  const TargetLibraryInfo *LibInfo)
294  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295  Subtarget =
296  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297  Context = &FuncInfo.Fn->getContext();
298  }
299 
300  bool fastSelectInstruction(const Instruction *I) override;
301 
302 #include "AArch64GenFastISel.inc"
303 };
304 
305 } // end anonymous namespace
306 
307 /// Check if the sign-/zero-extend will be a noop.
308 static bool isIntExtFree(const Instruction *I) {
309  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310  "Unexpected integer extend instruction.");
311  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312  "Unexpected value type.");
313  bool IsZExt = isa<ZExtInst>(I);
314 
315  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316  if (LI->hasOneUse())
317  return true;
318 
319  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321  return true;
322 
323  return false;
324 }
325 
326 /// Determine the implicit scale factor that is applied by a memory
327 /// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) {
329  switch (VT.SimpleTy) {
330  default:
331  return 0; // invalid
332  case MVT::i1: // fall-through
333  case MVT::i8:
334  return 1;
335  case MVT::i16:
336  return 2;
337  case MVT::i32: // fall-through
338  case MVT::f32:
339  return 4;
340  case MVT::i64: // fall-through
341  case MVT::f64:
342  return 8;
343  }
344 }
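// For example, an i32 access has a scale factor of 4: a byte offset of 16 is
// emitted as an immediate of 4 in the scaled addressing form (see
// addLoadStoreOperands below, which divides the offset by this factor).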
345 
346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347  if (CC == CallingConv::WebKit_JS)
348  return CC_AArch64_WebKit_JS;
349  if (CC == CallingConv::GHC)
350  return CC_AArch64_GHC;
351  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
352 }
353 
354 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
355  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
356  "Alloca should always return a pointer.");
357 
358  // Don't handle dynamic allocas.
359  if (!FuncInfo.StaticAllocaMap.count(AI))
360  return 0;
361 
362  DenseMap<const AllocaInst *, int>::iterator SI =
363  FuncInfo.StaticAllocaMap.find(AI);
364 
365  if (SI != FuncInfo.StaticAllocaMap.end()) {
366  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
367  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
368  ResultReg)
369  .addFrameIndex(SI->second)
370  .addImm(0)
371  .addImm(0);
372  return ResultReg;
373  }
374 
375  return 0;
376 }
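// The ADDXri above computes "frame-index + 0"; the frame-index operand is
// rewritten into an SP- or FP-relative offset later, during frame index
// elimination, so FastISel never needs to know the final stack offset.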
377 
378 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
379  if (VT > MVT::i64)
380  return 0;
381 
382  if (!CI->isZero())
383  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
384 
385  // Create a copy from the zero register to materialize a "0" value.
386  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
387  : &AArch64::GPR32RegClass;
388  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
389  unsigned ResultReg = createResultReg(RC);
390  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
391  ResultReg).addReg(ZeroReg, getKillRegState(true));
392  return ResultReg;
393 }
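// Non-zero constants are handled by the tablegen-generated fastEmit_i hook
// above; zero is special-cased as a COPY from WZR/XZR instead.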
394 
395 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
396  // Positive zero (+0.0) has to be materialized with a fmov from the zero
397  // register, because the immediate version of fmov cannot encode zero.
398  if (CFP->isNullValue())
399  return fastMaterializeFloatZero(CFP);
400 
401  if (VT != MVT::f32 && VT != MVT::f64)
402  return 0;
403 
404  const APFloat Val = CFP->getValueAPF();
405  bool Is64Bit = (VT == MVT::f64);
406  // This checks to see if we can use FMOV instructions to materialize
407  // a constant, otherwise we have to materialize via the constant pool.
408  int Imm =
409  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
410  if (Imm != -1) {
411  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
412  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
413  }
414 
415  // For the MachO large code model materialize the FP constant in code.
416  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
417  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
418  const TargetRegisterClass *RC = Is64Bit ?
419  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
420 
421  unsigned TmpReg = createResultReg(RC);
422  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
423  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
424 
425  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
427  TII.get(TargetOpcode::COPY), ResultReg)
428  .addReg(TmpReg, getKillRegState(true));
429 
430  return ResultReg;
431  }
432 
433  // Materialize via constant pool. MachineConstantPool wants an explicit
434  // alignment.
435  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
436  if (Align == 0)
437  Align = DL.getTypeAllocSize(CFP->getType());
438 
439  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
440  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
443 
444  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447  .addReg(ADRPReg)
448  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
449  return ResultReg;
450 }
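// getFP32Imm/getFP64Imm return -1 unless the value fits the 8-bit FMOV
// immediate encoding, so everything else falls back to either a constant-pool
// load (ADRP + LDRSui/LDRDui) or, under the MachO large code model, an
// integer materialization followed by a GPR-to-FPR copy.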
451 
452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
453  // We can't handle thread-local variables quickly yet.
454  if (GV->isThreadLocal())
455  return 0;
456 
457  // MachO still uses GOT for large code-model accesses, but ELF requires
458  // movz/movk sequences, which FastISel doesn't handle yet.
459  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
460  return 0;
461 
462  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
463 
464  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
465  if (!DestEVT.isSimple())
466  return 0;
467 
468  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
469  unsigned ResultReg;
470 
471  if (OpFlags & AArch64II::MO_GOT) {
472  // ADRP + LDRX
473  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
474  ADRPReg)
475  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
476 
477  unsigned LdrOpc;
478  if (Subtarget->isTargetILP32()) {
479  ResultReg = createResultReg(&AArch64::GPR32RegClass);
480  LdrOpc = AArch64::LDRWui;
481  } else {
482  ResultReg = createResultReg(&AArch64::GPR64RegClass);
483  LdrOpc = AArch64::LDRXui;
484  }
485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
486  ResultReg)
487  .addReg(ADRPReg)
488  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
489  AArch64II::MO_NC | OpFlags);
490  if (!Subtarget->isTargetILP32())
491  return ResultReg;
492 
493  // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
494  // so we must extend the result on ILP32.
495  unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
496  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
497  TII.get(TargetOpcode::SUBREG_TO_REG))
498  .addDef(Result64)
499  .addImm(0)
500  .addReg(ResultReg, RegState::Kill)
501  .addImm(AArch64::sub_32);
502  return Result64;
503  } else {
504  // ADRP + ADDX
505  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
506  ADRPReg)
507  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
508 
509  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
510  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
511  ResultReg)
512  .addReg(ADRPReg)
513  .addGlobalAddress(GV, 0,
514  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
515  .addImm(0);
516  }
517  return ResultReg;
518 }
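// Summary: GOT-indirect globals are formed with ADRP + LDR of the GOT slot
// (widened with SUBREG_TO_REG on ILP32, where the load produces a 32-bit
// value but pointers live in 64-bit registers), while directly reachable
// globals use ADRP + ADDXri to build the address in place.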
519 
520 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
521  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
522 
523  // Only handle simple types.
524  if (!CEVT.isSimple())
525  return 0;
526  MVT VT = CEVT.getSimpleVT();
527  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
528  // 'null' pointers need to have a somewhat special treatment.
529  if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) {
530  (void)CPN;
531  assert(CPN->getType()->getPointerAddressSpace() == 0 &&
532  "Unexpected address space");
533  assert(VT == MVT::i64 && "Expected 64-bit pointers");
534  return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
535  }
536 
537  if (const auto *CI = dyn_cast<ConstantInt>(C))
538  return materializeInt(CI, VT);
539  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
540  return materializeFP(CFP, VT);
541  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
542  return materializeGV(GV);
543 
544  return 0;
545 }
546 
547 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
548  assert(CFP->isNullValue() &&
549  "Floating-point constant is not a positive zero.");
550  MVT VT;
551  if (!isTypeLegal(CFP->getType(), VT))
552  return 0;
553 
554  if (VT != MVT::f32 && VT != MVT::f64)
555  return 0;
556 
557  bool Is64Bit = (VT == MVT::f64);
558  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
559  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
560  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
561 }
562 
563 /// Check if the multiply is by a power-of-2 constant.
564 static bool isMulPowOf2(const Value *I) {
565  if (const auto *MI = dyn_cast<MulOperator>(I)) {
566  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
567  if (C->getValue().isPowerOf2())
568  return true;
569  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
570  if (C->getValue().isPowerOf2())
571  return true;
572  }
573  return false;
574 }
575 
576 // Computes the address to get to an object.
577 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
578 {
579  const User *U = nullptr;
580  unsigned Opcode = Instruction::UserOp1;
581  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
582  // Don't walk into other basic blocks unless the object is an alloca from
583  // another block, otherwise it may not have a virtual register assigned.
584  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
585  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
586  Opcode = I->getOpcode();
587  U = I;
588  }
589  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
590  Opcode = C->getOpcode();
591  U = C;
592  }
593 
594  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
595  if (Ty->getAddressSpace() > 255)
596  // Fast instruction selection doesn't support the special
597  // address spaces.
598  return false;
599 
600  switch (Opcode) {
601  default:
602  break;
603  case Instruction::BitCast:
604  // Look through bitcasts.
605  return computeAddress(U->getOperand(0), Addr, Ty);
606 
607  case Instruction::IntToPtr:
608  // Look past no-op inttoptrs.
609  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
610  TLI.getPointerTy(DL))
611  return computeAddress(U->getOperand(0), Addr, Ty);
612  break;
613 
614  case Instruction::PtrToInt:
615  // Look past no-op ptrtoints.
616  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
617  return computeAddress(U->getOperand(0), Addr, Ty);
618  break;
619 
620  case Instruction::GetElementPtr: {
621  Address SavedAddr = Addr;
622  uint64_t TmpOffset = Addr.getOffset();
623 
624  // Iterate through the GEP folding the constants into offsets where
625  // we can.
626  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
627  GTI != E; ++GTI) {
628  const Value *Op = GTI.getOperand();
629  if (StructType *STy = GTI.getStructTypeOrNull()) {
630  const StructLayout *SL = DL.getStructLayout(STy);
631  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
632  TmpOffset += SL->getElementOffset(Idx);
633  } else {
634  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
635  while (true) {
636  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
637  // Constant-offset addressing.
638  TmpOffset += CI->getSExtValue() * S;
639  break;
640  }
641  if (canFoldAddIntoGEP(U, Op)) {
642  // A compatible add with a constant operand. Fold the constant.
643  ConstantInt *CI =
644  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
645  TmpOffset += CI->getSExtValue() * S;
646  // Iterate on the other operand.
647  Op = cast<AddOperator>(Op)->getOperand(0);
648  continue;
649  }
650  // Unsupported
651  goto unsupported_gep;
652  }
653  }
654  }
655 
656  // Try to grab the base operand now.
657  Addr.setOffset(TmpOffset);
658  if (computeAddress(U->getOperand(0), Addr, Ty))
659  return true;
660 
661  // We failed, restore everything and try the other options.
662  Addr = SavedAddr;
663 
664  unsupported_gep:
665  break;
666  }
667  case Instruction::Alloca: {
668  const AllocaInst *AI = cast<AllocaInst>(Obj);
669  DenseMap<const AllocaInst *, int>::iterator SI =
670  FuncInfo.StaticAllocaMap.find(AI);
671  if (SI != FuncInfo.StaticAllocaMap.end()) {
672  Addr.setKind(Address::FrameIndexBase);
673  Addr.setFI(SI->second);
674  return true;
675  }
676  break;
677  }
678  case Instruction::Add: {
679  // Adds of constants are common and easy enough.
680  const Value *LHS = U->getOperand(0);
681  const Value *RHS = U->getOperand(1);
682 
683  if (isa<ConstantInt>(LHS))
684  std::swap(LHS, RHS);
685 
686  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
687  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
688  return computeAddress(LHS, Addr, Ty);
689  }
690 
691  Address Backup = Addr;
692  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
693  return true;
694  Addr = Backup;
695 
696  break;
697  }
698  case Instruction::Sub: {
699  // Subs of constants are common and easy enough.
700  const Value *LHS = U->getOperand(0);
701  const Value *RHS = U->getOperand(1);
702 
703  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
704  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
705  return computeAddress(LHS, Addr, Ty);
706  }
707  break;
708  }
709  case Instruction::Shl: {
710  if (Addr.getOffsetReg())
711  break;
712 
713  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
714  if (!CI)
715  break;
716 
717  unsigned Val = CI->getZExtValue();
718  if (Val < 1 || Val > 3)
719  break;
720 
721  uint64_t NumBytes = 0;
722  if (Ty && Ty->isSized()) {
723  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
724  NumBytes = NumBits / 8;
725  if (!isPowerOf2_64(NumBits))
726  NumBytes = 0;
727  }
728 
729  if (NumBytes != (1ULL << Val))
730  break;
731 
732  Addr.setShift(Val);
733  Addr.setExtendType(AArch64_AM::LSL);
734 
735  const Value *Src = U->getOperand(0);
736  if (const auto *I = dyn_cast<Instruction>(Src)) {
737  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
738  // Fold the zext or sext when it won't become a noop.
739  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
740  if (!isIntExtFree(ZE) &&
741  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
742  Addr.setExtendType(AArch64_AM::UXTW);
743  Src = ZE->getOperand(0);
744  }
745  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
746  if (!isIntExtFree(SE) &&
747  SE->getOperand(0)->getType()->isIntegerTy(32)) {
748  Addr.setExtendType(AArch64_AM::SXTW);
749  Src = SE->getOperand(0);
750  }
751  }
752  }
753  }
754 
755  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
756  if (AI->getOpcode() == Instruction::And) {
757  const Value *LHS = AI->getOperand(0);
758  const Value *RHS = AI->getOperand(1);
759 
760  if (const auto *C = dyn_cast<ConstantInt>(LHS))
761  if (C->getValue() == 0xffffffff)
762  std::swap(LHS, RHS);
763 
764  if (const auto *C = dyn_cast<ConstantInt>(RHS))
765  if (C->getValue() == 0xffffffff) {
766  Addr.setExtendType(AArch64_AM::UXTW);
767  unsigned Reg = getRegForValue(LHS);
768  if (!Reg)
769  return false;
770  bool RegIsKill = hasTrivialKill(LHS);
771  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
772  AArch64::sub_32);
773  Addr.setOffsetReg(Reg);
774  return true;
775  }
776  }
777 
778  unsigned Reg = getRegForValue(Src);
779  if (!Reg)
780  return false;
781  Addr.setOffsetReg(Reg);
782  return true;
783  }
784  case Instruction::Mul: {
785  if (Addr.getOffsetReg())
786  break;
787 
788  if (!isMulPowOf2(U))
789  break;
790 
791  const Value *LHS = U->getOperand(0);
792  const Value *RHS = U->getOperand(1);
793 
794  // Canonicalize power-of-2 value to the RHS.
795  if (const auto *C = dyn_cast<ConstantInt>(LHS))
796  if (C->getValue().isPowerOf2())
797  std::swap(LHS, RHS);
798 
799  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
800  const auto *C = cast<ConstantInt>(RHS);
801  unsigned Val = C->getValue().logBase2();
802  if (Val < 1 || Val > 3)
803  break;
804 
805  uint64_t NumBytes = 0;
806  if (Ty && Ty->isSized()) {
807  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
808  NumBytes = NumBits / 8;
809  if (!isPowerOf2_64(NumBits))
810  NumBytes = 0;
811  }
812 
813  if (NumBytes != (1ULL << Val))
814  break;
815 
816  Addr.setShift(Val);
817  Addr.setExtendType(AArch64_AM::LSL);
818 
819  const Value *Src = LHS;
820  if (const auto *I = dyn_cast<Instruction>(Src)) {
821  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
822  // Fold the zext or sext when it won't become a noop.
823  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
824  if (!isIntExtFree(ZE) &&
825  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
826  Addr.setExtendType(AArch64_AM::UXTW);
827  Src = ZE->getOperand(0);
828  }
829  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
830  if (!isIntExtFree(SE) &&
831  SE->getOperand(0)->getType()->isIntegerTy(32)) {
832  Addr.setExtendType(AArch64_AM::SXTW);
833  Src = SE->getOperand(0);
834  }
835  }
836  }
837  }
838 
839  unsigned Reg = getRegForValue(Src);
840  if (!Reg)
841  return false;
842  Addr.setOffsetReg(Reg);
843  return true;
844  }
845  case Instruction::And: {
846  if (Addr.getOffsetReg())
847  break;
848 
849  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
850  break;
851 
852  const Value *LHS = U->getOperand(0);
853  const Value *RHS = U->getOperand(1);
854 
855  if (const auto *C = dyn_cast<ConstantInt>(LHS))
856  if (C->getValue() == 0xffffffff)
857  std::swap(LHS, RHS);
858 
859  if (const auto *C = dyn_cast<ConstantInt>(RHS))
860  if (C->getValue() == 0xffffffff) {
861  Addr.setShift(0);
862  Addr.setExtendType(AArch64_AM::LSL);
863  Addr.setExtendType(AArch64_AM::UXTW);
864 
865  unsigned Reg = getRegForValue(LHS);
866  if (!Reg)
867  return false;
868  bool RegIsKill = hasTrivialKill(LHS);
869  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
870  AArch64::sub_32);
871  Addr.setOffsetReg(Reg);
872  return true;
873  }
874  break;
875  }
876  case Instruction::SExt:
877  case Instruction::ZExt: {
878  if (!Addr.getReg() || Addr.getOffsetReg())
879  break;
880 
881  const Value *Src = nullptr;
882  // Fold the zext or sext when it won't become a noop.
883  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
884  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
885  Addr.setExtendType(AArch64_AM::UXTW);
886  Src = ZE->getOperand(0);
887  }
888  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
889  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
890  Addr.setExtendType(AArch64_AM::SXTW);
891  Src = SE->getOperand(0);
892  }
893  }
894 
895  if (!Src)
896  break;
897 
898  Addr.setShift(0);
899  unsigned Reg = getRegForValue(Src);
900  if (!Reg)
901  return false;
902  Addr.setOffsetReg(Reg);
903  return true;
904  }
905  } // end switch
906 
907  if (Addr.isRegBase() && !Addr.getReg()) {
908  unsigned Reg = getRegForValue(Obj);
909  if (!Reg)
910  return false;
911  Addr.setReg(Reg);
912  return true;
913  }
914 
915  if (!Addr.getOffsetReg()) {
916  unsigned Reg = getRegForValue(Obj);
917  if (!Reg)
918  return false;
919  Addr.setOffsetReg(Reg);
920  return true;
921  }
922 
923  return false;
924 }
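// computeAddress folds GEPs, constant adds/subs, shl/mul by 2/4/8 (possibly
// through a free zext/sext, becoming UXTW/SXTW), and 0xffffffff masks into
// the Address object; whatever cannot be folded simply becomes the base (or
// offset) register via getRegForValue.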
925 
926 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
927  const User *U = nullptr;
928  unsigned Opcode = Instruction::UserOp1;
929  bool InMBB = true;
930 
931  if (const auto *I = dyn_cast<Instruction>(V)) {
932  Opcode = I->getOpcode();
933  U = I;
934  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
935  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
936  Opcode = C->getOpcode();
937  U = C;
938  }
939 
940  switch (Opcode) {
941  default: break;
942  case Instruction::BitCast:
943  // Look past bitcasts if its operand is in the same BB.
944  if (InMBB)
945  return computeCallAddress(U->getOperand(0), Addr);
946  break;
947  case Instruction::IntToPtr:
948  // Look past no-op inttoptrs if its operand is in the same BB.
949  if (InMBB &&
950  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
951  TLI.getPointerTy(DL))
952  return computeCallAddress(U->getOperand(0), Addr);
953  break;
954  case Instruction::PtrToInt:
955  // Look past no-op ptrtoints if its operand is in the same BB.
956  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
957  return computeCallAddress(U->getOperand(0), Addr);
958  break;
959  }
960 
961  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
962  Addr.setGlobalValue(GV);
963  return true;
964  }
965 
966  // If all else fails, try to materialize the value in a register.
967  if (!Addr.getGlobalValue()) {
968  Addr.setReg(getRegForValue(V));
969  return Addr.getReg() != 0;
970  }
971 
972  return false;
973 }
974 
975 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
976  EVT evt = TLI.getValueType(DL, Ty, true);
977 
978  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
979  return false;
980 
981  // Only handle simple types.
982  if (evt == MVT::Other || !evt.isSimple())
983  return false;
984  VT = evt.getSimpleVT();
985 
986  // This is a legal type, but it's not something we handle in fast-isel.
987  if (VT == MVT::f128)
988  return false;
989 
990  // Handle all other legal types, i.e. a register that will directly hold this
991  // value.
992  return TLI.isTypeLegal(VT);
993 }
994 
995 /// Determine if the value type is supported by FastISel.
996 ///
997 /// FastISel for AArch64 can handle more value types than are legal. This adds
998 /// simple value types such as i1, i8, and i16.
999 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1000  if (Ty->isVectorTy() && !IsVectorAllowed)
1001  return false;
1002 
1003  if (isTypeLegal(Ty, VT))
1004  return true;
1005 
1006  // If this is a type that can be sign- or zero-extended to a basic operation,
1007  // go ahead and accept it now.
1008  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1009  return true;
1010 
1011  return false;
1012 }
1013 
1014 bool AArch64FastISel::isValueAvailable(const Value *V) const {
1015  if (!isa<Instruction>(V))
1016  return true;
1017 
1018  const auto *I = cast<Instruction>(V);
1019  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1020 }
1021 
1022 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1023  if (Subtarget->isTargetILP32())
1024  return false;
1025 
1026  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1027  if (!ScaleFactor)
1028  return false;
1029 
1030  bool ImmediateOffsetNeedsLowering = false;
1031  bool RegisterOffsetNeedsLowering = false;
1032  int64_t Offset = Addr.getOffset();
1033  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1034  ImmediateOffsetNeedsLowering = true;
1035  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1036  !isUInt<12>(Offset / ScaleFactor))
1037  ImmediateOffsetNeedsLowering = true;
1038 
1039  // Cannot encode an offset register and an immediate offset in the same
1040  // instruction. Fold the immediate offset into the load/store instruction and
1041  // emit an additional add to take care of the offset register.
1042  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1043  RegisterOffsetNeedsLowering = true;
1044 
1045  // Cannot encode zero register as base.
1046  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1047  RegisterOffsetNeedsLowering = true;
1048 
1049  // If this is a stack pointer and the offset needs to be simplified then put
1050  // the alloca address into a register, set the base type back to register and
1051  // continue. This should almost never happen.
1052  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1053  {
1054  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1055  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1056  ResultReg)
1057  .addFrameIndex(Addr.getFI())
1058  .addImm(0)
1059  .addImm(0);
1060  Addr.setKind(Address::RegBase);
1061  Addr.setReg(ResultReg);
1062  }
1063 
1064  if (RegisterOffsetNeedsLowering) {
1065  unsigned ResultReg = 0;
1066  if (Addr.getReg()) {
1067  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1068  Addr.getExtendType() == AArch64_AM::UXTW )
1069  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1070  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1071  /*TODO:IsKill=*/false, Addr.getExtendType(),
1072  Addr.getShift());
1073  else
1074  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1075  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1076  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1077  Addr.getShift());
1078  } else {
1079  if (Addr.getExtendType() == AArch64_AM::UXTW)
1080  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1081  /*Op0IsKill=*/false, Addr.getShift(),
1082  /*IsZExt=*/true);
1083  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1084  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1085  /*Op0IsKill=*/false, Addr.getShift(),
1086  /*IsZExt=*/false);
1087  else
1088  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1089  /*Op0IsKill=*/false, Addr.getShift());
1090  }
1091  if (!ResultReg)
1092  return false;
1093 
1094  Addr.setReg(ResultReg);
1095  Addr.setOffsetReg(0);
1096  Addr.setShift(0);
1097  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1098  }
1099 
1100  // Since the offset is too large for the load/store instruction get the
1101  // reg+offset into a register.
1102  if (ImmediateOffsetNeedsLowering) {
1103  unsigned ResultReg;
1104  if (Addr.getReg())
1105  // Try to fold the immediate into the add instruction.
1106  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1107  else
1108  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1109 
1110  if (!ResultReg)
1111  return false;
1112  Addr.setReg(ResultReg);
1113  Addr.setOffset(0);
1114  }
1115  return true;
1116 }
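// After simplifyAddress the address is directly encodable: either an
// immediate offset that fits the scaled unsigned 12-bit or unscaled signed
// 9-bit field, or a register offset with an LSL/UXTW/SXTW modifier, but never
// both an immediate and a register offset at once.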
1117 
1118 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1119  const MachineInstrBuilder &MIB,
1120  MachineMemOperand::Flags Flags,
1121  unsigned ScaleFactor,
1122  MachineMemOperand *MMO) {
1123  int64_t Offset = Addr.getOffset() / ScaleFactor;
1124  // Frame base works a bit differently. Handle it separately.
1125  if (Addr.isFIBase()) {
1126  int FI = Addr.getFI();
1127  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1128  // and alignment should be based on the VT.
1129  MMO = FuncInfo.MF->getMachineMemOperand(
1130  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1131  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1132  // Now add the rest of the operands.
1133  MIB.addFrameIndex(FI).addImm(Offset);
1134  } else {
1135  assert(Addr.isRegBase() && "Unexpected address kind.");
1136  const MCInstrDesc &II = MIB->getDesc();
1137  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1138  Addr.setReg(
1139  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1140  Addr.setOffsetReg(
1141  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1142  if (Addr.getOffsetReg()) {
1143  assert(Addr.getOffset() == 0 && "Unexpected offset");
1144  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1145  Addr.getExtendType() == AArch64_AM::SXTX;
1146  MIB.addReg(Addr.getReg());
1147  MIB.addReg(Addr.getOffsetReg());
1148  MIB.addImm(IsSigned);
1149  MIB.addImm(Addr.getShift() != 0);
1150  } else
1151  MIB.addReg(Addr.getReg()).addImm(Offset);
1152  }
1153 
1154  if (MMO)
1155  MIB.addMemOperand(MMO);
1156 }
1157 
1158 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1159  const Value *RHS, bool SetFlags,
1160  bool WantResult, bool IsZExt) {
1161  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1162  bool NeedExtend = false;
1163  switch (RetVT.SimpleTy) {
1164  default:
1165  return 0;
1166  case MVT::i1:
1167  NeedExtend = true;
1168  break;
1169  case MVT::i8:
1170  NeedExtend = true;
1171  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1172  break;
1173  case MVT::i16:
1174  NeedExtend = true;
1175  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1176  break;
1177  case MVT::i32: // fall-through
1178  case MVT::i64:
1179  break;
1180  }
1181  MVT SrcVT = RetVT;
1182  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1183 
1184  // Canonicalize immediates to the RHS first.
1185  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1186  std::swap(LHS, RHS);
1187 
1188  // Canonicalize mul by power of 2 to the RHS.
1189  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1190  if (isMulPowOf2(LHS))
1191  std::swap(LHS, RHS);
1192 
1193  // Canonicalize shift immediate to the RHS.
1194  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1195  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1196  if (isa<ConstantInt>(SI->getOperand(1)))
1197  if (SI->getOpcode() == Instruction::Shl ||
1198  SI->getOpcode() == Instruction::LShr ||
1199  SI->getOpcode() == Instruction::AShr )
1200  std::swap(LHS, RHS);
1201 
1202  unsigned LHSReg = getRegForValue(LHS);
1203  if (!LHSReg)
1204  return 0;
1205  bool LHSIsKill = hasTrivialKill(LHS);
1206 
1207  if (NeedExtend)
1208  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1209 
1210  unsigned ResultReg = 0;
1211  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1212  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1213  if (C->isNegative())
1214  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1215  SetFlags, WantResult);
1216  else
1217  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1218  WantResult);
1219  } else if (const auto *C = dyn_cast<Constant>(RHS))
1220  if (C->isNullValue())
1221  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1222  WantResult);
1223 
1224  if (ResultReg)
1225  return ResultReg;
1226 
1227  // Only extend the RHS within the instruction if there is a valid extend type.
1228  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1229  isValueAvailable(RHS)) {
1230  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1231  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1232  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1233  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1234  if (!RHSReg)
1235  return 0;
1236  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1237  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1238  RHSIsKill, ExtendType, C->getZExtValue(),
1239  SetFlags, WantResult);
1240  }
1241  unsigned RHSReg = getRegForValue(RHS);
1242  if (!RHSReg)
1243  return 0;
1244  bool RHSIsKill = hasTrivialKill(RHS);
1245  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1246  ExtendType, 0, SetFlags, WantResult);
1247  }
1248 
1249  // Check if the mul can be folded into the instruction.
1250  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1251  if (isMulPowOf2(RHS)) {
1252  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1253  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1254 
1255  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1256  if (C->getValue().isPowerOf2())
1257  std::swap(MulLHS, MulRHS);
1258 
1259  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1260  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1261  unsigned RHSReg = getRegForValue(MulLHS);
1262  if (!RHSReg)
1263  return 0;
1264  bool RHSIsKill = hasTrivialKill(MulLHS);
1265  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1266  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1267  WantResult);
1268  if (ResultReg)
1269  return ResultReg;
1270  }
1271  }
1272 
1273  // Check if the shift can be folded into the instruction.
1274  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1275  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1276  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1277  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1278  switch (SI->getOpcode()) {
1279  default: break;
1280  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1281  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1282  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1283  }
1284  uint64_t ShiftVal = C->getZExtValue();
1285  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1286  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1287  if (!RHSReg)
1288  return 0;
1289  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1290  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1291  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1292  WantResult);
1293  if (ResultReg)
1294  return ResultReg;
1295  }
1296  }
1297  }
1298  }
1299 
1300  unsigned RHSReg = getRegForValue(RHS);
1301  if (!RHSReg)
1302  return 0;
1303  bool RHSIsKill = hasTrivialKill(RHS);
1304 
1305  if (NeedExtend)
1306  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1307 
1308  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1309  SetFlags, WantResult);
1310 }
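// emitAddSub tries the cheapest encodings first: the immediate form
// (emitAddSub_ri) for constant operands, the extended-register form
// (emitAddSub_rx) when a small shl can be folded together with the required
// extend, the shifted-register form (emitAddSub_rs) for foldable multiplies
// and shifts, and finally the plain register form (emitAddSub_rr).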
1311 
1312 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1313  bool LHSIsKill, unsigned RHSReg,
1314  bool RHSIsKill, bool SetFlags,
1315  bool WantResult) {
1316  assert(LHSReg && RHSReg && "Invalid register number.");
1317 
1318  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1319  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1320  return 0;
1321 
1322  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323  return 0;
1324 
1325  static const unsigned OpcTable[2][2][2] = {
1326  { { AArch64::SUBWrr, AArch64::SUBXrr },
1327  { AArch64::ADDWrr, AArch64::ADDXrr } },
1328  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1329  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1330  };
1331  bool Is64Bit = RetVT == MVT::i64;
1332  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1333  const TargetRegisterClass *RC =
1334  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1335  unsigned ResultReg;
1336  if (WantResult)
1337  ResultReg = createResultReg(RC);
1338  else
1339  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1340 
1341  const MCInstrDesc &II = TII.get(Opc);
1342  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1343  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1344  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1345  .addReg(LHSReg, getKillRegState(LHSIsKill))
1346  .addReg(RHSReg, getKillRegState(RHSIsKill));
1347  return ResultReg;
1348 }
1349 
1350 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1351  bool LHSIsKill, uint64_t Imm,
1352  bool SetFlags, bool WantResult) {
1353  assert(LHSReg && "Invalid register number.");
1354 
1355  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1356  return 0;
1357 
1358  unsigned ShiftImm;
1359  if (isUInt<12>(Imm))
1360  ShiftImm = 0;
1361  else if ((Imm & 0xfff000) == Imm) {
1362  ShiftImm = 12;
1363  Imm >>= 12;
1364  } else
1365  return 0;
1366 
1367  static const unsigned OpcTable[2][2][2] = {
1368  { { AArch64::SUBWri, AArch64::SUBXri },
1369  { AArch64::ADDWri, AArch64::ADDXri } },
1370  { { AArch64::SUBSWri, AArch64::SUBSXri },
1371  { AArch64::ADDSWri, AArch64::ADDSXri } }
1372  };
1373  bool Is64Bit = RetVT == MVT::i64;
1374  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1375  const TargetRegisterClass *RC;
1376  if (SetFlags)
1377  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1378  else
1379  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1380  unsigned ResultReg;
1381  if (WantResult)
1382  ResultReg = createResultReg(RC);
1383  else
1384  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1385 
1386  const MCInstrDesc &II = TII.get(Opc);
1387  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1388  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1389  .addReg(LHSReg, getKillRegState(LHSIsKill))
1390  .addImm(Imm)
1391  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1392  return ResultReg;
1393 }
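// This matches the ADD/SUB (immediate) encoding: a 12-bit unsigned value,
// optionally shifted left by 12 (the "Imm & 0xfff000" case); anything else
// returns 0 so the caller can fall back to a register form.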
1394 
1395 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1396  bool LHSIsKill, unsigned RHSReg,
1397  bool RHSIsKill,
1398  AArch64_AM::ShiftExtendType ShiftType,
1399  uint64_t ShiftImm, bool SetFlags,
1400  bool WantResult) {
1401  assert(LHSReg && RHSReg && "Invalid register number.");
1402  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1403  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1404 
1405  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1406  return 0;
1407 
1408  // Don't deal with undefined shifts.
1409  if (ShiftImm >= RetVT.getSizeInBits())
1410  return 0;
1411 
1412  static const unsigned OpcTable[2][2][2] = {
1413  { { AArch64::SUBWrs, AArch64::SUBXrs },
1414  { AArch64::ADDWrs, AArch64::ADDXrs } },
1415  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1416  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1417  };
1418  bool Is64Bit = RetVT == MVT::i64;
1419  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1420  const TargetRegisterClass *RC =
1421  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1422  unsigned ResultReg;
1423  if (WantResult)
1424  ResultReg = createResultReg(RC);
1425  else
1426  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1427 
1428  const MCInstrDesc &II = TII.get(Opc);
1429  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1430  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1432  .addReg(LHSReg, getKillRegState(LHSIsKill))
1433  .addReg(RHSReg, getKillRegState(RHSIsKill))
1434  .addImm(getShifterImm(ShiftType, ShiftImm));
1435  return ResultReg;
1436 }
1437 
1438 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1439  bool LHSIsKill, unsigned RHSReg,
1440  bool RHSIsKill,
1441  AArch64_AM::ShiftExtendType ExtType,
1442  uint64_t ShiftImm, bool SetFlags,
1443  bool WantResult) {
1444  assert(LHSReg && RHSReg && "Invalid register number.");
1445  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1446  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1447 
1448  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1449  return 0;
1450 
1451  if (ShiftImm >= 4)
1452  return 0;
1453 
1454  static const unsigned OpcTable[2][2][2] = {
1455  { { AArch64::SUBWrx, AArch64::SUBXrx },
1456  { AArch64::ADDWrx, AArch64::ADDXrx } },
1457  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1458  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1459  };
1460  bool Is64Bit = RetVT == MVT::i64;
1461  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1462  const TargetRegisterClass *RC = nullptr;
1463  if (SetFlags)
1464  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1465  else
1466  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1467  unsigned ResultReg;
1468  if (WantResult)
1469  ResultReg = createResultReg(RC);
1470  else
1471  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1472 
1473  const MCInstrDesc &II = TII.get(Opc);
1474  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1475  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1476  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1477  .addReg(LHSReg, getKillRegState(LHSIsKill))
1478  .addReg(RHSReg, getKillRegState(RHSIsKill))
1479  .addImm(getArithExtendImm(ExtType, ShiftImm));
1480  return ResultReg;
1481 }
1482 
1483 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1484  Type *Ty = LHS->getType();
1485  EVT EVT = TLI.getValueType(DL, Ty, true);
1486  if (!EVT.isSimple())
1487  return false;
1488  MVT VT = EVT.getSimpleVT();
1489 
1490  switch (VT.SimpleTy) {
1491  default:
1492  return false;
1493  case MVT::i1:
1494  case MVT::i8:
1495  case MVT::i16:
1496  case MVT::i32:
1497  case MVT::i64:
1498  return emitICmp(VT, LHS, RHS, IsZExt);
1499  case MVT::f32:
1500  case MVT::f64:
1501  return emitFCmp(VT, LHS, RHS);
1502  }
1503 }
1504 
1505 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1506  bool IsZExt) {
1507  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1508  IsZExt) != 0;
1509 }
1510 
1511 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1512  uint64_t Imm) {
1513  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1514  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1515 }
1516 
1517 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1518  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1519  return false;
1520 
1521  // Check to see if the 2nd operand is a constant that we can encode directly
1522  // in the compare.
1523  bool UseImm = false;
1524  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1525  if (CFP->isZero() && !CFP->isNegative())
1526  UseImm = true;
1527 
1528  unsigned LHSReg = getRegForValue(LHS);
1529  if (!LHSReg)
1530  return false;
1531  bool LHSIsKill = hasTrivialKill(LHS);
1532 
1533  if (UseImm) {
1534  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1535  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1536  .addReg(LHSReg, getKillRegState(LHSIsKill));
1537  return true;
1538  }
1539 
1540  unsigned RHSReg = getRegForValue(RHS);
1541  if (!RHSReg)
1542  return false;
1543  bool RHSIsKill = hasTrivialKill(RHS);
1544 
1545  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1546  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1547  .addReg(LHSReg, getKillRegState(LHSIsKill))
1548  .addReg(RHSReg, getKillRegState(RHSIsKill));
1549  return true;
1550 }
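// The immediate FCMP (FCMPSri/FCMPDri) compares against +0.0 only, which is
// why UseImm is restricted to a non-negative zero constant on the RHS.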
1551 
1552 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1553  bool SetFlags, bool WantResult, bool IsZExt) {
1554  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1555  IsZExt);
1556 }
1557 
1558 /// This method is a wrapper to simplify add emission.
1559 ///
1560 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1561 /// that fails, then try to materialize the immediate into a register and use
1562 /// emitAddSub_rr instead.
1563 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1564  int64_t Imm) {
1565  unsigned ResultReg;
1566  if (Imm < 0)
1567  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1568  else
1569  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1570 
1571  if (ResultReg)
1572  return ResultReg;
1573 
1574  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1575  if (!CReg)
1576  return 0;
1577 
1578  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1579  return ResultReg;
1580 }
1581 
1582 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1583  bool SetFlags, bool WantResult, bool IsZExt) {
1584  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1585  IsZExt);
1586 }
1587 
1588 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1589  bool LHSIsKill, unsigned RHSReg,
1590  bool RHSIsKill, bool WantResult) {
1591  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1592  RHSIsKill, /*SetFlags=*/true, WantResult);
1593 }
1594 
1595 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1596  bool LHSIsKill, unsigned RHSReg,
1597  bool RHSIsKill,
1598  AArch64_AM::ShiftExtendType ShiftType,
1599  uint64_t ShiftImm, bool WantResult) {
1600  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1601  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1602  WantResult);
1603 }
1604 
1605 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1606  const Value *LHS, const Value *RHS) {
1607  // Canonicalize immediates to the RHS first.
1608  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1609  std::swap(LHS, RHS);
1610 
1611  // Canonicalize mul by power-of-2 to the RHS.
1612  if (LHS->hasOneUse() && isValueAvailable(LHS))
1613  if (isMulPowOf2(LHS))
1614  std::swap(LHS, RHS);
1615 
1616  // Canonicalize shift immediate to the RHS.
1617  if (LHS->hasOneUse() && isValueAvailable(LHS))
1618  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1619  if (isa<ConstantInt>(SI->getOperand(1)))
1620  std::swap(LHS, RHS);
1621 
1622  unsigned LHSReg = getRegForValue(LHS);
1623  if (!LHSReg)
1624  return 0;
1625  bool LHSIsKill = hasTrivialKill(LHS);
1626 
1627  unsigned ResultReg = 0;
1628  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1629  uint64_t Imm = C->getZExtValue();
1630  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1631  }
1632  if (ResultReg)
1633  return ResultReg;
1634 
1635  // Check if the mul can be folded into the instruction.
1636  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1637  if (isMulPowOf2(RHS)) {
1638  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1639  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1640 
1641  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1642  if (C->getValue().isPowerOf2())
1643  std::swap(MulLHS, MulRHS);
1644 
1645  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1646  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1647 
1648  unsigned RHSReg = getRegForValue(MulLHS);
1649  if (!RHSReg)
1650  return 0;
1651  bool RHSIsKill = hasTrivialKill(MulLHS);
1652  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1653  RHSIsKill, ShiftVal);
1654  if (ResultReg)
1655  return ResultReg;
1656  }
1657  }
1658 
1659  // Check if the shift can be folded into the instruction.
1660  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1661  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1662  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1663  uint64_t ShiftVal = C->getZExtValue();
1664  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1665  if (!RHSReg)
1666  return 0;
1667  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1668  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1669  RHSIsKill, ShiftVal);
1670  if (ResultReg)
1671  return ResultReg;
1672  }
1673  }
1674 
1675  unsigned RHSReg = getRegForValue(RHS);
1676  if (!RHSReg)
1677  return 0;
1678  bool RHSIsKill = hasTrivialKill(RHS);
1679 
1680  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1681  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1682  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1683  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1684  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1685  }
1686  return ResultReg;
1687 }
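// Folding example (IR and registers are illustrative): for
//   %s = shl i32 %b, 2
//   %r = and i32 %a, %s
// the single-use shift on the RHS is folded via emitLogicalOp_rs into one
// shifted-register instruction, roughly "and w0, w1, w2, lsl #2"; a multiply
// by a power of two (e.g. "mul i32 %b, 4") is folded the same way.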
1688 
1689 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1690  unsigned LHSReg, bool LHSIsKill,
1691  uint64_t Imm) {
1692  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1693  "ISD nodes are not consecutive!");
1694  static const unsigned OpcTable[3][2] = {
1695  { AArch64::ANDWri, AArch64::ANDXri },
1696  { AArch64::ORRWri, AArch64::ORRXri },
1697  { AArch64::EORWri, AArch64::EORXri }
1698  };
1699  const TargetRegisterClass *RC;
1700  unsigned Opc;
1701  unsigned RegSize;
1702  switch (RetVT.SimpleTy) {
1703  default:
1704  return 0;
1705  case MVT::i1:
1706  case MVT::i8:
1707  case MVT::i16:
1708  case MVT::i32: {
1709  unsigned Idx = ISDOpc - ISD::AND;
1710  Opc = OpcTable[Idx][0];
1711  RC = &AArch64::GPR32spRegClass;
1712  RegSize = 32;
1713  break;
1714  }
1715  case MVT::i64:
1716  Opc = OpcTable[ISDOpc - ISD::AND][1];
1717  RC = &AArch64::GPR64spRegClass;
1718  RegSize = 64;
1719  break;
1720  }
1721 
1722  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1723  return 0;
1724 
1725  unsigned ResultReg =
1726  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1727  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1728  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1729  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1730  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1731  }
1732  return ResultReg;
1733 }
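// Example (constants are illustrative): 0xff and 0xffff are valid logical
// immediates, so "and i32 %x, 255" becomes a single ANDWri. A constant such
// as 0x1234 is not a repeating run-of-ones pattern, so isLogicalImmediate
// rejects it here and the caller falls back to materializing the constant
// and using the register-register form.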
1734 
1735 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1736  unsigned LHSReg, bool LHSIsKill,
1737  unsigned RHSReg, bool RHSIsKill,
1738  uint64_t ShiftImm) {
1739  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1740  "ISD nodes are not consecutive!");
1741  static const unsigned OpcTable[3][2] = {
1742  { AArch64::ANDWrs, AArch64::ANDXrs },
1743  { AArch64::ORRWrs, AArch64::ORRXrs },
1744  { AArch64::EORWrs, AArch64::EORXrs }
1745  };
1746 
1747  // Don't deal with undefined shifts.
1748  if (ShiftImm >= RetVT.getSizeInBits())
1749  return 0;
1750 
1751  const TargetRegisterClass *RC;
1752  unsigned Opc;
1753  switch (RetVT.SimpleTy) {
1754  default:
1755  return 0;
1756  case MVT::i1:
1757  case MVT::i8:
1758  case MVT::i16:
1759  case MVT::i32:
1760  Opc = OpcTable[ISDOpc - ISD::AND][0];
1761  RC = &AArch64::GPR32RegClass;
1762  break;
1763  case MVT::i64:
1764  Opc = OpcTable[ISDOpc - ISD::AND][1];
1765  RC = &AArch64::GPR64RegClass;
1766  break;
1767  }
1768  unsigned ResultReg =
1769  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1770  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1771  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1772  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1773  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1774  }
1775  return ResultReg;
1776 }
1777 
1778 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1779  uint64_t Imm) {
1780  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1781 }
1782 
1783 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1784  bool WantZExt, MachineMemOperand *MMO) {
1785  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1786  return 0;
1787 
1788  // Simplify this down to something we can handle.
1789  if (!simplifyAddress(Addr, VT))
1790  return 0;
1791 
1792  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1793  if (!ScaleFactor)
1794  llvm_unreachable("Unexpected value type.");
1795 
1796  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1797  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1798  bool UseScaled = true;
1799  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1800  UseScaled = false;
1801  ScaleFactor = 1;
1802  }
1803 
1804  static const unsigned GPOpcTable[2][8][4] = {
1805  // Sign-extend.
1806  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1807  AArch64::LDURXi },
1808  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1809  AArch64::LDURXi },
1810  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1811  AArch64::LDRXui },
1812  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1813  AArch64::LDRXui },
1814  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1815  AArch64::LDRXroX },
1816  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1817  AArch64::LDRXroX },
1818  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1819  AArch64::LDRXroW },
1820  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1821  AArch64::LDRXroW }
1822  },
1823  // Zero-extend.
1824  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1825  AArch64::LDURXi },
1826  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1827  AArch64::LDURXi },
1828  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1829  AArch64::LDRXui },
1830  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1831  AArch64::LDRXui },
1832  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1833  AArch64::LDRXroX },
1834  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1835  AArch64::LDRXroX },
1836  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1837  AArch64::LDRXroW },
1838  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1839  AArch64::LDRXroW }
1840  }
1841  };
1842 
1843  static const unsigned FPOpcTable[4][2] = {
1844  { AArch64::LDURSi, AArch64::LDURDi },
1845  { AArch64::LDRSui, AArch64::LDRDui },
1846  { AArch64::LDRSroX, AArch64::LDRDroX },
1847  { AArch64::LDRSroW, AArch64::LDRDroW }
1848  };
1849 
1850  unsigned Opc;
1851  const TargetRegisterClass *RC;
1852  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1853  Addr.getOffsetReg();
1854  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1855  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1856  Addr.getExtendType() == AArch64_AM::SXTW)
1857  Idx++;
1858 
1859  bool IsRet64Bit = RetVT == MVT::i64;
1860  switch (VT.SimpleTy) {
1861  default:
1862  llvm_unreachable("Unexpected value type.");
1863  case MVT::i1: // Intentional fall-through.
1864  case MVT::i8:
1865  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1866  RC = (IsRet64Bit && !WantZExt) ?
1867  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1868  break;
1869  case MVT::i16:
1870  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1871  RC = (IsRet64Bit && !WantZExt) ?
1872  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1873  break;
1874  case MVT::i32:
1875  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1876  RC = (IsRet64Bit && !WantZExt) ?
1877  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1878  break;
1879  case MVT::i64:
1880  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1881  RC = &AArch64::GPR64RegClass;
1882  break;
1883  case MVT::f32:
1884  Opc = FPOpcTable[Idx][0];
1885  RC = &AArch64::FPR32RegClass;
1886  break;
1887  case MVT::f64:
1888  Opc = FPOpcTable[Idx][1];
1889  RC = &AArch64::FPR64RegClass;
1890  break;
1891  }
1892 
1893  // Create the base instruction, then add the operands.
1894  unsigned ResultReg = createResultReg(RC);
1895  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1896  TII.get(Opc), ResultReg);
1897  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1898 
1899  // Loading an i1 requires special handling.
1900  if (VT == MVT::i1) {
1901  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1902  assert(ANDReg && "Unexpected AND instruction emission failure.");
1903  ResultReg = ANDReg;
1904  }
1905 
1906  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1907  // the 32bit reg to a 64bit reg.
1908  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1909  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1911  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1912  .addImm(0)
1913  .addReg(ResultReg, getKillRegState(true))
1914  .addImm(AArch64::sub_32);
1915  ResultReg = Reg64;
1916  }
1917  return ResultReg;
1918 }
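// Addressing-mode selection, illustrated (offsets and registers are
// hypothetical): a non-negative offset that is a multiple of the access size
// uses the scaled unsigned form, e.g. "ldr w0, [x1, #8]"; a negative or
// unaligned offset uses the unscaled 9-bit signed form, e.g.
// "ldur w0, [x1, #-4]"; a register+register address picks the roX/roW
// variants, e.g. "ldr w0, [x1, w2, sxtw #2]".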
1919 
1920 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1921  MVT VT;
1922  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1923  return false;
1924 
1925  if (VT.isVector())
1926  return selectOperator(I, I->getOpcode());
1927 
1928  unsigned ResultReg;
1929  switch (I->getOpcode()) {
1930  default:
1931  llvm_unreachable("Unexpected instruction.");
1932  case Instruction::Add:
1933  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1934  break;
1935  case Instruction::Sub:
1936  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1937  break;
1938  }
1939  if (!ResultReg)
1940  return false;
1941 
1942  updateValueMap(I, ResultReg);
1943  return true;
1944 }
1945 
1946 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1947  MVT VT;
1948  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1949  return false;
1950 
1951  if (VT.isVector())
1952  return selectOperator(I, I->getOpcode());
1953 
1954  unsigned ResultReg;
1955  switch (I->getOpcode()) {
1956  default:
1957  llvm_unreachable("Unexpected instruction.");
1958  case Instruction::And:
1959  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1960  break;
1961  case Instruction::Or:
1962  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1963  break;
1964  case Instruction::Xor:
1965  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1966  break;
1967  }
1968  if (!ResultReg)
1969  return false;
1970 
1971  updateValueMap(I, ResultReg);
1972  return true;
1973 }
1974 
1975 bool AArch64FastISel::selectLoad(const Instruction *I) {
1976  MVT VT;
1977  // Verify we have a legal type before going any further. Currently, we handle
1978  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1979  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1980  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1981  cast<LoadInst>(I)->isAtomic())
1982  return false;
1983 
1984  const Value *SV = I->getOperand(0);
1985  if (TLI.supportSwiftError()) {
1986  // Swifterror values can come from either a function parameter with
1987  // swifterror attribute or an alloca with swifterror attribute.
1988  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1989  if (Arg->hasSwiftErrorAttr())
1990  return false;
1991  }
1992 
1993  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1994  if (Alloca->isSwiftError())
1995  return false;
1996  }
1997  }
1998 
1999  // See if we can handle this address.
2000  Address Addr;
2001  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
2002  return false;
2003 
2004  // Fold the following sign-/zero-extend into the load instruction.
2005  bool WantZExt = true;
2006  MVT RetVT = VT;
2007  const Value *IntExtVal = nullptr;
2008  if (I->hasOneUse()) {
2009  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
2010  if (isTypeSupported(ZE->getType(), RetVT))
2011  IntExtVal = ZE;
2012  else
2013  RetVT = VT;
2014  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
2015  if (isTypeSupported(SE->getType(), RetVT))
2016  IntExtVal = SE;
2017  else
2018  RetVT = VT;
2019  WantZExt = false;
2020  }
2021  }
2022 
2023  unsigned ResultReg =
2024  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
2025  if (!ResultReg)
2026  return false;
2027 
2028  // There are a few different cases we have to handle, because the load or the
2029  // sign-/zero-extend might not be selected by FastISel if we fall-back to
2030  // SelectionDAG. There is also an ordering issue when both instructions are in
2031  // different basic blocks.
2032  // 1.) The load instruction is selected by FastISel, but the integer extend
2033  // not. This usually happens when the integer extend is in a different
2034  // basic block and SelectionDAG took over for that basic block.
2035  // 2.) The load instruction is selected before the integer extend. This only
2036  // happens when the integer extend is in a different basic block.
2037  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2038  // by FastISel. This happens if there are instructions between the load
2039  // and the integer extend that couldn't be selected by FastISel.
2040  if (IntExtVal) {
2041  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2042  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2043  // it when it selects the integer extend.
2044  unsigned Reg = lookUpRegForValue(IntExtVal);
2045  auto *MI = MRI.getUniqueVRegDef(Reg);
2046  if (!MI) {
2047  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2048  if (WantZExt) {
2049  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2050  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2051  ResultReg = std::prev(I)->getOperand(0).getReg();
2052  removeDeadCode(I, std::next(I));
2053  } else
2054  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2055  /*IsKill=*/true,
2056  AArch64::sub_32);
2057  }
2058  updateValueMap(I, ResultReg);
2059  return true;
2060  }
2061 
2062  // The integer extend has already been emitted - delete all the instructions
2063  // that have been emitted by the integer extend lowering code and use the
2064  // result from the load instruction directly.
2065  while (MI) {
2066  Reg = 0;
2067  for (auto &Opnd : MI->uses()) {
2068  if (Opnd.isReg()) {
2069  Reg = Opnd.getReg();
2070  break;
2071  }
2072  }
2073  MachineBasicBlock::iterator I(MI);
2074  removeDeadCode(I, std::next(I));
2075  MI = nullptr;
2076  if (Reg)
2077  MI = MRI.getUniqueVRegDef(Reg);
2078  }
2079  updateValueMap(IntExtVal, ResultReg);
2080  return true;
2081  }
2082 
2083  updateValueMap(I, ResultReg);
2084  return true;
2085 }
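// Extension-folding example (IR below is illustrative): for
//   %b = load i8, i8* %p
//   %z = zext i8 %b to i64
// the extend is folded into the load: LDRB already zero-fills the upper bits
// of the W register and SUBREG_TO_REG re-labels the result as 64 bits, so no
// separate UBFM is emitted. A sign-extending use would instead select LDRSB
// with an X destination.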
2086 
2087 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2088  unsigned AddrReg,
2089  MachineMemOperand *MMO) {
2090  unsigned Opc;
2091  switch (VT.SimpleTy) {
2092  default: return false;
2093  case MVT::i8: Opc = AArch64::STLRB; break;
2094  case MVT::i16: Opc = AArch64::STLRH; break;
2095  case MVT::i32: Opc = AArch64::STLRW; break;
2096  case MVT::i64: Opc = AArch64::STLRX; break;
2097  }
2098 
2099  const MCInstrDesc &II = TII.get(Opc);
2100  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2101  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2102  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2103  .addReg(SrcReg)
2104  .addReg(AddrReg)
2105  .addMemOperand(MMO);
2106  return true;
2107 }
2108 
2109 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2110  MachineMemOperand *MMO) {
2111  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2112  return false;
2113 
2114  // Simplify this down to something we can handle.
2115  if (!simplifyAddress(Addr, VT))
2116  return false;
2117 
2118  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2119  if (!ScaleFactor)
2120  llvm_unreachable("Unexpected value type.");
2121 
2122  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2123  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2124  bool UseScaled = true;
2125  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2126  UseScaled = false;
2127  ScaleFactor = 1;
2128  }
2129 
2130  static const unsigned OpcTable[4][6] = {
2131  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2132  AArch64::STURSi, AArch64::STURDi },
2133  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2134  AArch64::STRSui, AArch64::STRDui },
2135  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2136  AArch64::STRSroX, AArch64::STRDroX },
2137  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2138  AArch64::STRSroW, AArch64::STRDroW }
2139  };
2140 
2141  unsigned Opc;
2142  bool VTIsi1 = false;
2143  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2144  Addr.getOffsetReg();
2145  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2146  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2147  Addr.getExtendType() == AArch64_AM::SXTW)
2148  Idx++;
2149 
2150  switch (VT.SimpleTy) {
2151  default: llvm_unreachable("Unexpected value type.");
2152  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2153  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2154  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2155  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2156  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2157  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2158  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2159  }
2160 
2161  // Storing an i1 requires special handling.
2162  if (VTIsi1 && SrcReg != AArch64::WZR) {
2163  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2164  assert(ANDReg && "Unexpected AND instruction emission failure.");
2165  SrcReg = ANDReg;
2166  }
2167  // Create the base instruction, then add the operands.
2168  const MCInstrDesc &II = TII.get(Opc);
2169  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2170  MachineInstrBuilder MIB =
2171  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2172  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2173 
2174  return true;
2175 }
2176 
2177 bool AArch64FastISel::selectStore(const Instruction *I) {
2178  MVT VT;
2179  const Value *Op0 = I->getOperand(0);
2180  // Verify we have a legal type before going any further. Currently, we handle
2181  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2182  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2183  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2184  return false;
2185 
2186  const Value *PtrV = I->getOperand(1);
2187  if (TLI.supportSwiftError()) {
2188  // Swifterror values can come from either a function parameter with
2189  // swifterror attribute or an alloca with swifterror attribute.
2190  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2191  if (Arg->hasSwiftErrorAttr())
2192  return false;
2193  }
2194 
2195  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2196  if (Alloca->isSwiftError())
2197  return false;
2198  }
2199  }
2200 
2201  // Get the value to be stored into a register. Use the zero register directly
2202  // when possible to avoid an unnecessary copy and a wasted register.
2203  unsigned SrcReg = 0;
2204  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2205  if (CI->isZero())
2206  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2207  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2208  if (CF->isZero() && !CF->isNegative()) {
2209  VT = MVT::getIntegerVT(VT.getSizeInBits());
2210  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2211  }
2212  }
2213 
2214  if (!SrcReg)
2215  SrcReg = getRegForValue(Op0);
2216 
2217  if (!SrcReg)
2218  return false;
2219 
2220  auto *SI = cast<StoreInst>(I);
2221 
2222  // Try to emit a STLR for seq_cst/release.
2223  if (SI->isAtomic()) {
2224  AtomicOrdering Ord = SI->getOrdering();
2225  // The non-atomic instructions are sufficient for relaxed stores.
2226  if (isReleaseOrStronger(Ord)) {
2227  // The STLR addressing mode only supports a base reg; pass that directly.
2228  unsigned AddrReg = getRegForValue(PtrV);
2229  return emitStoreRelease(VT, SrcReg, AddrReg,
2230  createMachineMemOperandFor(I));
2231  }
2232  }
2233 
2234  // See if we can handle this address.
2235  Address Addr;
2236  if (!computeAddress(PtrV, Addr, Op0->getType()))
2237  return false;
2238 
2239  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2240  return false;
2241  return true;
2242 }
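// Examples (illustrative): "store i32 0, i32* %p" reuses WZR and becomes
// "str wzr, [x0]" with no constant materialization; storing +0.0 goes
// through the integer zero register in the same way. A release or seq_cst
// atomic store is emitted as STLR on the plain base register, e.g.
// "stlr w1, [x0]".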
2243 
2244 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2245  switch (Pred) {
2246  case CmpInst::FCMP_ONE:
2247  case CmpInst::FCMP_UEQ:
2248  default:
2249  // AL is our "false" for now. The other two need more compares.
2250  return AArch64CC::AL;
2251  case CmpInst::ICMP_EQ:
2252  case CmpInst::FCMP_OEQ:
2253  return AArch64CC::EQ;
2254  case CmpInst::ICMP_SGT:
2255  case CmpInst::FCMP_OGT:
2256  return AArch64CC::GT;
2257  case CmpInst::ICMP_SGE:
2258  case CmpInst::FCMP_OGE:
2259  return AArch64CC::GE;
2260  case CmpInst::ICMP_UGT:
2261  case CmpInst::FCMP_UGT:
2262  return AArch64CC::HI;
2263  case CmpInst::FCMP_OLT:
2264  return AArch64CC::MI;
2265  case CmpInst::ICMP_ULE:
2266  case CmpInst::FCMP_OLE:
2267  return AArch64CC::LS;
2268  case CmpInst::FCMP_ORD:
2269  return AArch64CC::VC;
2270  case CmpInst::FCMP_UNO:
2271  return AArch64CC::VS;
2272  case CmpInst::FCMP_UGE:
2273  return AArch64CC::PL;
2274  case CmpInst::ICMP_SLT:
2275  case CmpInst::FCMP_ULT:
2276  return AArch64CC::LT;
2277  case CmpInst::ICMP_SLE:
2278  case CmpInst::FCMP_ULE:
2279  return AArch64CC::LE;
2280  case CmpInst::FCMP_UNE:
2281  case CmpInst::ICMP_NE:
2282  return AArch64CC::NE;
2283  case CmpInst::ICMP_UGE:
2284  return AArch64CC::HS;
2285  case CmpInst::ICMP_ULT:
2286  return AArch64CC::LO;
2287  }
2288 }
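// For example, the unsigned compare "icmp ult i32 %a, %b" maps to
// AArch64CC::LO while the signed "icmp slt" maps to LT; FCMP_ONE and
// FCMP_UEQ have no single condition code and are expanded by the callers
// below into two branches or two CSINCs.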
2289 
2290 /// Try to emit a combined compare-and-branch instruction.
2291 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2292  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2293  // will not be produced, as they are conditional branch instructions that do
2294  // not set flags.
2295  if (FuncInfo.MF->getFunction().hasFnAttribute(
2296  Attribute::SpeculativeLoadHardening))
2297  return false;
2298 
2299  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2300  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2301  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2302 
2303  const Value *LHS = CI->getOperand(0);
2304  const Value *RHS = CI->getOperand(1);
2305 
2306  MVT VT;
2307  if (!isTypeSupported(LHS->getType(), VT))
2308  return false;
2309 
2310  unsigned BW = VT.getSizeInBits();
2311  if (BW > 64)
2312  return false;
2313 
2314  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2315  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2316 
2317  // Try to take advantage of fallthrough opportunities.
2318  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2319  std::swap(TBB, FBB);
2320  Predicate = CmpInst::getInversePredicate(Predicate);
2321  }
2322 
2323  int TestBit = -1;
2324  bool IsCmpNE;
2325  switch (Predicate) {
2326  default:
2327  return false;
2328  case CmpInst::ICMP_EQ:
2329  case CmpInst::ICMP_NE:
2330  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2331  std::swap(LHS, RHS);
2332 
2333  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2334  return false;
2335 
2336  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2337  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2338  const Value *AndLHS = AI->getOperand(0);
2339  const Value *AndRHS = AI->getOperand(1);
2340 
2341  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2342  if (C->getValue().isPowerOf2())
2343  std::swap(AndLHS, AndRHS);
2344 
2345  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2346  if (C->getValue().isPowerOf2()) {
2347  TestBit = C->getValue().logBase2();
2348  LHS = AndLHS;
2349  }
2350  }
2351 
2352  if (VT == MVT::i1)
2353  TestBit = 0;
2354 
2355  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2356  break;
2357  case CmpInst::ICMP_SLT:
2358  case CmpInst::ICMP_SGE:
2359  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2360  return false;
2361 
2362  TestBit = BW - 1;
2363  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2364  break;
2365  case CmpInst::ICMP_SGT:
2366  case CmpInst::ICMP_SLE:
2367  if (!isa<ConstantInt>(RHS))
2368  return false;
2369 
2370  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2371  return false;
2372 
2373  TestBit = BW - 1;
2374  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2375  break;
2376  } // end switch
2377 
2378  static const unsigned OpcTable[2][2][2] = {
2379  { {AArch64::CBZW, AArch64::CBZX },
2380  {AArch64::CBNZW, AArch64::CBNZX} },
2381  { {AArch64::TBZW, AArch64::TBZX },
2382  {AArch64::TBNZW, AArch64::TBNZX} }
2383  };
2384 
2385  bool IsBitTest = TestBit != -1;
2386  bool Is64Bit = BW == 64;
2387  if (TestBit < 32 && TestBit >= 0)
2388  Is64Bit = false;
2389 
2390  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2391  const MCInstrDesc &II = TII.get(Opc);
2392 
2393  unsigned SrcReg = getRegForValue(LHS);
2394  if (!SrcReg)
2395  return false;
2396  bool SrcIsKill = hasTrivialKill(LHS);
2397 
2398  if (BW == 64 && !Is64Bit)
2399  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2400  AArch64::sub_32);
2401 
2402  if ((BW < 32) && !IsBitTest)
2403  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2404 
2405  // Emit the combined compare and branch instruction.
2406  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2407  MachineInstrBuilder MIB =
2408  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2409  .addReg(SrcReg, getKillRegState(SrcIsKill));
2410  if (IsBitTest)
2411  MIB.addImm(TestBit);
2412  MIB.addMBB(TBB);
2413 
2414  finishCondBranch(BI->getParent(), TBB, FBB);
2415  return true;
2416 }
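// Selection examples (registers and block labels are hypothetical):
//   icmp eq i64 %x, 0;  br ...   ->  cbz  x0, <bb>
//   icmp ne (and i32 %x, 4), 0   ->  tbnz w0, #2, <bb>
//   icmp slt i32 %x, 0           ->  tbnz w0, #31, <bb>
// The W-register form is used whenever the tested bit is below bit 32.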
2417 
2418 bool AArch64FastISel::selectBranch(const Instruction *I) {
2419  const BranchInst *BI = cast<BranchInst>(I);
2420  if (BI->isUnconditional()) {
2421  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2422  fastEmitBranch(MSucc, BI->getDebugLoc());
2423  return true;
2424  }
2425 
2426  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2427  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2428 
2429  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2430  if (CI->hasOneUse() && isValueAvailable(CI)) {
2431  // Try to optimize or fold the cmp.
2432  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2433  switch (Predicate) {
2434  default:
2435  break;
2436  case CmpInst::FCMP_FALSE:
2437  fastEmitBranch(FBB, DbgLoc);
2438  return true;
2439  case CmpInst::FCMP_TRUE:
2440  fastEmitBranch(TBB, DbgLoc);
2441  return true;
2442  }
2443 
2444  // Try to emit a combined compare-and-branch first.
2445  if (emitCompareAndBranch(BI))
2446  return true;
2447 
2448  // Try to take advantage of fallthrough opportunities.
2449  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2450  std::swap(TBB, FBB);
2451  Predicate = CmpInst::getInversePredicate(Predicate);
2452  }
2453 
2454  // Emit the cmp.
2455  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2456  return false;
2457 
2458  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2459  // instruction.
2460  AArch64CC::CondCode CC = getCompareCC(Predicate);
2461  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2462  switch (Predicate) {
2463  default:
2464  break;
2465  case CmpInst::FCMP_UEQ:
2466  ExtraCC = AArch64CC::EQ;
2467  CC = AArch64CC::VS;
2468  break;
2469  case CmpInst::FCMP_ONE:
2470  ExtraCC = AArch64CC::MI;
2471  CC = AArch64CC::GT;
2472  break;
2473  }
2474  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2475 
2476  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2477  if (ExtraCC != AArch64CC::AL) {
2478  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2479  .addImm(ExtraCC)
2480  .addMBB(TBB);
2481  }
2482 
2483  // Emit the branch.
2484  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2485  .addImm(CC)
2486  .addMBB(TBB);
2487 
2488  finishCondBranch(BI->getParent(), TBB, FBB);
2489  return true;
2490  }
2491  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2492  uint64_t Imm = CI->getZExtValue();
2493  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2494  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2495  .addMBB(Target);
2496 
2497  // Obtain the branch probability and add the target to the successor list.
2498  if (FuncInfo.BPI) {
2499  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2500  BI->getParent(), Target->getBasicBlock());
2501  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2502  } else
2503  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2504  return true;
2505  } else {
2506  AArch64CC::CondCode CC = AArch64CC::AL;
2507  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2508  // Fake request the condition, otherwise the intrinsic might be completely
2509  // optimized away.
2510  unsigned CondReg = getRegForValue(BI->getCondition());
2511  if (!CondReg)
2512  return false;
2513 
2514  // Emit the branch.
2515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2516  .addImm(CC)
2517  .addMBB(TBB);
2518 
2519  finishCondBranch(BI->getParent(), TBB, FBB);
2520  return true;
2521  }
2522  }
2523 
2524  unsigned CondReg = getRegForValue(BI->getCondition());
2525  if (CondReg == 0)
2526  return false;
2527  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2528 
2529  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2530  unsigned Opcode = AArch64::TBNZW;
2531  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2532  std::swap(TBB, FBB);
2533  Opcode = AArch64::TBZW;
2534  }
2535 
2536  const MCInstrDesc &II = TII.get(Opcode);
2537  unsigned ConstrainedCondReg
2538  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2539  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2540  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2541  .addImm(0)
2542  .addMBB(TBB);
2543 
2544  finishCondBranch(BI->getParent(), TBB, FBB);
2545  return true;
2546 }
2547 
2548 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2549  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2550  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2551  if (AddrReg == 0)
2552  return false;
2553 
2554  // Emit the indirect branch.
2555  const MCInstrDesc &II = TII.get(AArch64::BR);
2556  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2557  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2558 
2559  // Make sure the CFG is up-to-date.
2560  for (auto *Succ : BI->successors())
2561  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2562 
2563  return true;
2564 }
2565 
2566 bool AArch64FastISel::selectCmp(const Instruction *I) {
2567  const CmpInst *CI = cast<CmpInst>(I);
2568 
2569  // Vectors of i1 are weird: bail out.
2570  if (CI->getType()->isVectorTy())
2571  return false;
2572 
2573  // Try to optimize or fold the cmp.
2574  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2575  unsigned ResultReg = 0;
2576  switch (Predicate) {
2577  default:
2578  break;
2579  case CmpInst::FCMP_FALSE:
2580  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2581  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2582  TII.get(TargetOpcode::COPY), ResultReg)
2583  .addReg(AArch64::WZR, getKillRegState(true));
2584  break;
2585  case CmpInst::FCMP_TRUE:
2586  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2587  break;
2588  }
2589 
2590  if (ResultReg) {
2591  updateValueMap(I, ResultReg);
2592  return true;
2593  }
2594 
2595  // Emit the cmp.
2596  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2597  return false;
2598 
2599  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2600 
2601  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2602  // condition codes are inverted, because they are used by CSINC.
2603  static unsigned CondCodeTable[2][2] = {
2604  { AArch64CC::NE, AArch64CC::VC },
2605  { AArch64CC::PL, AArch64CC::LE }
2606  };
2607  unsigned *CondCodes = nullptr;
2608  switch (Predicate) {
2609  default:
2610  break;
2611  case CmpInst::FCMP_UEQ:
2612  CondCodes = &CondCodeTable[0][0];
2613  break;
2614  case CmpInst::FCMP_ONE:
2615  CondCodes = &CondCodeTable[1][0];
2616  break;
2617  }
2618 
2619  if (CondCodes) {
2620  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2621  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2622  TmpReg1)
2623  .addReg(AArch64::WZR, getKillRegState(true))
2624  .addReg(AArch64::WZR, getKillRegState(true))
2625  .addImm(CondCodes[0]);
2626  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2627  ResultReg)
2628  .addReg(TmpReg1, getKillRegState(true))
2629  .addReg(AArch64::WZR, getKillRegState(true))
2630  .addImm(CondCodes[1]);
2631 
2632  updateValueMap(I, ResultReg);
2633  return true;
2634  }
2635 
2636  // Now set a register based on the comparison.
2637  AArch64CC::CondCode CC = getCompareCC(Predicate);
2638  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2639  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2640  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2641  ResultReg)
2642  .addReg(AArch64::WZR, getKillRegState(true))
2643  .addReg(AArch64::WZR, getKillRegState(true))
2644  .addImm(invertedCC);
2645 
2646  updateValueMap(I, ResultReg);
2647  return true;
2648 }
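// Materialization example (registers are illustrative): "icmp ugt i32 %a, %b"
// becomes "cmp w0, w1" followed by "csinc w2, wzr, wzr, ls" (the CSET alias
// with the inverted condition), leaving 1 in w2 when HI holds and 0
// otherwise.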
2649 
2650 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2651 /// value.
2652 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2653  if (!SI->getType()->isIntegerTy(1))
2654  return false;
2655 
2656  const Value *Src1Val, *Src2Val;
2657  unsigned Opc = 0;
2658  bool NeedExtraOp = false;
2659  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2660  if (CI->isOne()) {
2661  Src1Val = SI->getCondition();
2662  Src2Val = SI->getFalseValue();
2663  Opc = AArch64::ORRWrr;
2664  } else {
2665  assert(CI->isZero());
2666  Src1Val = SI->getFalseValue();
2667  Src2Val = SI->getCondition();
2668  Opc = AArch64::BICWrr;
2669  }
2670  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2671  if (CI->isOne()) {
2672  Src1Val = SI->getCondition();
2673  Src2Val = SI->getTrueValue();
2674  Opc = AArch64::ORRWrr;
2675  NeedExtraOp = true;
2676  } else {
2677  assert(CI->isZero());
2678  Src1Val = SI->getCondition();
2679  Src2Val = SI->getTrueValue();
2680  Opc = AArch64::ANDWrr;
2681  }
2682  }
2683 
2684  if (!Opc)
2685  return false;
2686 
2687  unsigned Src1Reg = getRegForValue(Src1Val);
2688  if (!Src1Reg)
2689  return false;
2690  bool Src1IsKill = hasTrivialKill(Src1Val);
2691 
2692  unsigned Src2Reg = getRegForValue(Src2Val);
2693  if (!Src2Reg)
2694  return false;
2695  bool Src2IsKill = hasTrivialKill(Src2Val);
2696 
2697  if (NeedExtraOp) {
2698  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2699  Src1IsKill = true;
2700  }
2701  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2702  Src1IsKill, Src2Reg, Src2IsKill);
2703  updateValueMap(SI, ResultReg);
2704  return true;
2705 }
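// Folding summary for the cases above (illustrative):
//   select i1 %c, true,  %b  ->  orr  (%c | %b)
//   select i1 %c, false, %b  ->  bic  (%b & ~%c)
//   select i1 %c, %a, false  ->  and  (%c & %a)
//   select i1 %c, %a, true   ->  orr  ((%c ^ 1) | %a)
// so no conditional-select instruction is needed for these patterns.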
2706 
2707 bool AArch64FastISel::selectSelect(const Instruction *I) {
2708  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2709  MVT VT;
2710  if (!isTypeSupported(I->getType(), VT))
2711  return false;
2712 
2713  unsigned Opc;
2714  const TargetRegisterClass *RC;
2715  switch (VT.SimpleTy) {
2716  default:
2717  return false;
2718  case MVT::i1:
2719  case MVT::i8:
2720  case MVT::i16:
2721  case MVT::i32:
2722  Opc = AArch64::CSELWr;
2723  RC = &AArch64::GPR32RegClass;
2724  break;
2725  case MVT::i64:
2726  Opc = AArch64::CSELXr;
2727  RC = &AArch64::GPR64RegClass;
2728  break;
2729  case MVT::f32:
2730  Opc = AArch64::FCSELSrrr;
2731  RC = &AArch64::FPR32RegClass;
2732  break;
2733  case MVT::f64:
2734  Opc = AArch64::FCSELDrrr;
2735  RC = &AArch64::FPR64RegClass;
2736  break;
2737  }
2738 
2739  const SelectInst *SI = cast<SelectInst>(I);
2740  const Value *Cond = SI->getCondition();
2741  AArch64CC::CondCode CC = AArch64CC::AL;
2742  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2743 
2744  if (optimizeSelect(SI))
2745  return true;
2746 
2747  // Try to pickup the flags, so we don't have to emit another compare.
2748  if (foldXALUIntrinsic(CC, I, Cond)) {
2749  // Fake request the condition to force emission of the XALU intrinsic.
2750  unsigned CondReg = getRegForValue(Cond);
2751  if (!CondReg)
2752  return false;
2753  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2754  isValueAvailable(Cond)) {
2755  const auto *Cmp = cast<CmpInst>(Cond);
2756  // Try to optimize or fold the cmp.
2757  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2758  const Value *FoldSelect = nullptr;
2759  switch (Predicate) {
2760  default:
2761  break;
2762  case CmpInst::FCMP_FALSE:
2763  FoldSelect = SI->getFalseValue();
2764  break;
2765  case CmpInst::FCMP_TRUE:
2766  FoldSelect = SI->getTrueValue();
2767  break;
2768  }
2769 
2770  if (FoldSelect) {
2771  unsigned SrcReg = getRegForValue(FoldSelect);
2772  if (!SrcReg)
2773  return false;
2774  unsigned UseReg = lookUpRegForValue(SI);
2775  if (UseReg)
2776  MRI.clearKillFlags(UseReg);
2777 
2778  updateValueMap(I, SrcReg);
2779  return true;
2780  }
2781 
2782  // Emit the cmp.
2783  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2784  return false;
2785 
2786  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2787  CC = getCompareCC(Predicate);
2788  switch (Predicate) {
2789  default:
2790  break;
2791  case CmpInst::FCMP_UEQ:
2792  ExtraCC = AArch64CC::EQ;
2793  CC = AArch64CC::VS;
2794  break;
2795  case CmpInst::FCMP_ONE:
2796  ExtraCC = AArch64CC::MI;
2797  CC = AArch64CC::GT;
2798  break;
2799  }
2800  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2801  } else {
2802  unsigned CondReg = getRegForValue(Cond);
2803  if (!CondReg)
2804  return false;
2805  bool CondIsKill = hasTrivialKill(Cond);
2806 
2807  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2808  CondReg = constrainOperandRegClass(II, CondReg, 1);
2809 
2810  // Emit a TST instruction (ANDS wzr, reg, #imm).
2811  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2812  AArch64::WZR)
2813  .addReg(CondReg, getKillRegState(CondIsKill))
2814  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2815  }
2816 
2817  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2818  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2819 
2820  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2821  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2822 
2823  if (!Src1Reg || !Src2Reg)
2824  return false;
2825 
2826  if (ExtraCC != AArch64CC::AL) {
2827  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2828  Src2IsKill, ExtraCC);
2829  Src2IsKill = true;
2830  }
2831  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2832  Src2IsKill, CC);
2833  updateValueMap(I, ResultReg);
2834  return true;
2835 }
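// Example (illustrative): "select i1 %c, i32 %a, i32 %b" where %c comes from
// an "fcmp one" needs two CSELWr instructions, because ONE has no single
// condition code: the first selects %a on MI, the second selects %a on GT
// and otherwise keeps the intermediate result.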
2836 
2837 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2838  Value *V = I->getOperand(0);
2839  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2840  return false;
2841 
2842  unsigned Op = getRegForValue(V);
2843  if (Op == 0)
2844  return false;
2845 
2846  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2847  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2848  ResultReg).addReg(Op);
2849  updateValueMap(I, ResultReg);
2850  return true;
2851 }
2852 
2853 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2854  Value *V = I->getOperand(0);
2855  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2856  return false;
2857 
2858  unsigned Op = getRegForValue(V);
2859  if (Op == 0)
2860  return false;
2861 
2862  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2863  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2864  ResultReg).addReg(Op);
2865  updateValueMap(I, ResultReg);
2866  return true;
2867 }
2868 
2869 // FPToUI and FPToSI
2870 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2871  MVT DestVT;
2872  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2873  return false;
2874 
2875  unsigned SrcReg = getRegForValue(I->getOperand(0));
2876  if (SrcReg == 0)
2877  return false;
2878 
2879  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2880  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2881  return false;
2882 
2883  unsigned Opc;
2884  if (SrcVT == MVT::f64) {
2885  if (Signed)
2886  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2887  else
2888  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2889  } else {
2890  if (Signed)
2891  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2892  else
2893  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2894  }
2895  unsigned ResultReg = createResultReg(
2896  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2897  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2898  .addReg(SrcReg);
2899  updateValueMap(I, ResultReg);
2900  return true;
2901 }
2902 
2903 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2904  MVT DestVT;
2905  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2906  return false;
2907  // Let regular ISEL handle FP16
2908  if (DestVT == MVT::f16)
2909  return false;
2910 
2911  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2912  "Unexpected value type.");
2913 
2914  unsigned SrcReg = getRegForValue(I->getOperand(0));
2915  if (!SrcReg)
2916  return false;
2917  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2918 
2919  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2920 
2921  // Handle sign-extension.
2922  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2923  SrcReg =
2924  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2925  if (!SrcReg)
2926  return false;
2927  SrcIsKill = true;
2928  }
2929 
2930  unsigned Opc;
2931  if (SrcVT == MVT::i64) {
2932  if (Signed)
2933  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2934  else
2935  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2936  } else {
2937  if (Signed)
2938  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2939  else
2940  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2941  }
2942 
2943  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2944  SrcIsKill);
2945  updateValueMap(I, ResultReg);
2946  return true;
2947 }
2948 
2949 bool AArch64FastISel::fastLowerArguments() {
2950  if (!FuncInfo.CanLowerReturn)
2951  return false;
2952 
2953  const Function *F = FuncInfo.Fn;
2954  if (F->isVarArg())
2955  return false;
2956 
2957  CallingConv::ID CC = F->getCallingConv();
2958  if (CC != CallingConv::C && CC != CallingConv::Swift)
2959  return false;
2960 
2961  if (Subtarget->hasCustomCallingConv())
2962  return false;
2963 
2964  // Only handle simple cases of up to 8 GPR and FPR each.
2965  unsigned GPRCnt = 0;
2966  unsigned FPRCnt = 0;
2967  for (auto const &Arg : F->args()) {
2968  if (Arg.hasAttribute(Attribute::ByVal) ||
2969  Arg.hasAttribute(Attribute::InReg) ||
2970  Arg.hasAttribute(Attribute::StructRet) ||
2971  Arg.hasAttribute(Attribute::SwiftSelf) ||
2972  Arg.hasAttribute(Attribute::SwiftError) ||
2973  Arg.hasAttribute(Attribute::Nest))
2974  return false;
2975 
2976  Type *ArgTy = Arg.getType();
2977  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2978  return false;
2979 
2980  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2981  if (!ArgVT.isSimple())
2982  return false;
2983 
2984  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2985  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2986  return false;
2987 
2988  if (VT.isVector() &&
2989  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2990  return false;
2991 
2992  if (VT >= MVT::i1 && VT <= MVT::i64)
2993  ++GPRCnt;
2994  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2995  VT.is128BitVector())
2996  ++FPRCnt;
2997  else
2998  return false;
2999 
3000  if (GPRCnt > 8 || FPRCnt > 8)
3001  return false;
3002  }
3003 
3004  static const MCPhysReg Registers[6][8] = {
3005  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
3006  AArch64::W5, AArch64::W6, AArch64::W7 },
3007  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
3008  AArch64::X5, AArch64::X6, AArch64::X7 },
3009  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
3010  AArch64::H5, AArch64::H6, AArch64::H7 },
3011  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
3012  AArch64::S5, AArch64::S6, AArch64::S7 },
3013  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
3014  AArch64::D5, AArch64::D6, AArch64::D7 },
3015  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
3016  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
3017  };
3018 
3019  unsigned GPRIdx = 0;
3020  unsigned FPRIdx = 0;
3021  for (auto const &Arg : F->args()) {
3022  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3023  unsigned SrcReg;
3024  const TargetRegisterClass *RC;
3025  if (VT >= MVT::i1 && VT <= MVT::i32) {
3026  SrcReg = Registers[0][GPRIdx++];
3027  RC = &AArch64::GPR32RegClass;
3028  VT = MVT::i32;
3029  } else if (VT == MVT::i64) {
3030  SrcReg = Registers[1][GPRIdx++];
3031  RC = &AArch64::GPR64RegClass;
3032  } else if (VT == MVT::f16) {
3033  SrcReg = Registers[2][FPRIdx++];
3034  RC = &AArch64::FPR16RegClass;
3035  } else if (VT == MVT::f32) {
3036  SrcReg = Registers[3][FPRIdx++];
3037  RC = &AArch64::FPR32RegClass;
3038  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3039  SrcReg = Registers[4][FPRIdx++];
3040  RC = &AArch64::FPR64RegClass;
3041  } else if (VT.is128BitVector()) {
3042  SrcReg = Registers[5][FPRIdx++];
3043  RC = &AArch64::FPR128RegClass;
3044  } else
3045  llvm_unreachable("Unexpected value type.");
3046 
3047  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3048  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3049  // Without this, EmitLiveInCopies may eliminate the livein if its only
3050  // use is a bitcast (which isn't turned into an instruction).
3051  unsigned ResultReg = createResultReg(RC);
3052  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3053  TII.get(TargetOpcode::COPY), ResultReg)
3054  .addReg(DstReg, getKillRegState(true));
3055  updateValueMap(&Arg, ResultReg);
3056  }
3057  return true;
3058 }
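// Example assignment (illustrative): for
//   define i32 @f(i32 %a, i64 %b, float %c, <2 x i32> %d)
// the arguments arrive in W0, X1, S0 and D1; each live-in is copied into a
// fresh virtual register so the value survives even if its only use is a
// bitcast.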
3059 
3060 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3061  SmallVectorImpl<MVT> &OutVTs,
3062  unsigned &NumBytes) {
3063  CallingConv::ID CC = CLI.CallConv;
3064  SmallVector<CCValAssign, 16> ArgLocs;
3065  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3066  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3067 
3068  // Get a count of how many bytes are to be pushed on the stack.
3069  NumBytes = CCInfo.getNextStackOffset();
3070 
3071  // Issue CALLSEQ_START
3072  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3073  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3074  .addImm(NumBytes).addImm(0);
3075 
3076  // Process the args.
3077  for (CCValAssign &VA : ArgLocs) {
3078  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3079  MVT ArgVT = OutVTs[VA.getValNo()];
3080 
3081  unsigned ArgReg = getRegForValue(ArgVal);
3082  if (!ArgReg)
3083  return false;
3084 
3085  // Handle arg promotion: SExt, ZExt, AExt.
3086  switch (VA.getLocInfo()) {
3087  case CCValAssign::Full:
3088  break;
3089  case CCValAssign::SExt: {
3090  MVT DestVT = VA.getLocVT();
3091  MVT SrcVT = ArgVT;
3092  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3093  if (!ArgReg)
3094  return false;
3095  break;
3096  }
3097  case CCValAssign::AExt:
3098  // Intentional fall-through.
3099  case CCValAssign::ZExt: {
3100  MVT DestVT = VA.getLocVT();
3101  MVT SrcVT = ArgVT;
3102  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3103  if (!ArgReg)
3104  return false;
3105  break;
3106  }
3107  default:
3108  llvm_unreachable("Unknown arg promotion!");
3109  }
3110 
3111  // Now copy/store arg to correct locations.
3112  if (VA.isRegLoc() && !VA.needsCustom()) {
3113  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3114  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3115  CLI.OutRegs.push_back(VA.getLocReg());
3116  } else if (VA.needsCustom()) {
3117  // FIXME: Handle custom args.
3118  return false;
3119  } else {
3120  assert(VA.isMemLoc() && "Assuming store on stack.");
3121 
3122  // Don't emit stores for undef values.
3123  if (isa<UndefValue>(ArgVal))
3124  continue;
3125 
3126  // Need to store on the stack.
3127  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3128 
3129  unsigned BEAlign = 0;
3130  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3131  BEAlign = 8 - ArgSize;
3132 
3133  Address Addr;
3134  Addr.setKind(Address::RegBase);
3135  Addr.setReg(AArch64::SP);
3136  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3137 
3138  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3139  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3140  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3141  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3142 
3143  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3144  return false;
3145  }
3146  }
3147  return true;
3148 }
3149 
3150 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3151  unsigned NumBytes) {
3152  CallingConv::ID CC = CLI.CallConv;
3153 
3154  // Issue CALLSEQ_END
3155  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3156  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3157  .addImm(NumBytes).addImm(0);
3158 
3159  // Now the return value.
3160  if (RetVT != MVT::isVoid) {
3161  SmallVector<CCValAssign, 16> RVLocs;
3162  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3163  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3164 
3165  // Only handle a single return value.
3166  if (RVLocs.size() != 1)
3167  return false;
3168 
3169  // Copy all of the result registers out of their specified physreg.
3170  MVT CopyVT = RVLocs[0].getValVT();
3171 
3172  // TODO: Handle big-endian results
3173  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3174  return false;
3175 
3176  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3177  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3178  TII.get(TargetOpcode::COPY), ResultReg)
3179  .addReg(RVLocs[0].getLocReg());
3180  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3181 
3182  CLI.ResultReg = ResultReg;
3183  CLI.NumResultRegs = 1;
3184  }
3185 
3186  return true;
3187 }
3188 
3189 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3190  CallingConv::ID CC = CLI.CallConv;
3191  bool IsTailCall = CLI.IsTailCall;
3192  bool IsVarArg = CLI.IsVarArg;
3193  const Value *Callee = CLI.Callee;
3194  MCSymbol *Symbol = CLI.Symbol;
3195 
3196  if (!Callee && !Symbol)
3197  return false;
3198 
3199  // Allow SelectionDAG isel to handle tail calls.
3200  if (IsTailCall)
3201  return false;
3202 
3203  // FIXME: we could and should support this, but for now correctness at -O0 is
3204  // more important.
3205  if (Subtarget->isTargetILP32())
3206  return false;
3207 
3208  CodeModel::Model CM = TM.getCodeModel();
3209  // Only support the small-addressing and large code models.
3210  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3211  return false;
3212 
3213  // FIXME: Add large code model support for ELF.
3214  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3215  return false;
3216 
3217  // Let SDISel handle vararg functions.
3218  if (IsVarArg)
3219  return false;
3220 
3221  // FIXME: Only handle *simple* calls for now.
3222  MVT RetVT;
3223  if (CLI.RetTy->isVoidTy())
3224  RetVT = MVT::isVoid;
3225  else if (!isTypeLegal(CLI.RetTy, RetVT))
3226  return false;
3227 
3228  for (auto Flag : CLI.OutFlags)
3229  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3230  Flag.isSwiftSelf() || Flag.isSwiftError())
3231  return false;
3232 
3233  // Set up the argument vectors.
3234  SmallVector<MVT, 16> OutVTs;
3235  OutVTs.reserve(CLI.OutVals.size());
3236 
3237  for (auto *Val : CLI.OutVals) {
3238  MVT VT;
3239  if (!isTypeLegal(Val->getType(), VT) &&
3240  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3241  return false;
3242 
3243  // We don't handle vector parameters yet.
3244  if (VT.isVector() || VT.getSizeInBits() > 64)
3245  return false;
3246 
3247  OutVTs.push_back(VT);
3248  }
3249 
3250  Address Addr;
3251  if (Callee && !computeCallAddress(Callee, Addr))
3252  return false;
3253 
3254  // Handle the arguments now that we've gotten them.
3255  unsigned NumBytes;
3256  if (!processCallArgs(CLI, OutVTs, NumBytes))
3257  return false;
3258 
3259  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3260  if (RegInfo->isAnyArgRegReserved(*MF))
3261  RegInfo->emitReservedArgRegCallError(*MF);
3262 
3263  // Issue the call.
3264  MachineInstrBuilder MIB;
3265  if (Subtarget->useSmallAddressing()) {
3266  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3267  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3268  if (Symbol)
3269  MIB.addSym(Symbol, 0);
3270  else if (Addr.getGlobalValue())
3271  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3272  else if (Addr.getReg()) {
3273  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3274  MIB.addReg(Reg);
3275  } else
3276  return false;
3277  } else {
3278  unsigned CallReg = 0;
3279  if (Symbol) {
3280  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3281  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3282  ADRPReg)
3283  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3284 
3285  CallReg = createResultReg(&AArch64::GPR64RegClass);
3286  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3287  TII.get(AArch64::LDRXui), CallReg)
3288  .addReg(ADRPReg)
3289  .addSym(Symbol,
3290  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3291  } else if (Addr.getGlobalValue())
3292  CallReg = materializeGV(Addr.getGlobalValue());
3293  else if (Addr.getReg())
3294  CallReg = Addr.getReg();
3295 
3296  if (!CallReg)
3297  return false;
3298 
3299  const MCInstrDesc &II = TII.get(AArch64::BLR);
3300  CallReg = constrainOperandRegClass(II, CallReg, 0);
3301  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3302  }
3303 
3304  // Add implicit physical register uses to the call.
3305  for (auto Reg : CLI.OutRegs)
3306  MIB.addReg(Reg, RegState::Implicit);
3307 
3308  // Add a register mask with the call-preserved registers.
3309  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3310  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3311 
3312  CLI.Call = MIB;
3313 
3314  // Finish off the call including any return values.
3315  return finishCall(CLI, RetVT, NumBytes);
3316 }
3317 
3318 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3319  if (Alignment)
3320  return Len / Alignment <= 4;
3321  else
3322  return Len < 32;
3323 }
3324 
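/// Inline a small memcpy as a sequence of load/store pairs, always copying the
/// widest chunk the remaining length and alignment allow (i64 down to i8). For
/// example, 10 bytes with 8-byte (or unknown) alignment become one i64 copy
/// followed by one i16 copy.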
3325 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3326  uint64_t Len, unsigned Alignment) {
3327  // Make sure we don't bloat code by inlining very large memcpy's.
3328  if (!isMemCpySmall(Len, Alignment))
3329  return false;
3330 
3331  int64_t UnscaledOffset = 0;
3332  Address OrigDest = Dest;
3333  Address OrigSrc = Src;
3334 
3335  while (Len) {
3336  MVT VT;
3337  if (!Alignment || Alignment >= 8) {
3338  if (Len >= 8)
3339  VT = MVT::i64;
3340  else if (Len >= 4)
3341  VT = MVT::i32;
3342  else if (Len >= 2)
3343  VT = MVT::i16;
3344  else {
3345  VT = MVT::i8;
3346  }
3347  } else {
3348  // Bound based on alignment.
3349  if (Len >= 4 && Alignment == 4)
3350  VT = MVT::i32;
3351  else if (Len >= 2 && Alignment == 2)
3352  VT = MVT::i16;
3353  else {
3354  VT = MVT::i8;
3355  }
3356  }
3357 
3358  unsigned ResultReg = emitLoad(VT, VT, Src);
3359  if (!ResultReg)
3360  return false;
3361 
3362  if (!emitStore(VT, ResultReg, Dest))
3363  return false;
3364 
3365  int64_t Size = VT.getSizeInBits() / 8;
3366  Len -= Size;
3367  UnscaledOffset += Size;
3368 
3369  // We need to recompute the unscaled offset for each iteration.
3370  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3371  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3372  }
3373 
3374  return true;
3375 }
3376 
3377 /// Check if it is possible to fold the condition from the XALU intrinsic
3378 /// into the user. The condition code will only be updated on success.
3379 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3380  const Instruction *I,
3381  const Value *Cond) {
3382  if (!isa<ExtractValueInst>(Cond))
3383  return false;
3384 
3385  const auto *EV = cast<ExtractValueInst>(Cond);
3386  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3387  return false;
3388 
3389  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3390  MVT RetVT;
3391  const Function *Callee = II->getCalledFunction();
3392  Type *RetTy =
3393  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3394  if (!isTypeLegal(RetTy, RetVT))
3395  return false;
3396 
3397  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3398  return false;
3399 
3400  const Value *LHS = II->getArgOperand(0);
3401  const Value *RHS = II->getArgOperand(1);
3402 
3403  // Canonicalize immediate to the RHS.
3404  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3405  isCommutativeIntrinsic(II))
3406  std::swap(LHS, RHS);
3407 
3408  // Simplify multiplies.
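 // x * 2 overflows exactly when x + x does, so only the intrinsic ID (and
 // hence the condition code picked below) needs to be adjusted here.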
3409  Intrinsic::ID IID = II->getIntrinsicID();
3410  switch (IID) {
3411  default:
3412  break;
3413  case Intrinsic::smul_with_overflow:
3414  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3415  if (C->getValue() == 2)
3416  IID = Intrinsic::sadd_with_overflow;
3417  break;
3418  case Intrinsic::umul_with_overflow:
3419  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3420  if (C->getValue() == 2)
3421  IID = Intrinsic::uadd_with_overflow;
3422  break;
3423  }
3424 
3425  AArch64CC::CondCode TmpCC;
3426  switch (IID) {
3427  default:
3428  return false;
3429  case Intrinsic::sadd_with_overflow:
3430  case Intrinsic::ssub_with_overflow:
3431  TmpCC = AArch64CC::VS;
3432  break;
3433  case Intrinsic::uadd_with_overflow:
3434  TmpCC = AArch64CC::HS;
3435  break;
3436  case Intrinsic::usub_with_overflow:
3437  TmpCC = AArch64CC::LO;
3438  break;
3439  case Intrinsic::smul_with_overflow:
3440  case Intrinsic::umul_with_overflow:
3441  TmpCC = AArch64CC::NE;
3442  break;
3443  }
3444 
3445  // Check if both instructions are in the same basic block.
3446  if (!isValueAvailable(II))
3447  return false;
3448 
3449  // Make sure nothing is in the way.
3450  BasicBlock::const_iterator Start(I);
3451  BasicBlock::const_iterator End(II);
3452  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3453  // We only expect extractvalue instructions between the intrinsic and the
3454  // instruction to be selected.
3455  if (!isa<ExtractValueInst>(Itr))
3456  return false;
3457 
3458  // Check that the extractvalue operand comes from the intrinsic.
3459  const auto *EVI = cast<ExtractValueInst>(Itr);
3460  if (EVI->getAggregateOperand() != II)
3461  return false;
3462  }
3463 
3464  CC = TmpCC;
3465  return true;
3466 }
3467 
3468 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3469  // FIXME: Handle more intrinsics.
3470  switch (II->getIntrinsicID()) {
3471  default: return false;
3472  case Intrinsic::frameaddress: {
3473  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3474  MFI.setFrameAddressIsTaken(true);
3475 
3476  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3477  Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3478  Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3479  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3480  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3481  // Recursively load frame address
3482  // ldr x0, [fp]
3483  // ldr x0, [x0]
3484  // ldr x0, [x0]
3485  // ...
3486  unsigned DestReg;
3487  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3488  while (Depth--) {
3489  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3490  SrcReg, /*IsKill=*/true, 0);
3491  assert(DestReg && "Unexpected LDR instruction emission failure.");
3492  SrcReg = DestReg;
3493  }
3494 
3495  updateValueMap(II, SrcReg);
3496  return true;
3497  }
3498  case Intrinsic::sponentry: {
3499  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3500 
3501  // SP = FP + Fixed Object + 16
3502  int FI = MFI.CreateFixedObject(4, 0, false);
3503  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3505  TII.get(AArch64::ADDXri), ResultReg)
3506  .addFrameIndex(FI)
3507  .addImm(0)
3508  .addImm(0);
3509 
3510  updateValueMap(II, ResultReg);
3511  return true;
3512  }
3513  case Intrinsic::memcpy:
3514  case Intrinsic::memmove: {
3515  const auto *MTI = cast<MemTransferInst>(II);
3516  // Don't handle volatile.
3517  if (MTI->isVolatile())
3518  return false;
3519 
3520  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3521  // we would emit dead code because we don't currently handle memmoves.
3522  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3523  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3524  // Small memcpy's are common enough that we want to do them without a call
3525  // if possible.
3526  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3527  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3528  MTI->getSourceAlignment());
3529  if (isMemCpySmall(Len, Alignment)) {
3530  Address Dest, Src;
3531  if (!computeAddress(MTI->getRawDest(), Dest) ||
3532  !computeAddress(MTI->getRawSource(), Src))
3533  return false;
3534  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3535  return true;
3536  }
3537  }
3538 
3539  if (!MTI->getLength()->getType()->isIntegerTy(64))
3540  return false;
3541 
3542  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3543  // Fast instruction selection doesn't support the special
3544  // address spaces.
3545  return false;
3546 
3547  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3548  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3549  }
3550  case Intrinsic::memset: {
3551  const MemSetInst *MSI = cast<MemSetInst>(II);
3552  // Don't handle volatile.
3553  if (MSI->isVolatile())
3554  return false;
3555 
3556  if (!MSI->getLength()->getType()->isIntegerTy(64))
3557  return false;
3558 
3559  if (MSI->getDestAddressSpace() > 255)
3560  // Fast instruction selection doesn't support the special
3561  // address spaces.
3562  return false;
3563 
3564  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3565  }
3566  case Intrinsic::sin:
3567  case Intrinsic::cos:
3568  case Intrinsic::pow: {
3569  MVT RetVT;
3570  if (!isTypeLegal(II->getType(), RetVT))
3571  return false;
3572 
3573  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3574  return false;
3575 
3576  static const RTLIB::Libcall LibCallTable[3][2] = {
3577  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3578  { RTLIB::COS_F32, RTLIB::COS_F64 },
3579  { RTLIB::POW_F32, RTLIB::POW_F64 }
3580  };
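 // Rows are indexed by intrinsic (sin, cos, pow), columns by result width
 // (f32, f64).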
3581  RTLIB::Libcall LC;
3582  bool Is64Bit = RetVT == MVT::f64;
3583  switch (II->getIntrinsicID()) {
3584  default:
3585  llvm_unreachable("Unexpected intrinsic.");
3586  case Intrinsic::sin:
3587  LC = LibCallTable[0][Is64Bit];
3588  break;
3589  case Intrinsic::cos:
3590  LC = LibCallTable[1][Is64Bit];
3591  break;
3592  case Intrinsic::pow:
3593  LC = LibCallTable[2][Is64Bit];
3594  break;
3595  }
3596 
3597  ArgListTy Args;
3598  Args.reserve(II->getNumArgOperands());
3599 
3600  // Populate the argument list.
3601  for (auto &Arg : II->arg_operands()) {
3602  ArgListEntry Entry;
3603  Entry.Val = Arg;
3604  Entry.Ty = Arg->getType();
3605  Args.push_back(Entry);
3606  }
3607 
3608  CallLoweringInfo CLI;
3609  MCContext &Ctx = MF->getContext();
3610  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3611  TLI.getLibcallName(LC), std::move(Args));
3612  if (!lowerCallTo(CLI))
3613  return false;
3614  updateValueMap(II, CLI.ResultReg);
3615  return true;
3616  }
3617  case Intrinsic::fabs: {
3618  MVT VT;
3619  if (!isTypeLegal(II->getType(), VT))
3620  return false;
3621 
3622  unsigned Opc;
3623  switch (VT.SimpleTy) {
3624  default:
3625  return false;
3626  case MVT::f32:
3627  Opc = AArch64::FABSSr;
3628  break;
3629  case MVT::f64:
3630  Opc = AArch64::FABSDr;
3631  break;
3632  }
3633  unsigned SrcReg = getRegForValue(II->getOperand(0));
3634  if (!SrcReg)
3635  return false;
3636  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3637  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3638  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3639  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3640  updateValueMap(II, ResultReg);
3641  return true;
3642  }
3643  case Intrinsic::trap:
3644  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3645  .addImm(1);
3646  return true;
3647  case Intrinsic::debugtrap: {
3648  if (Subtarget->isTargetWindows()) {
3649  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3650  .addImm(0xF000);
3651  return true;
3652  }
3653  break;
3654  }
3655 
3656  case Intrinsic::sqrt: {
3657  Type *RetTy = II->getCalledFunction()->getReturnType();
3658 
3659  MVT VT;
3660  if (!isTypeLegal(RetTy, VT))
3661  return false;
3662 
3663  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3664  if (!Op0Reg)
3665  return false;
3666  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3667 
3668  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3669  if (!ResultReg)
3670  return false;
3671 
3672  updateValueMap(II, ResultReg);
3673  return true;
3674  }
3675  case Intrinsic::sadd_with_overflow:
3676  case Intrinsic::uadd_with_overflow:
3677  case Intrinsic::ssub_with_overflow:
3678  case Intrinsic::usub_with_overflow:
3679  case Intrinsic::smul_with_overflow:
3680  case Intrinsic::umul_with_overflow: {
3681  // This implements the basic lowering of the xalu with overflow intrinsics.
3682  const Function *Callee = II->getCalledFunction();
3683  auto *Ty = cast<StructType>(Callee->getReturnType());
3684  Type *RetTy = Ty->getTypeAtIndex(0U);
3685 
3686  MVT VT;
3687  if (!isTypeLegal(RetTy, VT))
3688  return false;
3689 
3690  if (VT != MVT::i32 && VT != MVT::i64)
3691  return false;
3692 
3693  const Value *LHS = II->getArgOperand(0);
3694  const Value *RHS = II->getArgOperand(1);
3695  // Canonicalize immediate to the RHS.
3696  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3697  isCommutativeIntrinsic(II))
3698  std::swap(LHS, RHS);
3699 
3700  // Simplify multiplies.
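 // Rewrite x * 2 as the corresponding x + x overflow intrinsic: the add sets
 // the same overflow condition and avoids the multiply/compare sequence below.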
3701  Intrinsic::ID IID = II->getIntrinsicID();
3702  switch (IID) {
3703  default:
3704  break;
3705  case Intrinsic::smul_with_overflow:
3706  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3707  if (C->getValue() == 2) {
3708  IID = Intrinsic::sadd_with_overflow;
3709  RHS = LHS;
3710  }
3711  break;
3712  case Intrinsic::umul_with_overflow:
3713  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3714  if (C->getValue() == 2) {
3715  IID = Intrinsic::uadd_with_overflow;
3716  RHS = LHS;
3717  }
3718  break;
3719  }
3720 
3721  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3722  AArch64CC::CondCode CC = AArch64CC::AL;
3723  switch (IID) {
3724  default: llvm_unreachable("Unexpected intrinsic!");
3725  case Intrinsic::sadd_with_overflow:
3726  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3727  CC = AArch64CC::VS;
3728  break;
3729  case Intrinsic::uadd_with_overflow:
3730  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3731  CC = AArch64CC::HS;
3732  break;
3733  case Intrinsic::ssub_with_overflow:
3734  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3735  CC = AArch64CC::VS;
3736  break;
3737  case Intrinsic::usub_with_overflow:
3738  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3739  CC = AArch64CC::LO;
3740  break;
3741  case Intrinsic::smul_with_overflow: {
3742  CC = AArch64CC::NE;
3743  unsigned LHSReg = getRegForValue(LHS);
3744  if (!LHSReg)
3745  return false;
3746  bool LHSIsKill = hasTrivialKill(LHS);
3747 
3748  unsigned RHSReg = getRegForValue(RHS);
3749  if (!RHSReg)
3750  return false;
3751  bool RHSIsKill = hasTrivialKill(RHS);
3752 
3753  if (VT == MVT::i32) {
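 // 32-bit case: form the full 64-bit product with SMULL, then compare its
 // high 32 bits against the low 32 bits shifted right arithmetically by 31
 // (the sign-replication of the low half). A mismatch, i.e. NE, means the
 // 32-bit multiply overflowed.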
3754  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3755  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3756  /*IsKill=*/false, 32);
3757  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3758  AArch64::sub_32);
3759  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3760  AArch64::sub_32);
3761  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3762  AArch64_AM::ASR, 31, /*WantResult=*/false);
3763  } else {
3764  assert(VT == MVT::i64 && "Unexpected value type.");
3765  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3766  // reused in the next instruction.
3767  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3768  /*IsKill=*/false);
3769  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3770  RHSReg, RHSIsKill);
3771  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3772  AArch64_AM::ASR, 63, /*WantResult=*/false);
3773  }
3774  break;
3775  }
3776  case Intrinsic::umul_with_overflow: {
3777  CC = AArch64CC::NE;
3778  unsigned LHSReg = getRegForValue(LHS);
3779  if (!LHSReg)
3780  return false;
3781  bool LHSIsKill = hasTrivialKill(LHS);
3782 
3783  unsigned RHSReg = getRegForValue(RHS);
3784  if (!RHSReg)
3785  return false;
3786  bool RHSIsKill = hasTrivialKill(RHS);
3787 
3788  if (VT == MVT::i32) {
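 // 32-bit case: form the 64-bit product with UMULL and test that its upper
 // 32 bits are zero by subtracting (product LSR #32) from XZR; NE signals
 // overflow.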
3789  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3790  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3791  /*IsKill=*/false, AArch64_AM::LSR, 32,
3792  /*WantResult=*/false);
3793  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3794  AArch64::sub_32);
3795  } else {
3796  assert(VT == MVT::i64 && "Unexpected value type.");
3797  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3798  // reused in the next instruction.
3799  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3800  /*IsKill=*/false);
3801  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3802  RHSReg, RHSIsKill);
3803  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3804  /*IsKill=*/false, /*WantResult=*/false);
3805  }
3806  break;
3807  }
3808  }
3809 
3810  if (MulReg) {
3811  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3812  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3813  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3814  }
3815 
3816  if (!ResultReg1)
3817  return false;
3818 
3819  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3820  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3821  /*IsKill=*/true, getInvertedCondCode(CC));
3822  (void)ResultReg2;
3823  assert((ResultReg1 + 1) == ResultReg2 &&
3824  "Nonconsecutive result registers.");
3825  updateValueMap(II, ResultReg1, 2);
3826  return true;
3827  }
3828  }
3829  return false;
3830 }
3831 
3832 bool AArch64FastISel::selectRet(const Instruction *I) {
3833  const ReturnInst *Ret = cast<ReturnInst>(I);
3834  const Function &F = *I->getParent()->getParent();
3835 
3836  if (!FuncInfo.CanLowerReturn)
3837  return false;
3838 
3839  // FIXME: in principle it could. Mostly just a case of zero extending outgoing
3840  // pointers.
3841  if (Subtarget->isTargetILP32())
3842  return false;
3843 
3844  if (F.isVarArg())
3845  return false;
3846 
3847  if (TLI.supportSwiftError() &&
3848  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3849  return false;
3850 
3851  if (TLI.supportSplitCSR(FuncInfo.MF))
3852  return false;
3853 
3854  // Build a list of return value registers.
3855  SmallVector<unsigned, 4> RetRegs;
3856 
3857  if (Ret->getNumOperands() > 0) {
3858  CallingConv::ID CC = F.getCallingConv();
3859  SmallVector<ISD::OutputArg, 4> Outs;
3860  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3861 
3862  // Analyze operands of the call, assigning locations to each operand.
3863  SmallVector<CCValAssign, 16> ValLocs;
3864  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3865  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3866  : RetCC_AArch64_AAPCS;
3867  CCInfo.AnalyzeReturn(Outs, RetCC);
3868 
3869  // Only handle a single return value for now.
3870  if (ValLocs.size() != 1)
3871  return false;
3872 
3873  CCValAssign &VA = ValLocs[0];
3874  const Value *RV = Ret->getOperand(0);
3875 
3876  // Don't bother handling odd stuff for now.
3877  if ((VA.getLocInfo() != CCValAssign::Full) &&
3878  (VA.getLocInfo() != CCValAssign::BCvt))
3879  return false;
3880 
3881  // Only handle register returns for now.
3882  if (!VA.isRegLoc())
3883  return false;
3884 
3885  unsigned Reg = getRegForValue(RV);
3886  if (Reg == 0)
3887  return false;
3888 
3889  unsigned SrcReg = Reg + VA.getValNo();
3890  Register DestReg = VA.getLocReg();
3891  // Avoid a cross-class copy. This is very unlikely.
3892  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3893  return false;
3894 
3895  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3896  if (!RVEVT.isSimple())
3897  return false;
3898 
3899  // Vectors (of > 1 lane) in big endian need tricky handling.
3900  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3901  !Subtarget->isLittleEndian())
3902  return false;
3903 
3904  MVT RVVT = RVEVT.getSimpleVT();
3905  if (RVVT == MVT::f128)
3906  return false;
3907 
3908  MVT DestVT = VA.getValVT();
3909  // Special handling for extended integers.
3910  if (RVVT != DestVT) {
3911  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3912  return false;
3913 
3914  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3915  return false;
3916 
3917  bool IsZExt = Outs[0].Flags.isZExt();
3918  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3919  if (SrcReg == 0)
3920  return false;
3921  }
3922 
3923  // Make the copy.
3924  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3925  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3926 
3927  // Add register to return instruction.
3928  RetRegs.push_back(VA.getLocReg());
3929  }
3930 
3931  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3932  TII.get(AArch64::RET_ReallyLR));
3933  for (unsigned RetReg : RetRegs)
3934  MIB.addReg(RetReg, RegState::Implicit);
3935  return true;
3936 }
3937 
3938 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3939  Type *DestTy = I->getType();
3940  Value *Op = I->getOperand(0);
3941  Type *SrcTy = Op->getType();
3942 
3943  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3944  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3945  if (!SrcEVT.isSimple())
3946  return false;
3947  if (!DestEVT.isSimple())
3948  return false;
3949 
3950  MVT SrcVT = SrcEVT.getSimpleVT();
3951  MVT DestVT = DestEVT.getSimpleVT();
3952 
3953  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3954  SrcVT != MVT::i8)
3955  return false;
3956  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3957  DestVT != MVT::i1)
3958  return false;
3959 
3960  unsigned SrcReg = getRegForValue(Op);
3961  if (!SrcReg)
3962  return false;
3963  bool SrcIsKill = hasTrivialKill(Op);
3964 
3965  // If we're truncating from i64 to a smaller non-legal type then generate an
3966  // AND. Otherwise, we know the high bits are undefined and a truncate only
3967  // generates a COPY. We cannot also mark the source register as the result
3968  // register, because this can incorrectly transfer the kill flag onto the
3969  // source register.
3970  unsigned ResultReg;
3971  if (SrcVT == MVT::i64) {
3972  uint64_t Mask = 0;
3973  switch (DestVT.SimpleTy) {
3974  default:
3975  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3976  return false;
3977  case MVT::i1:
3978  Mask = 0x1;
3979  break;
3980  case MVT::i8:
3981  Mask = 0xff;
3982  break;
3983  case MVT::i16:
3984  Mask = 0xffff;
3985  break;
3986  }
3987  // Issue an extract_subreg to get the lower 32-bits.
3988  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3989  AArch64::sub_32);
3990  // Create the AND instruction which performs the actual truncation.
3991  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3992  assert(ResultReg && "Unexpected AND instruction emission failure.");
3993  } else {
3994  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3995  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3996  TII.get(TargetOpcode::COPY), ResultReg)
3997  .addReg(SrcReg, getKillRegState(SrcIsKill));
3998  }
3999 
4000  updateValueMap(I, ResultReg);
4001  return true;
4002 }
4003 
4004 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4005  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4006  DestVT == MVT::i64) &&
4007  "Unexpected value type.");
4008  // Handle i8 and i16 as i32.
4009  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4010  DestVT = MVT::i32;
4011 
4012  if (IsZExt) {
4013  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
4014  assert(ResultReg && "Unexpected AND instruction emission failure.");
4015  if (DestVT == MVT::i64) {
4016  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4017  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4018  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4019  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4020  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4021  .addImm(0)
4022  .addReg(ResultReg)
4023  .addImm(AArch64::sub_32);
4024  ResultReg = Reg64;
4025  }
4026  return ResultReg;
4027  } else {
4028  if (DestVT == MVT::i64) {
4029  // FIXME: We're SExt i1 to i64.
4030  return 0;
4031  }
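 // SBFM Wd, Wn, #0, #0 replicates bit 0 of the source across the whole
 // destination register, which is exactly a sign-extension of an i1.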
4032  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4033  /*TODO:IsKill=*/false, 0, 0);
4034  }
4035 }
4036 
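/// Emit a plain multiply. MUL is an alias of MADD with the zero register as
/// the addend, so this emits MADDWrrr/MADDXrrr with WZR/XZR.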
4037 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4038  unsigned Op1, bool Op1IsKill) {
4039  unsigned Opc, ZReg;
4040  switch (RetVT.SimpleTy) {
4041  default: return 0;
4042  case MVT::i8:
4043  case MVT::i16:
4044  case MVT::i32:
4045  RetVT = MVT::i32;
4046  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4047  case MVT::i64:
4048  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4049  }
4050 
4051  const TargetRegisterClass *RC =
4052  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4053  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4054  ZReg, /*IsKill=*/true);
4055 }
4056 
4057 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4058  unsigned Op1, bool Op1IsKill) {
4059  if (RetVT != MVT::i64)
4060  return 0;
4061 
4062  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4063  Op0, Op0IsKill, Op1, Op1IsKill,
4064  AArch64::XZR, /*IsKill=*/true);
4065 }
4066 
4067 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4068  unsigned Op1, bool Op1IsKill) {
4069  if (RetVT != MVT::i64)
4070  return 0;
4071 
4072  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4073  Op0, Op0IsKill, Op1, Op1IsKill,
4074  AArch64::XZR, /*IsKill=*/true);
4075 }
4076 
4077 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4078  unsigned Op1Reg, bool Op1IsKill) {
4079  unsigned Opc = 0;
4080  bool NeedTrunc = false;
4081  uint64_t Mask = 0;
4082  switch (RetVT.SimpleTy) {
4083  default: return 0;
4084  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4085  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4086  case MVT::i32: Opc = AArch64::LSLVWr; break;
4087  case MVT::i64: Opc = AArch64::LSLVXr; break;
4088  }
4089 
4090  const TargetRegisterClass *RC =
4091  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4092  if (NeedTrunc) {
4093  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4094  Op1IsKill = true;
4095  }
4096  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4097  Op1IsKill);
4098  if (NeedTrunc)
4099  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4100  return ResultReg;
4101 }
4102 
4103 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4104  bool Op0IsKill, uint64_t Shift,
4105  bool IsZExt) {
4106  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4107  "Unexpected source/return type pair.");
4108  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4109  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4110  "Unexpected source value type.");
4111  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4112  RetVT == MVT::i64) && "Unexpected return value type.");
4113 
4114  bool Is64Bit = (RetVT == MVT::i64);
4115  unsigned RegSize = Is64Bit ? 64 : 32;
4116  unsigned DstBits = RetVT.getSizeInBits();
4117  unsigned SrcBits = SrcVT.getSizeInBits();
4118  const TargetRegisterClass *RC =
4119  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4120 
4121  // Just emit a copy for "zero" shifts.
4122  if (Shift == 0) {
4123  if (RetVT == SrcVT) {
4124  unsigned ResultReg = createResultReg(RC);
4125  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4126  TII.get(TargetOpcode::COPY), ResultReg)
4127  .addReg(Op0, getKillRegState(Op0IsKill));
4128  return ResultReg;
4129  } else
4130  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4131  }
4132 
4133  // Don't deal with undefined shifts.
4134  if (Shift >= DstBits)
4135  return 0;
4136 
4137  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4138  // {S|U}BFM Wd, Wn, #r, #s
4139  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4140 
4141  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4142  // %2 = shl i16 %1, 4
4143  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4144  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4145  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4146  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4147 
4148  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4149  // %2 = shl i16 %1, 8
4150  // Wd<32+7-24,32-24> = Wn<7:0>
4151  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4152  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4153  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4154 
4155  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4156  // %2 = shl i16 %1, 12
4157  // Wd<32+3-20,32-20> = Wn<3:0>
4158  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4159  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4160  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4161 
4162  unsigned ImmR = RegSize - Shift;
4163  // Limit the width to the length of the source type.
4164  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4165  static const unsigned OpcTable[2][2] = {
4166  {AArch64::SBFMWri, AArch64::SBFMXri},
4167  {AArch64::UBFMWri, AArch64::UBFMXri}
4168  };
4169  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4170  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
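 // The operand is at most 32 bits wide but the 64-bit BFM needs a 64-bit
 // register operand; SUBREG_TO_REG re-tags it as the low half of a 64-bit
 // virtual register without emitting any real instruction. Only bits 31:0
 // are read by the bitfield move, so the upper bits are harmless.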
4171  Register TmpReg = MRI.createVirtualRegister(RC);
4172  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4173  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4174  .addImm(0)
4175  .addReg(Op0, getKillRegState(Op0IsKill))
4176  .addImm(AArch64::sub_32);
4177  Op0 = TmpReg;
4178  Op0IsKill = true;
4179  }
4180  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4181 }
4182 
4183 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4184  unsigned Op1Reg, bool Op1IsKill) {
4185  unsigned Opc = 0;
4186  bool NeedTrunc = false;
4187  uint64_t Mask = 0;
4188  switch (RetVT.SimpleTy) {
4189  default: return 0;
4190  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4191  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4192  case MVT::i32: Opc = AArch64::LSRVWr; break;
4193  case MVT::i64: Opc = AArch64::LSRVXr; break;
4194  }
4195 
4196  const TargetRegisterClass *RC =
4197  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4198  if (NeedTrunc) {
4199  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4200  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4201  Op0IsKill = Op1IsKill = true;
4202  }
4203  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4204  Op1IsKill);
4205  if (NeedTrunc)
4206  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4207  return ResultReg;
4208 }
4209 
4210 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4211  bool Op0IsKill, uint64_t Shift,
4212  bool IsZExt) {
4213  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4214  "Unexpected source/return type pair.");
4215  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4216  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4217  "Unexpected source value type.");
4218  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4219  RetVT == MVT::i64) && "Unexpected return value type.");
4220 
4221  bool Is64Bit = (RetVT == MVT::i64);
4222  unsigned RegSize = Is64Bit ? 64 : 32;
4223  unsigned DstBits = RetVT.getSizeInBits();
4224  unsigned SrcBits = SrcVT.getSizeInBits();
4225  const TargetRegisterClass *RC =
4226  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4227 
4228  // Just emit a copy for "zero" shifts.
4229  if (Shift == 0) {
4230  if (RetVT == SrcVT) {
4231  unsigned ResultReg = createResultReg(RC);
4232  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4233  TII.get(TargetOpcode::COPY), ResultReg)
4234  .addReg(Op0, getKillRegState(Op0IsKill));
4235  return ResultReg;
4236  } else
4237  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4238  }
4239 
4240  // Don't deal with undefined shifts.
4241  if (Shift >= DstBits)
4242  return 0;
4243 
4244  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4245  // {S|U}BFM Wd, Wn, #r, #s
4246  // Wd<s-r:0> = Wn<s:r> when r <= s
4247 
4248  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4249  // %2 = lshr i16 %1, 4
4250  // Wd<7-4:0> = Wn<7:4>
4251  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4252  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4253  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4254 
4255  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4256  // %2 = lshr i16 %1, 8
4257  // Wd<7-7,0> = Wn<7:7>
4258  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4259  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4260  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4261 
4262  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4263  // %2 = lshr i16 %1, 12
4264  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4265  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4266  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4267  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4268 
4269  if (Shift >= SrcBits && IsZExt)
4270  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4271 
4272  // It is not possible to fold a sign-extend into the LShr instruction. In this
4273  // case emit a sign-extend.
4274  if (!IsZExt) {
4275  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4276  if (!Op0)
4277  return 0;
4278  Op0IsKill = true;
4279  SrcVT = RetVT;
4280  SrcBits = SrcVT.getSizeInBits();
4281  IsZExt = true;
4282  }
4283 
4284  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4285  unsigned ImmS = SrcBits - 1;
4286  static const unsigned OpcTable[2][2] = {
4287  {AArch64::SBFMWri, AArch64::SBFMXri},
4288  {AArch64::UBFMWri, AArch64::UBFMXri}
4289  };
4290  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4291  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4292  Register TmpReg = MRI.createVirtualRegister(RC);
4293  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4294  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4295  .addImm(0)
4296  .addReg(Op0, getKillRegState(Op0IsKill))
4297  .addImm(AArch64::sub_32);
4298  Op0 = TmpReg;
4299  Op0IsKill = true;
4300  }
4301  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4302 }
4303 
4304 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4305  unsigned Op1Reg, bool Op1IsKill) {
4306  unsigned Opc = 0;
4307  bool NeedTrunc = false;
4308  uint64_t Mask = 0;
4309  switch (RetVT.SimpleTy) {
4310  default: return 0;
4311  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4312  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4313  case MVT::i32: Opc = AArch64::ASRVWr; break;
4314  case MVT::i64: Opc = AArch64::ASRVXr; break;
4315  }
4316 
4317  const TargetRegisterClass *RC =
4318  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4319  if (NeedTrunc) {
4320  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4321  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4322  Op0IsKill = Op1IsKill = true;
4323  }
4324  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4325  Op1IsKill);
4326  if (NeedTrunc)
4327  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4328  return ResultReg;
4329 }
4330 
4331 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4332  bool Op0IsKill, uint64_t Shift,
4333  bool IsZExt) {
4334  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4335  "Unexpected source/return type pair.");
4336  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4337  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4338  "Unexpected source value type.");
4339  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4340  RetVT == MVT::i64) && "Unexpected return value type.");
4341 
4342  bool Is64Bit = (RetVT == MVT::i64);
4343  unsigned RegSize = Is64Bit ? 64 : 32;
4344  unsigned DstBits = RetVT.getSizeInBits();
4345  unsigned SrcBits = SrcVT.getSizeInBits();
4346  const TargetRegisterClass *RC =
4347  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4348 
4349  // Just emit a copy for "zero" shifts.
4350  if (Shift == 0) {
4351  if (RetVT == SrcVT) {
4352  unsigned ResultReg = createResultReg(RC);
4353  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4354  TII.get(TargetOpcode::COPY), ResultReg)
4355  .addReg(Op0, getKillRegState(Op0IsKill));
4356  return ResultReg;
4357  } else
4358  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4359  }
4360 
4361  // Don't deal with undefined shifts.
4362  if (Shift >= DstBits)
4363  return 0;
4364 
4365  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4366  // {S|U}BFM Wd, Wn, #r, #s
4367  // Wd<s-r:0> = Wn<s:r> when r <= s
4368 
4369  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4370  // %2 = ashr i16 %1, 4
4371  // Wd<7-4:0> = Wn<7:4>
4372  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4373  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4374  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4375 
4376  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4377  // %2 = ashr i16 %1, 8
4378  // Wd<7-7,0> = Wn<7:7>
4379  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4380  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4381  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4382 
4383  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4384  // %2 = ashr i16 %1, 12
4385  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4386  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4387  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4388  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4389 
4390  if (Shift >= SrcBits && IsZExt)
4391  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4392 
4393  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4394  unsigned ImmS = SrcBits - 1;
4395  static const unsigned OpcTable[2][2] = {
4396  {AArch64::SBFMWri, AArch64::SBFMXri},
4397  {AArch64::UBFMWri, AArch64::UBFMXri}
4398  };
4399  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4400  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4401  Register TmpReg = MRI.createVirtualRegister(RC);
4402  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4403  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4404  .addImm(0)
4405  .addReg(Op0, getKillRegState(Op0IsKill))
4406  .addImm(AArch64::sub_32);
4407  Op0 = TmpReg;
4408  Op0IsKill = true;
4409  }
4410  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4411 }
4412 
4413 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4414  bool IsZExt) {
4415  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4416 
4417  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4418  // DestVT are odd things, so test to make sure that they are both types we can
4419  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4420  // bail out to SelectionDAG.
4421  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4422  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4423  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4424  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4425  return 0;
4426 
4427  unsigned Opc;
4428  unsigned Imm = 0;
4429 
4430  switch (SrcVT.SimpleTy) {
4431  default:
4432  return 0;
4433  case MVT::i1:
4434  return emiti1Ext(SrcReg, DestVT, IsZExt);
4435  case MVT::i8:
4436  if (DestVT == MVT::i64)
4437  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4438  else
4439  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4440  Imm = 7;
4441  break;
4442  case MVT::i16:
4443  if (DestVT == MVT::i64)
4444  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4445  else
4446  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4447  Imm = 15;
4448  break;
4449  case MVT::i32:
4450  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4451  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4452  Imm = 31;
4453  break;
4454  }
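 // Imm is the index of the top bit of the source type (7, 15 or 31); the
 // {S|U}BFM emitted below with ImmR = 0 and ImmS = Imm extracts bits Imm:0
 // and sign-/zero-extends them (e.g. Imm = 7 is the SXTB/UXTB pattern).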
4455 
4456  // Handle i8 and i16 as i32.
4457  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4458  DestVT = MVT::i32;
4459  else if (DestVT == MVT::i64) {
4460  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4461  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4462  TII.get(AArch64::SUBREG_TO_REG), Src64)
4463  .addImm(0)
4464  .addReg(SrcReg)
4465  .addImm(AArch64::sub_32);
4466  SrcReg = Src64;
4467  }
4468 
4469  const TargetRegisterClass *RC =
4470  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4471  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4472 }
4473 
4474 static bool isZExtLoad(const MachineInstr *LI) {
4475  switch (LI->getOpcode()) {
4476  default:
4477  return false;
4478  case AArch64::LDURBBi:
4479  case AArch64::LDURHHi:
4480  case AArch64::LDURWi:
4481  case AArch64::LDRBBui:
4482  case AArch64::LDRHHui:
4483  case AArch64::LDRWui:
4484  case AArch64::LDRBBroX:
4485  case AArch64::LDRHHroX:
4486  case AArch64::LDRWroX:
4487  case AArch64::LDRBBroW:
4488  case AArch64::LDRHHroW:
4489  case AArch64::LDRWroW:
4490  return true;
4491  }
4492 }
4493 
4494 static bool isSExtLoad(const MachineInstr *LI) {
4495  switch (LI->getOpcode()) {
4496  default:
4497  return false;
4498  case AArch64::LDURSBWi:
4499  case AArch64::LDURSHWi:
4500  case AArch64::LDURSBXi:
4501  case AArch64::LDURSHXi:
4502  case AArch64::LDURSWi:
4503  case AArch64::LDRSBWui:
4504  case AArch64::LDRSHWui:
4505  case AArch64::LDRSBXui:
4506  case AArch64::LDRSHXui:
4507  case AArch64::LDRSWui:
4508  case AArch64::LDRSBWroX:
4509  case AArch64::LDRSHWroX:
4510  case AArch64::LDRSBXroX:
4511  case AArch64::LDRSHXroX:
4512  case AArch64::LDRSWroX:
4513  case AArch64::LDRSBWroW:
4514  case AArch64::LDRSHWroW:
4515  case AArch64::LDRSBXroW:
4516  case AArch64::LDRSHXroW:
4517  case AArch64::LDRSWroW:
4518  return true;
4519  }
4520 }
4521 
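/// Try to fold a sign-/zero-extend into a preceding load by reusing the
/// extending load that has already been selected for it. A zero-extend to i64
/// then only needs a SUBREG_TO_REG, since 32-bit loads implicitly clear the
/// upper 32 bits of the X register; a sign-extend to i64 can reuse the
/// sign-extending load's 64-bit result directly.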
4522 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4523  MVT SrcVT) {
4524  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4525  if (!LI || !LI->hasOneUse())
4526  return false;
4527 
4528  // Check if the load instruction has already been selected.
4529  unsigned Reg = lookUpRegForValue(LI);
4530  if (!Reg)
4531  return false;
4532 
4533  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4534  if (!MI)
4535  return false;
4536 
4537  // Check if the correct load instruction has been emitted - SelectionDAG might
4538  // have emitted a zero-extending load, but we need a sign-extending load.
4539  bool IsZExt = isa<ZExtInst>(I);
4540  const auto *LoadMI = MI;
4541  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4542  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4543  Register LoadReg = MI->getOperand(1).getReg();
4544  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4545  assert(LoadMI && "Expected valid instruction");
4546  }
4547  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4548  return false;
4549 
4550  // Nothing to be done.
4551  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4552  updateValueMap(I, Reg);
4553  return true;
4554  }
4555 
4556  if (IsZExt) {
4557  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4558  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4559  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4560  .addImm(0)
4561  .addReg(Reg, getKillRegState(true))
4562  .addImm(AArch64::sub_32);
4563  Reg = Reg64;
4564  } else {
4565  assert((MI->getOpcode() == TargetOpcode::COPY &&
4566  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4567  "Expected copy instruction");
4568  Reg = MI->getOperand(1).getReg();
4569  MachineBasicBlock::iterator I(MI);
4570  removeDeadCode(I, std::next(I));
4571  }
4572  updateValueMap(I, Reg);
4573  return true;
4574 }
4575 
4576 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4577  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4578  "Unexpected integer extend instruction.");
4579  MVT RetVT;
4580  MVT SrcVT;
4581  if (!isTypeSupported(I->getType(), RetVT))
4582  return false;
4583 
4584  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4585  return false;
4586 
4587  // Try to optimize already sign-/zero-extended values from load instructions.
4588  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4589  return true;
4590 
4591  unsigned SrcReg = getRegForValue(I->getOperand(0));
4592  if (!SrcReg)
4593  return false;
4594  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4595 
4596  // Try to optimize already sign-/zero-extended values from function arguments.
4597  bool IsZExt = isa<ZExtInst>(I);
4598  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4599  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4600  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4601  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4602  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4603  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4604  .addImm(0)
4605  .addReg(SrcReg, getKillRegState(SrcIsKill))
4606  .addImm(AArch64::sub_32);
4607  SrcReg = ResultReg;
4608  }
4609  // Conservatively clear all kill flags from all uses, because we are
4610  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4611  // level. The result of the instruction at IR level might have been
4612  // trivially dead, which is no longer true.
4613  unsigned UseReg = lookUpRegForValue(I);
4614  if (UseReg)
4615  MRI.clearKillFlags(UseReg);
4616 
4617  updateValueMap(I, SrcReg);
4618  return true;
4619  }
4620  }
4621 
4622  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4623  if (!ResultReg)
4624  return false;
4625 
4626  updateValueMap(I, ResultReg);
4627  return true;
4628 }
4629 
4630 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4631  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4632  if (!DestEVT.isSimple())
4633  return false;
4634 
4635  MVT DestVT = DestEVT.getSimpleVT();
4636  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4637  return false;
4638 
4639  unsigned DivOpc;
4640  bool Is64bit = (DestVT == MVT::i64);
4641  switch (ISDOpcode) {
4642  default:
4643  return false;
4644  case ISD::SREM:
4645  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4646  break;
4647  case ISD::UREM:
4648  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4649  break;
4650  }
4651  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4652  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4653  if (!Src0Reg)
4654  return false;
4655  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4656 
4657  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4658  if (!Src1Reg)
4659  return false;
4660  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4661 
4662  const TargetRegisterClass *RC =
4663  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4664  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4665  Src1Reg, /*IsKill=*/false);
4666  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4667  // The remainder is computed as numerator - (quotient * denominator) using the
4668  // MSUB instruction.
4669  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4670  Src1Reg, Src1IsKill, Src0Reg,
4671  Src0IsKill);
4672  updateValueMap(I, ResultReg);
4673  return true;
4674 }
4675 
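/// Select a multiply. A multiply by a power of two is turned into a left
/// shift (folding a free extend of the other operand when available);
/// everything else is emitted as a MADD with the zero register via emitMul_rr.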
4676 bool AArch64FastISel::selectMul(const Instruction *I) {
4677  MVT VT;
4678  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4679  return false;
4680 
4681  if (VT.isVector())
4682  return selectBinaryOp(I, ISD::MUL);
4683 
4684  const Value *Src0 = I->getOperand(0);
4685  const Value *Src1 = I->getOperand(1);
4686  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4687  if (C->getValue().isPowerOf2())
4688  std::swap(Src0, Src1);
4689 
4690  // Try to simplify to a shift instruction.
4691  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4692  if (C->getValue().isPowerOf2()) {
4693  uint64_t ShiftVal = C->getValue().logBase2();
4694  MVT SrcVT = VT;
4695  bool IsZExt = true;
4696  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4697  if (!isIntExtFree(ZExt)) {
4698  MVT VT;
4699  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4700  SrcVT = VT;
4701  IsZExt = true;
4702  Src0 = ZExt->getOperand(0);
4703  }
4704  }
4705  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4706  if (!isIntExtFree(SExt)) {
4707  MVT VT;
4708  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4709  SrcVT = VT;
4710  IsZExt = false;
4711  Src0 = SExt->getOperand(0);
4712  }
4713  }
4714  }
4715 
4716  unsigned Src0Reg = getRegForValue(Src0);
4717  if (!Src0Reg)
4718  return false;
4719  bool Src0IsKill = hasTrivialKill(Src0);
4720 
4721  unsigned ResultReg =
4722  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4723 
4724  if (ResultReg) {
4725  updateValueMap(I, ResultReg);
4726  return true;
4727  }
4728  }
4729 
4730  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4731  if (!Src0Reg)
4732  return false;
4733  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4734 
4735  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4736  if (!Src1Reg)
4737  return false;
4738  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4739 
4740  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4741 
4742  if (!ResultReg)
4743  return false;
4744 
4745  updateValueMap(I, ResultReg);
4746  return true;
4747 }
4748 
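/// Select a shift. Constant shift amounts are lowered to immediate bitfield
/// moves, folding a pending sign-/zero-extend of the operand where possible;
/// variable shift amounts use the LSLV/LSRV/ASRV register forms.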
4749 bool AArch64FastISel::selectShift(const Instruction *I) {
4750  MVT RetVT;
4751  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4752  return false;
4753 
4754  if (RetVT.isVector())
4755  return selectOperator(I, I->getOpcode());
4756 
4757  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4758  unsigned ResultReg = 0;
4759  uint64_t ShiftVal = C->getZExtValue();
4760  MVT SrcVT = RetVT;
4761  bool IsZExt = I->getOpcode() != Instruction::AShr;
4762  const Value *Op0 = I->getOperand(0);
4763  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4764  if (!isIntExtFree(ZExt)) {
4765  MVT TmpVT;
4766  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4767  SrcVT = TmpVT;
4768  IsZExt = true;
4769  Op0 = ZExt->getOperand(0);
4770  }
4771  }
4772  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4773  if (!isIntExtFree(SExt)) {
4774  MVT TmpVT;
4775  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4776  SrcVT = TmpVT;
4777  IsZExt = false;
4778  Op0 = SExt->getOperand(0);
4779  }
4780  }
4781  }
4782 
4783  unsigned Op0Reg = getRegForValue(Op0);
4784  if (!Op0Reg)
4785  return false;
4786  bool Op0IsKill = hasTrivialKill(Op0);
4787 
4788  switch (I->getOpcode()) {
4789  default: llvm_unreachable("Unexpected instruction.");
4790  case Instruction::Shl:
4791  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4792  break;
4793  case Instruction::AShr:
4794  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4795  break;
4796  case Instruction::LShr:
4797  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4798  break;
4799  }
4800  if (!ResultReg)
4801  return false;
4802 
4803  updateValueMap(I, ResultReg);
4804  return true;
4805  }
4806 
4807  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4808  if (!Op0Reg)
4809  return false;
4810  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4811 
4812  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4813  if (!Op1Reg)
4814  return false;
4815  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4816 
4817  unsigned ResultReg = 0;
4818  switch (I->getOpcode()) {
4819  default: llvm_unreachable("Unexpected instruction.");
4820  case Instruction::Shl:
4821  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4822  break;
4823  case Instruction::AShr:
4824  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4825  break;
4826  case Instruction::LShr:
4827  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4828  break;
4829  }
4830 
4831  if (!ResultReg)
4832  return false;
4833 
4834  updateValueMap(I, ResultReg);
4835  return true;
4836 }
4837 
4838 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4839  MVT RetVT, SrcVT;
4840 
4841  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4842  return false;
4843  if (!isTypeLegal(I->getType(), RetVT))
4844  return false;
4845 
4846  unsigned Opc;
4847  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4848  Opc = AArch64::FMOVWSr;
4849  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4850  Opc = AArch64::FMOVXDr;
4851  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4852  Opc = AArch64::FMOVSWr;
4853  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4854  Opc = AArch64::FMOVDXr;
4855  else
4856  return false;
4857 
4858  const TargetRegisterClass *RC = nullptr;
4859  switch (RetVT.SimpleTy) {
4860  default: llvm_unreachable("Unexpected value type.");
4861  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4862  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4863  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4864  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4865  }
4866  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4867  if (!Op0Reg)
4868  return false;
4869  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4870  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4871 
4872  if (!ResultReg)
4873  return false;
4874 
4875  updateValueMap(I, ResultReg);
4876  return true;
4877 }
4878 
4879 bool AArch64FastISel::selectFRem(const Instruction *I) {
4880  MVT RetVT;
4881  if (!isTypeLegal(I->getType(), RetVT))
4882  return false;
4883 
4884  RTLIB::Libcall LC;
4885  switch (RetVT.SimpleTy) {
4886  default:
4887  return false;
4888  case MVT::f32:
4889  LC = RTLIB::REM_F32;
4890  break;
4891  case MVT::f64:
4892  LC = RTLIB::REM_F64;
4893  break;
4894  }
4895 
4896  ArgListTy Args;
4897  Args.reserve(I->getNumOperands());
4898 
4899  // Populate the argument list.
4900  for (auto &Arg : I->operands()) {
4901  ArgListEntry Entry;
4902  Entry.Val = Arg;
4903  Entry.Ty = Arg->getType();
4904  Args.push_back(Entry);
4905  }
4906 
4907  CallLoweringInfo CLI;
4908  MCContext &Ctx = MF->getContext();
4909  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4910  TLI.getLibcallName(LC), std::move(Args));
4911  if (!lowerCallTo(CLI))
4912  return false;
4913  updateValueMap(I, CLI.ResultReg);
4914  return true;
4915 }
4916 
4917 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4918  MVT VT;
4919  if (!isTypeLegal(I->getType(), VT))
4920  return false;
4921 
4922  if (!isa<ConstantInt>(I->getOperand(1)))
4923  return selectBinaryOp(I, ISD::SDIV);
4924 
4925  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4926  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4927  !(C.isPowerOf2() || (-C).isPowerOf2()))
4928  return selectBinaryOp(I, ISD::SDIV);
4929 
4930  unsigned Lg2 = C.countTrailingZeros();
4931  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4932  if (!Src0Reg)
4933  return false;
4934  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4935 
4936  if (cast<BinaryOperator>(I)->isExact()) {
4937  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4938  if (!ResultReg)
4939  return false;
4940  updateValueMap(I, ResultReg);
4941  return true;
4942  }
4943 
4944  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4945  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4946  if (!AddReg)
4947  return false;
4948 
4949  // (Src0 < 0) ? Pow2 - 1 : 0;
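 // The CSEL below therefore yields Src0 + (Pow2 - 1) for negative Src0 and
 // plain Src0 otherwise, the usual rounding adjustment before the arithmetic
 // shift that implements a signed division by a power of two.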
4950  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4951  return false;
4952 
4953  unsigned SelectOpc;
4954  const TargetRegisterClass *RC;
4955  if (VT == MVT::i64) {
4956  SelectOpc = AArch64::CSELXr;
4957  RC = &AArch64::GPR64RegClass;
4958  } else {
4959  SelectOpc = AArch64::CSELWr;
4960  RC = &AArch64::GPR32RegClass;
4961  }
4962  unsigned SelectReg =
4963  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4964  Src0IsKill, AArch64CC::LT);
4965  if (!SelectReg)
4966  return false;
4967 
4968  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4969  // negate the result.
4970  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4971  unsigned ResultReg;
4972  if (C.isNegative())
4973  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4974  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4975  else
4976  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4977 
4978  if (!ResultReg)
4979  return false;
4980 
4981  updateValueMap(I, ResultReg);
4982  return true;
4983 }
4984 
4985 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4986 /// have to duplicate it for AArch64, because otherwise we would fail during the
4987 /// sign-extend emission.
4988 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4989  unsigned IdxN = getRegForValue(Idx);
4990  if (IdxN == 0)
4991  // Unhandled operand. Halt "fast" selection and bail.
4992  return std::pair<unsigned, bool>(0, false);
4993 
4994  bool IdxNIsKill = hasTrivialKill(Idx);
4995 
4996  // If the index is smaller or larger than intptr_t, truncate or extend it.
4997  MVT PtrVT = TLI.getPointerTy(DL);
4998  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4999  if (IdxVT.bitsLT(PtrVT)) {
5000  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
5001  IdxNIsKill = true;
5002  } else if (IdxVT.bitsGT(PtrVT))
5003  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
5004  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
5005 }
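
 For illustration (assumed, not from the original listing): with 64-bit pointers a 32-bit GEP index reaches this helper with IdxVT = MVT::i32 and PtrVT = MVT::i64, so emitIntExt widens it with a sign-extend before it feeds the address arithmetic:

   //   %q = getelementptr i8, i8* %p, i32 %i
   //   ; index widened first, e.g.  sxtw x9, w1
   //   ; then consumed by the mul/add emitted in selectGetElementPtr below
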
5006 
5007 /// This is mostly a copy of the existing FastISel GEP code, but we have to
5008 /// duplicate it for AArch64, because otherwise we would bail out even for
5009 /// simple cases. This is because the standard fastEmit functions don't cover
5010 /// MUL at all and ADD is lowered very inefficiently.

5011 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
5012  unsigned N = getRegForValue(I->getOperand(0));
5013  if (!N)
5014  return false;
5015  bool NIsKill = hasTrivialKill(I->getOperand(0));
5016 
5017  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
5018  // into a single N = N + TotalOffset.
5019  uint64_t TotalOffs = 0;
5020  MVT VT = TLI.getPointerTy(DL);
5021  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
5022  GTI != E; ++GTI) {
5023  const Value *Idx = GTI.getOperand();
5024  if (auto *StTy = GTI.getStructTypeOrNull()) {
5025  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
5026  // N = N + Offset
5027  if (Field)
5028  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
5029  } else {
5030  Type *Ty = GTI.getIndexedType();
5031 
5032  // If this is a constant subscript, handle it quickly.
5033  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5034  if (CI->isZero())
5035  continue;
5036  // N = N + Offset
5037  TotalOffs +=
5038  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
5039  continue;
5040  }
5041  if (TotalOffs) {
5042  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5043  if (!N)
5044  return false;
5045  NIsKill = true;
5046  TotalOffs = 0;
5047  }
5048 
5049  // N = N + Idx * ElementSize;
5050  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5051  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5052  unsigned IdxN = Pair.first;
5053  bool IdxNIsKill = Pair.second;
5054  if (!IdxN)
5055  return false;
5056 
5057  if (ElementSize != 1) {
5058  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5059  if (!C)
5060  return false;
5061  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5062  if (!IdxN)
5063  return false;
5064  IdxNIsKill = true;
5065  }
5066  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5067  if (!N)
5068  return false;
5069  }
5070  }
5071  if (TotalOffs) {
5072  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5073  if (!N)
5074  return false;
5075  }
5076  updateValueMap(I, N);
5077  return true;
5078 }
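
 A hedged walk-through of the loop above (offsets and registers are illustrative): constant struct and array offsets accumulate in TotalOffs and fold into a single add, while a variable index is multiplied out explicitly because fastEmit has no MUL pattern:

   //   %a = getelementptr { i32, [4 x i32] }, { i32, [4 x i32] }* %p, i64 0, i32 1, i64 %i
   //   ; index 0     -> constant zero, skipped
   //   ; field 1     -> TotalOffs += 4
   //   ; variable %i -> flush:  add x8, x0, #4     (emitAdd_ri_)
   //   ;                size:   mov x9, #4         (fastEmit_i ISD::Constant)
   //   ;                scale:  mul x9, x1, x9     (emitMul_rr)
   //   ;                base:   add x0, x8, x9     (fastEmit_rr ISD::ADD)
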
5079 
5080 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5081  assert(TM.getOptLevel() == CodeGenOpt::None &&
5082  "cmpxchg survived AtomicExpand at optlevel > -O0");
5083 
5084  auto *RetPairTy = cast<StructType>(I->getType());
5085  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5086  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5087  "cmpxchg has a non-i1 status result");
5088 
5089  MVT VT;
5090  if (!isTypeLegal(RetTy, VT))
5091  return false;
5092 
5093  const TargetRegisterClass *ResRC;
5094  unsigned Opc, CmpOpc;
5095  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5096  // extractvalue selection doesn't support that.
5097  if (VT == MVT::i32) {
5098  Opc = AArch64::CMP_SWAP_32;
5099  CmpOpc = AArch64::SUBSWrs;
5100  ResRC = &AArch64::GPR32RegClass;
5101  } else if (VT == MVT::i64) {
5102  Opc = AArch64::CMP_SWAP_64;
5103  CmpOpc = AArch64::SUBSXrs;
5104  ResRC = &AArch64::GPR64RegClass;
5105  } else {
5106  return false;
5107  }
5108 
5109  const MCInstrDesc &II = TII.get(Opc);
5110 
5111  const unsigned AddrReg = constrainOperandRegClass(
5112  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5113  const unsigned DesiredReg = constrainOperandRegClass(
5114  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5115  const unsigned NewReg = constrainOperandRegClass(
5116  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5117 
5118  const unsigned ResultReg1 = createResultReg(ResRC);
5119  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5120  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5121 
5122  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5123  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5124  .addDef(ResultReg1)
5125  .addDef(ScratchReg)
5126  .addUse(AddrReg)
5127  .addUse(DesiredReg)
5128  .addUse(NewReg);
5129 
5130  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5131  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5132  .addUse(ResultReg1)
5133  .addUse(DesiredReg)
5134  .addImm(0);
5135 
5136  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5137  .addDef(ResultReg2)
5138  .addUse(AArch64::WZR)
5139  .addUse(AArch64::WZR)
5140  .addImm(AArch64CC::NE);
5141 
5142  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5143  updateValueMap(I, ResultReg1, 2);
5144  return true;
5145 }
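
 As a hedged sketch of the resulting machine code (the pseudo is expanded after register allocation, so the final load/store-exclusive loop shown here is only indicative). For a 32-bit cmpxchg at -O0, the three instructions built above correspond roughly to:

   //   %res = cmpxchg i32* %p, i32 %old, i32 %new monotonic monotonic
   //   CMP_SWAP_32  wResult, wScratch, xPtr, wOld, wNew  // pseudo; later becomes an
   //                                                     // ldaxr/stlxr retry loop
   //   subs wzr, wResult, wOld                           // compare loaded value
   //   csinc wSuccess, wzr, wzr, ne                      // success = (loaded == old)
   // updateValueMap(I, ResultReg1, 2) then exposes both the loaded value and the
   // i1 success flag to the generic extractvalue selection.
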
5146 
5147 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5148  switch (I->getOpcode()) {
5149  default:
5150  break;
5151  case Instruction::Add:
5152  case Instruction::Sub:
5153  return selectAddSub(I);
5154  case Instruction::Mul:
5155  return selectMul(I);
5156  case Instruction::SDiv:
5157  return selectSDiv(I);
5158  case Instruction::SRem:
5159  if (!selectBinaryOp(I, ISD::SREM))
5160  return selectRem(I, ISD::SREM);
5161  return true;
5162  case Instruction::URem:
5163  if (!selectBinaryOp(I, ISD::UREM))
5164  return selectRem(I, ISD::UREM);
5165  return true;
5166  case Instruction::Shl:
5167  case Instruction::LShr:
5168  case Instruction::AShr:
5169  return selectShift(I);
5170  case Instruction::And:
5171  case Instruction::Or:
5172  case Instruction::Xor:
5173  return selectLogicalOp(I);
5174  case Instruction::Br:
5175  return selectBranch(I);
5176  case Instruction::IndirectBr:
5177  return selectIndirectBr(I);
5178  case Instruction::BitCast:
5179  if (!FastISel::selectBitCast(I))
5180  return selectBitCast(I);
5181  return true;
5182  case Instruction::FPToSI:
5183  if (!selectCast(I, ISD::FP_TO_SINT))
5184  return selectFPToInt(I, /*Signed=*/true);
5185  return true;
5186  case Instruction::FPToUI:
5187  return selectFPToInt(I, /*Signed=*/false);
5188  case Instruction::ZExt:
5189  case Instruction::SExt:
5190  return selectIntExt(I);
5191  case Instruction::Trunc:
5192  if (!selectCast(I, ISD::TRUNCATE))
5193  return selectTrunc(I);
5194  return true;
5195  case Instruction::FPExt:
5196  return selectFPExt(I);
5197  case Instruction::FPTrunc:
5198  return selectFPTrunc(I);
5199  case Instruction::SIToFP:
5200  if (!selectCast(I, ISD::SINT_TO_FP))
5201  return selectIntToFP(I, /*Signed=*/true);
5202  return true;
5203  case Instruction::UIToFP:
5204  return selectIntToFP(I, /*Signed=*/false);
5205  case Instruction::Load:
5206  return selectLoad(I);
5207  case Instruction::Store:
5208  return selectStore(I);
5209  case Instruction::FCmp:
5210  case Instruction::ICmp:
5211  return selectCmp(I);
5212  case Instruction::Select:
5213  return selectSelect(I);
5214  case Instruction::Ret:
5215  return selectRet(I);
5216  case Instruction::FRem:
5217  return selectFRem(I);
5218  case Instruction::GetElementPtr:
5219  return selectGetElementPtr(I);
5220  case Instruction::AtomicCmpXchg:
5221  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5222  }
5223 
5224  // Fall back to target-independent instruction selection.
5225  return selectOperator(I, I->getOpcode());
5226 }
5227 
5228 namespace llvm {
5229 
5230 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5231  const TargetLibraryInfo *LibInfo) {
5232  return new AArch64FastISel(FuncInfo, LibInfo);
5233 }
5234 
5235 } // end namespace llvm