AArch64FastISel.cpp (LLVM 3.7.0)
1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64.h"
17 #include "AArch64CallingConvention.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "llvm/Analysis/BranchProbabilityInfo.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/FastISel.h"
24 #include "llvm/CodeGen/FunctionLoweringInfo.h"
25 #include "llvm/CodeGen/MachineConstantPool.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallingConv.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCSymbol.h"
41 using namespace llvm;
42 
43 namespace {
44 
45 class AArch64FastISel final : public FastISel {
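 // Wrapper describing an AArch64 memory address as FastISel builds it up: a
 // base (either a register or a frame index), an optional offset register with
 // a shift/extend, a signed immediate offset, and an optional GlobalValue.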
46  class Address {
47  public:
48  typedef enum {
49  RegBase,
50  FrameIndexBase
51  } BaseKind;
52 
53  private:
54  BaseKind Kind;
55  AArch64_AM::ShiftExtendType ExtType;
56  union {
57  unsigned Reg;
58  int FI;
59  } Base;
60  unsigned OffsetReg;
61  unsigned Shift;
62  int64_t Offset;
63  const GlobalValue *GV;
64 
65  public:
66  Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
67  OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
68  void setKind(BaseKind K) { Kind = K; }
69  BaseKind getKind() const { return Kind; }
70  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
71  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
72  bool isRegBase() const { return Kind == RegBase; }
73  bool isFIBase() const { return Kind == FrameIndexBase; }
74  void setReg(unsigned Reg) {
75  assert(isRegBase() && "Invalid base register access!");
76  Base.Reg = Reg;
77  }
78  unsigned getReg() const {
79  assert(isRegBase() && "Invalid base register access!");
80  return Base.Reg;
81  }
82  void setOffsetReg(unsigned Reg) {
83  OffsetReg = Reg;
84  }
85  unsigned getOffsetReg() const {
86  return OffsetReg;
87  }
88  void setFI(unsigned FI) {
89  assert(isFIBase() && "Invalid base frame index access!");
90  Base.FI = FI;
91  }
92  unsigned getFI() const {
93  assert(isFIBase() && "Invalid base frame index access!");
94  return Base.FI;
95  }
96  void setOffset(int64_t O) { Offset = O; }
97  int64_t getOffset() { return Offset; }
98  void setShift(unsigned S) { Shift = S; }
99  unsigned getShift() { return Shift; }
100 
101  void setGlobalValue(const GlobalValue *G) { GV = G; }
102  const GlobalValue *getGlobalValue() { return GV; }
103  };
104 
105  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
106  /// make the right decision when generating code for different targets.
107  const AArch64Subtarget *Subtarget;
108  LLVMContext *Context;
109 
110  bool fastLowerArguments() override;
111  bool fastLowerCall(CallLoweringInfo &CLI) override;
112  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
113 
114 private:
115  // Selection routines.
116  bool selectAddSub(const Instruction *I);
117  bool selectLogicalOp(const Instruction *I);
118  bool selectLoad(const Instruction *I);
119  bool selectStore(const Instruction *I);
120  bool selectBranch(const Instruction *I);
121  bool selectIndirectBr(const Instruction *I);
122  bool selectCmp(const Instruction *I);
123  bool selectSelect(const Instruction *I);
124  bool selectFPExt(const Instruction *I);
125  bool selectFPTrunc(const Instruction *I);
126  bool selectFPToInt(const Instruction *I, bool Signed);
127  bool selectIntToFP(const Instruction *I, bool Signed);
128  bool selectRem(const Instruction *I, unsigned ISDOpcode);
129  bool selectRet(const Instruction *I);
130  bool selectTrunc(const Instruction *I);
131  bool selectIntExt(const Instruction *I);
132  bool selectMul(const Instruction *I);
133  bool selectShift(const Instruction *I);
134  bool selectBitCast(const Instruction *I);
135  bool selectFRem(const Instruction *I);
136  bool selectSDiv(const Instruction *I);
137  bool selectGetElementPtr(const Instruction *I);
138 
139  // Utility helper routines.
140  bool isTypeLegal(Type *Ty, MVT &VT);
141  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
142  bool isValueAvailable(const Value *V) const;
143  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
144  bool computeCallAddress(const Value *V, Address &Addr);
145  bool simplifyAddress(Address &Addr, MVT VT);
146  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
147  unsigned Flags, unsigned ScaleFactor,
148  MachineMemOperand *MMO);
149  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
150  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
151  unsigned Alignment);
152  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
153  const Value *Cond);
154  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
155  bool optimizeSelect(const SelectInst *SI);
156  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
157 
158  // Emit helper routines.
159  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
160  const Value *RHS, bool SetFlags = false,
161  bool WantResult = true, bool IsZExt = false);
162  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
163  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
164  bool SetFlags = false, bool WantResult = true);
165  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
166  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
167  bool WantResult = true);
168  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
169  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
170  AArch64_AM::ShiftExtendType ShiftType,
171  uint64_t ShiftImm, bool SetFlags = false,
172  bool WantResult = true);
173  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
174  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
175  AArch64_AM::ShiftExtendType ExtType,
176  uint64_t ShiftImm, bool SetFlags = false,
177  bool WantResult = true);
178 
179  // Emit functions.
180  bool emitCompareAndBranch(const BranchInst *BI);
181  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
182  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
183  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
184  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
185  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
186  MachineMemOperand *MMO = nullptr);
187  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
188  MachineMemOperand *MMO = nullptr);
189  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
190  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
191  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
192  bool SetFlags = false, bool WantResult = true,
193  bool IsZExt = false);
194  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
195  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
196  bool SetFlags = false, bool WantResult = true,
197  bool IsZExt = false);
198  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
199  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
200  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
201  unsigned RHSReg, bool RHSIsKill,
202  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
203  bool WantResult = true);
204  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
205  const Value *RHS);
206  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
207  bool LHSIsKill, uint64_t Imm);
208  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
209  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
210  uint64_t ShiftImm);
211  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
212  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
213  unsigned Op1, bool Op1IsKill);
214  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
215  unsigned Op1, bool Op1IsKill);
216  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
217  unsigned Op1, bool Op1IsKill);
218  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
219  unsigned Op1Reg, bool Op1IsKill);
220  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
221  uint64_t Imm, bool IsZExt = true);
222  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
223  unsigned Op1Reg, bool Op1IsKill);
224  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
225  uint64_t Imm, bool IsZExt = true);
226  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
227  unsigned Op1Reg, bool Op1IsKill);
228  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
229  uint64_t Imm, bool IsZExt = false);
230 
231  unsigned materializeInt(const ConstantInt *CI, MVT VT);
232  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
233  unsigned materializeGV(const GlobalValue *GV);
234 
235  // Call handling routines.
236 private:
237  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
238  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
239  unsigned &NumBytes);
240  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
241 
242 public:
243  // Backend specific FastISel code.
244  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
245  unsigned fastMaterializeConstant(const Constant *C) override;
246  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
247 
248  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
249  const TargetLibraryInfo *LibInfo)
250  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
251  Subtarget =
252  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
253  Context = &FuncInfo.Fn->getContext();
254  }
255 
256  bool fastSelectInstruction(const Instruction *I) override;
257 
258 #include "AArch64GenFastISel.inc"
259 };
260 
261 } // end anonymous namespace
262 
263 #include "AArch64GenCallingConv.inc"
264 
265 /// \brief Check if the sign-/zero-extend will be a noop.
266 static bool isIntExtFree(const Instruction *I) {
267  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
268  "Unexpected integer extend instruction.");
269  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
270  "Unexpected value type.");
271  bool IsZExt = isa<ZExtInst>(I);
272 
273  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
274  if (LI->hasOneUse())
275  return true;
276 
277  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
278  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
279  return true;
280 
281  return false;
282 }
283 
284 /// \brief Determine the implicit scale factor that is applied by a memory
285 /// operation for a given value type.
286 static unsigned getImplicitScaleFactor(MVT VT) {
287  switch (VT.SimpleTy) {
288  default:
289  return 0; // invalid
290  case MVT::i1: // fall-through
291  case MVT::i8:
292  return 1;
293  case MVT::i16:
294  return 2;
295  case MVT::i32: // fall-through
296  case MVT::f32:
297  return 4;
298  case MVT::i64: // fall-through
299  case MVT::f64:
300  return 8;
301  }
302 }
303 
304 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
305  if (CC == CallingConv::WebKit_JS)
306  return CC_AArch64_WebKit_JS;
307  if (CC == CallingConv::GHC)
308  return CC_AArch64_GHC;
309  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
310 }
311 
312 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
313  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
314  "Alloca should always return a pointer.");
315 
316  // Don't handle dynamic allocas.
317  if (!FuncInfo.StaticAllocaMap.count(AI))
318  return 0;
319 
320  DenseMap<const AllocaInst *, int>::iterator SI =
321  FuncInfo.StaticAllocaMap.find(AI);
322 
323  if (SI != FuncInfo.StaticAllocaMap.end()) {
324  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
325  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
326  ResultReg)
327  .addFrameIndex(SI->second)
328  .addImm(0)
329  .addImm(0);
330  return ResultReg;
331  }
332 
333  return 0;
334 }
335 
336 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
337  if (VT > MVT::i64)
338  return 0;
339 
340  if (!CI->isZero())
341  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
342 
343  // Create a copy from the zero register to materialize a "0" value.
344  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
345  : &AArch64::GPR32RegClass;
346  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
347  unsigned ResultReg = createResultReg(RC);
348  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
349  ResultReg).addReg(ZeroReg, getKillRegState(true));
350  return ResultReg;
351 }
352 
353 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
354  // Positive zero (+0.0) has to be materialized with a fmov from the zero
355  // register, because the immediate version of fmov cannot encode zero.
356  if (CFP->isNullValue())
357  return fastMaterializeFloatZero(CFP);
358 
359  if (VT != MVT::f32 && VT != MVT::f64)
360  return 0;
361 
362  const APFloat Val = CFP->getValueAPF();
363  bool Is64Bit = (VT == MVT::f64);
364  // This checks to see if we can use FMOV instructions to materialize
365  // a constant, otherwise we have to materialize via the constant pool.
366  if (TLI.isFPImmLegal(Val, VT)) {
367  int Imm =
368  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
369  assert((Imm != -1) && "Cannot encode floating-point constant.");
370  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
371  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
372  }
373 
374  // For the MachO large code model materialize the FP constant in code.
375  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
376  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
377  const TargetRegisterClass *RC = Is64Bit ?
378  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
379 
380  unsigned TmpReg = createResultReg(RC);
381  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
382  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
383 
384  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
385  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
386  TII.get(TargetOpcode::COPY), ResultReg)
387  .addReg(TmpReg, getKillRegState(true));
388 
389  return ResultReg;
390  }
391 
392  // Materialize via constant pool. MachineConstantPool wants an explicit
393  // alignment.
394  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
395  if (Align == 0)
396  Align = DL.getTypeAllocSize(CFP->getType());
397 
398  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
399  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
400  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
401  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
402 
403  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
404  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
405  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
406  .addReg(ADRPReg)
407  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
408  return ResultReg;
409 }
410 
411 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
412  // We can't handle thread-local variables quickly yet.
413  if (GV->isThreadLocal())
414  return 0;
415 
416  // MachO still uses GOT for large code-model accesses, but ELF requires
417  // movz/movk sequences, which FastISel doesn't handle yet.
418  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
419  return 0;
420 
421  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
422 
423  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
424  if (!DestEVT.isSimple())
425  return 0;
426 
427  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
428  unsigned ResultReg;
429 
430  if (OpFlags & AArch64II::MO_GOT) {
431  // ADRP + LDRX
432  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
433  ADRPReg)
434  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
435 
436  ResultReg = createResultReg(&AArch64::GPR64RegClass);
437  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
438  ResultReg)
439  .addReg(ADRPReg)
440  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
441  AArch64II::MO_NC);
442  } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
443  // We can't handle addresses loaded from a constant pool quickly yet.
444  return 0;
445  } else {
446  // ADRP + ADDX
447  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
448  ADRPReg)
449  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
450 
451  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
452  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
453  ResultReg)
454  .addReg(ADRPReg)
455  .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
456  .addImm(0);
457  }
458  return ResultReg;
459 }
460 
461 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
462  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
463 
464  // Only handle simple types.
465  if (!CEVT.isSimple())
466  return 0;
467  MVT VT = CEVT.getSimpleVT();
468 
469  if (const auto *CI = dyn_cast<ConstantInt>(C))
470  return materializeInt(CI, VT);
471  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
472  return materializeFP(CFP, VT);
473  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
474  return materializeGV(GV);
475 
476  return 0;
477 }
478 
479 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
480  assert(CFP->isNullValue() &&
481  "Floating-point constant is not a positive zero.");
482  MVT VT;
483  if (!isTypeLegal(CFP->getType(), VT))
484  return 0;
485 
486  if (VT != MVT::f32 && VT != MVT::f64)
487  return 0;
488 
489  bool Is64Bit = (VT == MVT::f64);
490  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
491  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
492  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
493 }
494 
495 /// \brief Check if the multiply is by a power-of-2 constant.
496 static bool isMulPowOf2(const Value *I) {
497  if (const auto *MI = dyn_cast<MulOperator>(I)) {
498  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
499  if (C->getValue().isPowerOf2())
500  return true;
501  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
502  if (C->getValue().isPowerOf2())
503  return true;
504  }
505  return false;
506 }
507 
508 // Computes the address to get to an object.
509 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
510 {
511  const User *U = nullptr;
512  unsigned Opcode = Instruction::UserOp1;
513  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
514  // Don't walk into other basic blocks unless the object is an alloca from
515  // another block, otherwise it may not have a virtual register assigned.
516  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
517  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
518  Opcode = I->getOpcode();
519  U = I;
520  }
521  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
522  Opcode = C->getOpcode();
523  U = C;
524  }
525 
526  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
527  if (Ty->getAddressSpace() > 255)
528  // Fast instruction selection doesn't support the special
529  // address spaces.
530  return false;
531 
532  switch (Opcode) {
533  default:
534  break;
535  case Instruction::BitCast: {
536  // Look through bitcasts.
537  return computeAddress(U->getOperand(0), Addr, Ty);
538  }
539  case Instruction::IntToPtr: {
540  // Look past no-op inttoptrs.
541  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
542  TLI.getPointerTy(DL))
543  return computeAddress(U->getOperand(0), Addr, Ty);
544  break;
545  }
546  case Instruction::PtrToInt: {
547  // Look past no-op ptrtoints.
548  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
549  return computeAddress(U->getOperand(0), Addr, Ty);
550  break;
551  }
552  case Instruction::GetElementPtr: {
553  Address SavedAddr = Addr;
554  uint64_t TmpOffset = Addr.getOffset();
555 
556  // Iterate through the GEP folding the constants into offsets where
557  // we can.
558  gep_type_iterator GTI = gep_type_begin(U);
559  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
560  ++i, ++GTI) {
561  const Value *Op = *i;
562  if (StructType *STy = dyn_cast<StructType>(*GTI)) {
563  const StructLayout *SL = DL.getStructLayout(STy);
564  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
565  TmpOffset += SL->getElementOffset(Idx);
566  } else {
567  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
568  for (;;) {
569  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
570  // Constant-offset addressing.
571  TmpOffset += CI->getSExtValue() * S;
572  break;
573  }
574  if (canFoldAddIntoGEP(U, Op)) {
575  // A compatible add with a constant operand. Fold the constant.
576  ConstantInt *CI =
577  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
578  TmpOffset += CI->getSExtValue() * S;
579  // Iterate on the other operand.
580  Op = cast<AddOperator>(Op)->getOperand(0);
581  continue;
582  }
583  // Unsupported
584  goto unsupported_gep;
585  }
586  }
587  }
588 
589  // Try to grab the base operand now.
590  Addr.setOffset(TmpOffset);
591  if (computeAddress(U->getOperand(0), Addr, Ty))
592  return true;
593 
594  // We failed, restore everything and try the other options.
595  Addr = SavedAddr;
596 
597  unsupported_gep:
598  break;
599  }
600  case Instruction::Alloca: {
601  const AllocaInst *AI = cast<AllocaInst>(Obj);
602  DenseMap<const AllocaInst *, int>::iterator SI =
603  FuncInfo.StaticAllocaMap.find(AI);
604  if (SI != FuncInfo.StaticAllocaMap.end()) {
605  Addr.setKind(Address::FrameIndexBase);
606  Addr.setFI(SI->second);
607  return true;
608  }
609  break;
610  }
611  case Instruction::Add: {
612  // Adds of constants are common and easy enough.
613  const Value *LHS = U->getOperand(0);
614  const Value *RHS = U->getOperand(1);
615 
616  if (isa<ConstantInt>(LHS))
617  std::swap(LHS, RHS);
618 
619  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
620  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
621  return computeAddress(LHS, Addr, Ty);
622  }
623 
624  Address Backup = Addr;
625  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
626  return true;
627  Addr = Backup;
628 
629  break;
630  }
631  case Instruction::Sub: {
632  // Subs of constants are common and easy enough.
633  const Value *LHS = U->getOperand(0);
634  const Value *RHS = U->getOperand(1);
635 
636  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
637  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
638  return computeAddress(LHS, Addr, Ty);
639  }
640  break;
641  }
642  case Instruction::Shl: {
643  if (Addr.getOffsetReg())
644  break;
645 
646  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
647  if (!CI)
648  break;
649 
650  unsigned Val = CI->getZExtValue();
651  if (Val < 1 || Val > 3)
652  break;
653 
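 // Register-offset addressing can only scale by the access size, so the shift
 // amount must equal log2 of the number of bytes loaded or stored.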
654  uint64_t NumBytes = 0;
655  if (Ty && Ty->isSized()) {
656  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
657  NumBytes = NumBits / 8;
658  if (!isPowerOf2_64(NumBits))
659  NumBytes = 0;
660  }
661 
662  if (NumBytes != (1ULL << Val))
663  break;
664 
665  Addr.setShift(Val);
666  Addr.setExtendType(AArch64_AM::LSL);
667 
668  const Value *Src = U->getOperand(0);
669  if (const auto *I = dyn_cast<Instruction>(Src)) {
670  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
671  // Fold the zext or sext when it won't become a noop.
672  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
673  if (!isIntExtFree(ZE) &&
674  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
675  Addr.setExtendType(AArch64_AM::UXTW);
676  Src = ZE->getOperand(0);
677  }
678  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
679  if (!isIntExtFree(SE) &&
680  SE->getOperand(0)->getType()->isIntegerTy(32)) {
681  Addr.setExtendType(AArch64_AM::SXTW);
682  Src = SE->getOperand(0);
683  }
684  }
685  }
686  }
687 
688  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
689  if (AI->getOpcode() == Instruction::And) {
690  const Value *LHS = AI->getOperand(0);
691  const Value *RHS = AI->getOperand(1);
692 
693  if (const auto *C = dyn_cast<ConstantInt>(LHS))
694  if (C->getValue() == 0xffffffff)
695  std::swap(LHS, RHS);
696 
697  if (const auto *C = dyn_cast<ConstantInt>(RHS))
698  if (C->getValue() == 0xffffffff) {
699  Addr.setExtendType(AArch64_AM::UXTW);
700  unsigned Reg = getRegForValue(LHS);
701  if (!Reg)
702  return false;
703  bool RegIsKill = hasTrivialKill(LHS);
704  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
705  AArch64::sub_32);
706  Addr.setOffsetReg(Reg);
707  return true;
708  }
709  }
710 
711  unsigned Reg = getRegForValue(Src);
712  if (!Reg)
713  return false;
714  Addr.setOffsetReg(Reg);
715  return true;
716  }
717  case Instruction::Mul: {
718  if (Addr.getOffsetReg())
719  break;
720 
721  if (!isMulPowOf2(U))
722  break;
723 
724  const Value *LHS = U->getOperand(0);
725  const Value *RHS = U->getOperand(1);
726 
727  // Canonicalize power-of-2 value to the RHS.
728  if (const auto *C = dyn_cast<ConstantInt>(LHS))
729  if (C->getValue().isPowerOf2())
730  std::swap(LHS, RHS);
731 
732  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
733  const auto *C = cast<ConstantInt>(RHS);
734  unsigned Val = C->getValue().logBase2();
735  if (Val < 1 || Val > 3)
736  break;
737 
738  uint64_t NumBytes = 0;
739  if (Ty && Ty->isSized()) {
740  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
741  NumBytes = NumBits / 8;
742  if (!isPowerOf2_64(NumBits))
743  NumBytes = 0;
744  }
745 
746  if (NumBytes != (1ULL << Val))
747  break;
748 
749  Addr.setShift(Val);
750  Addr.setExtendType(AArch64_AM::LSL);
751 
752  const Value *Src = LHS;
753  if (const auto *I = dyn_cast<Instruction>(Src)) {
754  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
755  // Fold the zext or sext when it won't become a noop.
756  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
757  if (!isIntExtFree(ZE) &&
758  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
759  Addr.setExtendType(AArch64_AM::UXTW);
760  Src = ZE->getOperand(0);
761  }
762  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
763  if (!isIntExtFree(SE) &&
764  SE->getOperand(0)->getType()->isIntegerTy(32)) {
765  Addr.setExtendType(AArch64_AM::SXTW);
766  Src = SE->getOperand(0);
767  }
768  }
769  }
770  }
771 
772  unsigned Reg = getRegForValue(Src);
773  if (!Reg)
774  return false;
775  Addr.setOffsetReg(Reg);
776  return true;
777  }
778  case Instruction::And: {
779  if (Addr.getOffsetReg())
780  break;
781 
782  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
783  break;
784 
785  const Value *LHS = U->getOperand(0);
786  const Value *RHS = U->getOperand(1);
787 
788  if (const auto *C = dyn_cast<ConstantInt>(LHS))
789  if (C->getValue() == 0xffffffff)
790  std::swap(LHS, RHS);
791 
792  if (const auto *C = dyn_cast<ConstantInt>(RHS))
793  if (C->getValue() == 0xffffffff) {
794  Addr.setShift(0);
795  Addr.setExtendType(AArch64_AM::LSL);
796  Addr.setExtendType(AArch64_AM::UXTW);
797 
798  unsigned Reg = getRegForValue(LHS);
799  if (!Reg)
800  return false;
801  bool RegIsKill = hasTrivialKill(LHS);
802  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
803  AArch64::sub_32);
804  Addr.setOffsetReg(Reg);
805  return true;
806  }
807  break;
808  }
809  case Instruction::SExt:
810  case Instruction::ZExt: {
811  if (!Addr.getReg() || Addr.getOffsetReg())
812  break;
813 
814  const Value *Src = nullptr;
815  // Fold the zext or sext when it won't become a noop.
816  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
817  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
818  Addr.setExtendType(AArch64_AM::UXTW);
819  Src = ZE->getOperand(0);
820  }
821  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
822  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
823  Addr.setExtendType(AArch64_AM::SXTW);
824  Src = SE->getOperand(0);
825  }
826  }
827 
828  if (!Src)
829  break;
830 
831  Addr.setShift(0);
832  unsigned Reg = getRegForValue(Src);
833  if (!Reg)
834  return false;
835  Addr.setOffsetReg(Reg);
836  return true;
837  }
838  } // end switch
839 
840  if (Addr.isRegBase() && !Addr.getReg()) {
841  unsigned Reg = getRegForValue(Obj);
842  if (!Reg)
843  return false;
844  Addr.setReg(Reg);
845  return true;
846  }
847 
848  if (!Addr.getOffsetReg()) {
849  unsigned Reg = getRegForValue(Obj);
850  if (!Reg)
851  return false;
852  Addr.setOffsetReg(Reg);
853  return true;
854  }
855 
856  return false;
857 }
858 
859 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
860  const User *U = nullptr;
861  unsigned Opcode = Instruction::UserOp1;
862  bool InMBB = true;
863 
864  if (const auto *I = dyn_cast<Instruction>(V)) {
865  Opcode = I->getOpcode();
866  U = I;
867  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
868  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
869  Opcode = C->getOpcode();
870  U = C;
871  }
872 
873  switch (Opcode) {
874  default: break;
875  case Instruction::BitCast:
876  // Look past bitcasts if its operand is in the same BB.
877  if (InMBB)
878  return computeCallAddress(U->getOperand(0), Addr);
879  break;
880  case Instruction::IntToPtr:
881  // Look past no-op inttoptrs if its operand is in the same BB.
882  if (InMBB &&
883  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
884  TLI.getPointerTy(DL))
885  return computeCallAddress(U->getOperand(0), Addr);
886  break;
887  case Instruction::PtrToInt:
888  // Look past no-op ptrtoints if its operand is in the same BB.
889  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
890  return computeCallAddress(U->getOperand(0), Addr);
891  break;
892  }
893 
894  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
895  Addr.setGlobalValue(GV);
896  return true;
897  }
898 
899  // If all else fails, try to materialize the value in a register.
900  if (!Addr.getGlobalValue()) {
901  Addr.setReg(getRegForValue(V));
902  return Addr.getReg() != 0;
903  }
904 
905  return false;
906 }
907 
908 
909 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
910  EVT evt = TLI.getValueType(DL, Ty, true);
911 
912  // Only handle simple types.
913  if (evt == MVT::Other || !evt.isSimple())
914  return false;
915  VT = evt.getSimpleVT();
916 
917  // This is a legal type, but it's not something we handle in fast-isel.
918  if (VT == MVT::f128)
919  return false;
920 
921  // Handle all other legal types, i.e. a register that will directly hold this
922  // value.
923  return TLI.isTypeLegal(VT);
924 }
925 
926 /// \brief Determine if the value type is supported by FastISel.
927 ///
928 /// FastISel for AArch64 can handle more value types than are legal. This adds
929 /// simple value types such as i1, i8, and i16.
930 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
931  if (Ty->isVectorTy() && !IsVectorAllowed)
932  return false;
933 
934  if (isTypeLegal(Ty, VT))
935  return true;
936 
937  // If this is a type that can be sign- or zero-extended to a basic operation
938  // go ahead and accept it now.
939  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
940  return true;
941 
942  return false;
943 }
944 
945 bool AArch64FastISel::isValueAvailable(const Value *V) const {
946  if (!isa<Instruction>(V))
947  return true;
948 
949  const auto *I = cast<Instruction>(V);
950  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
951  return true;
952 
953  return false;
954 }
955 
956 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
957  unsigned ScaleFactor = getImplicitScaleFactor(VT);
958  if (!ScaleFactor)
959  return false;
960 
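 // AArch64 load/store immediates are either an unsigned 12-bit offset scaled
 // by the access size or an unscaled signed 9-bit offset (LDUR/STUR); anything
 // else has to be lowered into a register first.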
961  bool ImmediateOffsetNeedsLowering = false;
962  bool RegisterOffsetNeedsLowering = false;
963  int64_t Offset = Addr.getOffset();
964  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
965  ImmediateOffsetNeedsLowering = true;
966  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
967  !isUInt<12>(Offset / ScaleFactor))
968  ImmediateOffsetNeedsLowering = true;
969 
970  // Cannot encode an offset register and an immediate offset in the same
971  // instruction. Fold the immediate offset into the load/store instruction and
972  // emit an additional add to take care of the offset register.
973  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
974  RegisterOffsetNeedsLowering = true;
975 
976  // Cannot encode zero register as base.
977  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
978  RegisterOffsetNeedsLowering = true;
979 
980  // If this is a stack pointer and the offset needs to be simplified then put
981  // the alloca address into a register, set the base type back to register and
982  // continue. This should almost never happen.
983  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
984  {
985  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
986  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
987  ResultReg)
988  .addFrameIndex(Addr.getFI())
989  .addImm(0)
990  .addImm(0);
991  Addr.setKind(Address::RegBase);
992  Addr.setReg(ResultReg);
993  }
994 
995  if (RegisterOffsetNeedsLowering) {
996  unsigned ResultReg = 0;
997  if (Addr.getReg()) {
998  if (Addr.getExtendType() == AArch64_AM::SXTW ||
999  Addr.getExtendType() == AArch64_AM::UXTW )
1000  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1001  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1002  /*TODO:IsKill=*/false, Addr.getExtendType(),
1003  Addr.getShift());
1004  else
1005  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1006  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1007  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1008  Addr.getShift());
1009  } else {
1010  if (Addr.getExtendType() == AArch64_AM::UXTW)
1011  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1012  /*Op0IsKill=*/false, Addr.getShift(),
1013  /*IsZExt=*/true);
1014  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1015  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1016  /*Op0IsKill=*/false, Addr.getShift(),
1017  /*IsZExt=*/false);
1018  else
1019  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1020  /*Op0IsKill=*/false, Addr.getShift());
1021  }
1022  if (!ResultReg)
1023  return false;
1024 
1025  Addr.setReg(ResultReg);
1026  Addr.setOffsetReg(0);
1027  Addr.setShift(0);
1028  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1029  }
1030 
1031  // Since the offset is too large for the load/store instruction get the
1032  // reg+offset into a register.
1033  if (ImmediateOffsetNeedsLowering) {
1034  unsigned ResultReg;
1035  if (Addr.getReg())
1036  // Try to fold the immediate into the add instruction.
1037  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1038  else
1039  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1040 
1041  if (!ResultReg)
1042  return false;
1043  Addr.setReg(ResultReg);
1044  Addr.setOffset(0);
1045  }
1046  return true;
1047 }
1048 
1049 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1050  const MachineInstrBuilder &MIB,
1051  unsigned Flags,
1052  unsigned ScaleFactor,
1053  MachineMemOperand *MMO) {
1054  int64_t Offset = Addr.getOffset() / ScaleFactor;
1055  // Frame base works a bit differently. Handle it separately.
1056  if (Addr.isFIBase()) {
1057  int FI = Addr.getFI();
1058  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1059  // and alignment should be based on the VT.
1060  MMO = FuncInfo.MF->getMachineMemOperand(
1061  MachinePointerInfo::getFixedStack(FI, Offset), Flags,
1062  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1063  // Now add the rest of the operands.
1064  MIB.addFrameIndex(FI).addImm(Offset);
1065  } else {
1066  assert(Addr.isRegBase() && "Unexpected address kind.");
1067  const MCInstrDesc &II = MIB->getDesc();
1068  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1069  Addr.setReg(
1070  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1071  Addr.setOffsetReg(
1072  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1073  if (Addr.getOffsetReg()) {
1074  assert(Addr.getOffset() == 0 && "Unexpected offset");
1075  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1076  Addr.getExtendType() == AArch64_AM::SXTX;
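 // The register-offset (ro) instruction forms take two extra immediates:
 // whether the offset register is sign-extended and whether it is shifted by
 // the access size.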
1077  MIB.addReg(Addr.getReg());
1078  MIB.addReg(Addr.getOffsetReg());
1079  MIB.addImm(IsSigned);
1080  MIB.addImm(Addr.getShift() != 0);
1081  } else
1082  MIB.addReg(Addr.getReg()).addImm(Offset);
1083  }
1084 
1085  if (MMO)
1086  MIB.addMemOperand(MMO);
1087 }
1088 
1089 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1090  const Value *RHS, bool SetFlags,
1091  bool WantResult, bool IsZExt) {
1092  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1093  bool NeedExtend = false;
1094  switch (RetVT.SimpleTy) {
1095  default:
1096  return 0;
1097  case MVT::i1:
1098  NeedExtend = true;
1099  break;
1100  case MVT::i8:
1101  NeedExtend = true;
1102  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1103  break;
1104  case MVT::i16:
1105  NeedExtend = true;
1106  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1107  break;
1108  case MVT::i32: // fall-through
1109  case MVT::i64:
1110  break;
1111  }
1112  MVT SrcVT = RetVT;
1113  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1114 
1115  // Canonicalize immediates to the RHS first.
1116  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1117  std::swap(LHS, RHS);
1118 
1119  // Canonicalize mul by power of 2 to the RHS.
1120  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1121  if (isMulPowOf2(LHS))
1122  std::swap(LHS, RHS);
1123 
1124  // Canonicalize shift immediate to the RHS.
1125  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1126  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1127  if (isa<ConstantInt>(SI->getOperand(1)))
1128  if (SI->getOpcode() == Instruction::Shl ||
1129  SI->getOpcode() == Instruction::LShr ||
1130  SI->getOpcode() == Instruction::AShr )
1131  std::swap(LHS, RHS);
1132 
1133  unsigned LHSReg = getRegForValue(LHS);
1134  if (!LHSReg)
1135  return 0;
1136  bool LHSIsKill = hasTrivialKill(LHS);
1137 
1138  if (NeedExtend)
1139  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1140 
1141  unsigned ResultReg = 0;
1142  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1143  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1144  if (C->isNegative())
1145  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1146  SetFlags, WantResult);
1147  else
1148  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1149  WantResult);
1150  } else if (const auto *C = dyn_cast<Constant>(RHS))
1151  if (C->isNullValue())
1152  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1153  WantResult);
1154 
1155  if (ResultReg)
1156  return ResultReg;
1157 
1158  // Only extend the RHS within the instruction if there is a valid extend type.
1159  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1160  isValueAvailable(RHS)) {
1161  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1162  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1163  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1164  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1165  if (!RHSReg)
1166  return 0;
1167  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1168  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1169  RHSIsKill, ExtendType, C->getZExtValue(),
1170  SetFlags, WantResult);
1171  }
1172  unsigned RHSReg = getRegForValue(RHS);
1173  if (!RHSReg)
1174  return 0;
1175  bool RHSIsKill = hasTrivialKill(RHS);
1176  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1177  ExtendType, 0, SetFlags, WantResult);
1178  }
1179 
1180  // Check if the mul can be folded into the instruction.
1181  if (RHS->hasOneUse() && isValueAvailable(RHS))
1182  if (isMulPowOf2(RHS)) {
1183  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1184  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1185 
1186  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1187  if (C->getValue().isPowerOf2())
1188  std::swap(MulLHS, MulRHS);
1189 
1190  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1191  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1192  unsigned RHSReg = getRegForValue(MulLHS);
1193  if (!RHSReg)
1194  return 0;
1195  bool RHSIsKill = hasTrivialKill(MulLHS);
1196  return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1197  AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
1198  }
1199 
1200  // Check if the shift can be folded into the instruction.
1201  if (RHS->hasOneUse() && isValueAvailable(RHS))
1202  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1203  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1204  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1205  switch (SI->getOpcode()) {
1206  default: break;
1207  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1208  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1209  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1210  }
1211  uint64_t ShiftVal = C->getZExtValue();
1212  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1213  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1214  if (!RHSReg)
1215  return 0;
1216  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1217  return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1218  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1219  WantResult);
1220  }
1221  }
1222  }
1223 
1224  unsigned RHSReg = getRegForValue(RHS);
1225  if (!RHSReg)
1226  return 0;
1227  bool RHSIsKill = hasTrivialKill(RHS);
1228 
1229  if (NeedExtend)
1230  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1231 
1232  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1233  SetFlags, WantResult);
1234 }
1235 
1236 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1237  bool LHSIsKill, unsigned RHSReg,
1238  bool RHSIsKill, bool SetFlags,
1239  bool WantResult) {
1240  assert(LHSReg && RHSReg && "Invalid register number.");
1241 
1242  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1243  return 0;
1244 
1245  static const unsigned OpcTable[2][2][2] = {
1246  { { AArch64::SUBWrr, AArch64::SUBXrr },
1247  { AArch64::ADDWrr, AArch64::ADDXrr } },
1248  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1249  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1250  };
1251  bool Is64Bit = RetVT == MVT::i64;
1252  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1253  const TargetRegisterClass *RC =
1254  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1255  unsigned ResultReg;
1256  if (WantResult)
1257  ResultReg = createResultReg(RC);
1258  else
1259  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1260 
1261  const MCInstrDesc &II = TII.get(Opc);
1262  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1263  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1264  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1265  .addReg(LHSReg, getKillRegState(LHSIsKill))
1266  .addReg(RHSReg, getKillRegState(RHSIsKill));
1267  return ResultReg;
1268 }
1269 
1270 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1271  bool LHSIsKill, uint64_t Imm,
1272  bool SetFlags, bool WantResult) {
1273  assert(LHSReg && "Invalid register number.");
1274 
1275  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1276  return 0;
1277 
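 // ADD/SUB (immediate) encodes a 12-bit unsigned immediate, optionally shifted
 // left by 12, so only values that fit 0xfff or 0xfff000 can be used here.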
1278  unsigned ShiftImm;
1279  if (isUInt<12>(Imm))
1280  ShiftImm = 0;
1281  else if ((Imm & 0xfff000) == Imm) {
1282  ShiftImm = 12;
1283  Imm >>= 12;
1284  } else
1285  return 0;
1286 
1287  static const unsigned OpcTable[2][2][2] = {
1288  { { AArch64::SUBWri, AArch64::SUBXri },
1289  { AArch64::ADDWri, AArch64::ADDXri } },
1290  { { AArch64::SUBSWri, AArch64::SUBSXri },
1291  { AArch64::ADDSWri, AArch64::ADDSXri } }
1292  };
1293  bool Is64Bit = RetVT == MVT::i64;
1294  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1295  const TargetRegisterClass *RC;
1296  if (SetFlags)
1297  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1298  else
1299  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1300  unsigned ResultReg;
1301  if (WantResult)
1302  ResultReg = createResultReg(RC);
1303  else
1304  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1305 
1306  const MCInstrDesc &II = TII.get(Opc);
1307  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1308  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1309  .addReg(LHSReg, getKillRegState(LHSIsKill))
1310  .addImm(Imm)
1311  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1312  return ResultReg;
1313 }
1314 
1315 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1316  bool LHSIsKill, unsigned RHSReg,
1317  bool RHSIsKill,
1318  AArch64_AM::ShiftExtendType ShiftType,
1319  uint64_t ShiftImm, bool SetFlags,
1320  bool WantResult) {
1321  assert(LHSReg && RHSReg && "Invalid register number.");
1322 
1323  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1324  return 0;
1325 
1326  static const unsigned OpcTable[2][2][2] = {
1327  { { AArch64::SUBWrs, AArch64::SUBXrs },
1328  { AArch64::ADDWrs, AArch64::ADDXrs } },
1329  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1330  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1331  };
1332  bool Is64Bit = RetVT == MVT::i64;
1333  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1334  const TargetRegisterClass *RC =
1335  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1336  unsigned ResultReg;
1337  if (WantResult)
1338  ResultReg = createResultReg(RC);
1339  else
1340  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1341 
1342  const MCInstrDesc &II = TII.get(Opc);
1343  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1344  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1345  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1346  .addReg(LHSReg, getKillRegState(LHSIsKill))
1347  .addReg(RHSReg, getKillRegState(RHSIsKill))
1348  .addImm(getShifterImm(ShiftType, ShiftImm));
1349  return ResultReg;
1350 }
1351 
1352 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1353  bool LHSIsKill, unsigned RHSReg,
1354  bool RHSIsKill,
1355  AArch64_AM::ShiftExtendType ExtType,
1356  uint64_t ShiftImm, bool SetFlags,
1357  bool WantResult) {
1358  assert(LHSReg && RHSReg && "Invalid register number.");
1359 
1360  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1361  return 0;
1362 
1363  static const unsigned OpcTable[2][2][2] = {
1364  { { AArch64::SUBWrx, AArch64::SUBXrx },
1365  { AArch64::ADDWrx, AArch64::ADDXrx } },
1366  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1367  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1368  };
1369  bool Is64Bit = RetVT == MVT::i64;
1370  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1371  const TargetRegisterClass *RC = nullptr;
1372  if (SetFlags)
1373  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1374  else
1375  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1376  unsigned ResultReg;
1377  if (WantResult)
1378  ResultReg = createResultReg(RC);
1379  else
1380  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1381 
1382  const MCInstrDesc &II = TII.get(Opc);
1383  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1384  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1385  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1386  .addReg(LHSReg, getKillRegState(LHSIsKill))
1387  .addReg(RHSReg, getKillRegState(RHSIsKill))
1388  .addImm(getArithExtendImm(ExtType, ShiftImm));
1389  return ResultReg;
1390 }
1391 
1392 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1393  Type *Ty = LHS->getType();
1394  EVT EVT = TLI.getValueType(DL, Ty, true);
1395  if (!EVT.isSimple())
1396  return false;
1397  MVT VT = EVT.getSimpleVT();
1398 
1399  switch (VT.SimpleTy) {
1400  default:
1401  return false;
1402  case MVT::i1:
1403  case MVT::i8:
1404  case MVT::i16:
1405  case MVT::i32:
1406  case MVT::i64:
1407  return emitICmp(VT, LHS, RHS, IsZExt);
1408  case MVT::f32:
1409  case MVT::f64:
1410  return emitFCmp(VT, LHS, RHS);
1411  }
1412 }
1413 
1414 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1415  bool IsZExt) {
1416  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1417  IsZExt) != 0;
1418 }
1419 
1420 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1421  uint64_t Imm) {
1422  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1423  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1424 }
1425 
1426 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1427  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1428  return false;
1429 
1430  // Check to see if the 2nd operand is a constant that we can encode directly
1431  // in the compare.
1432  bool UseImm = false;
1433  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1434  if (CFP->isZero() && !CFP->isNegative())
1435  UseImm = true;
1436 
1437  unsigned LHSReg = getRegForValue(LHS);
1438  if (!LHSReg)
1439  return false;
1440  bool LHSIsKill = hasTrivialKill(LHS);
1441 
1442  if (UseImm) {
1443  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1444  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1445  .addReg(LHSReg, getKillRegState(LHSIsKill));
1446  return true;
1447  }
1448 
1449  unsigned RHSReg = getRegForValue(RHS);
1450  if (!RHSReg)
1451  return false;
1452  bool RHSIsKill = hasTrivialKill(RHS);
1453 
1454  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1455  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1456  .addReg(LHSReg, getKillRegState(LHSIsKill))
1457  .addReg(RHSReg, getKillRegState(RHSIsKill));
1458  return true;
1459 }
1460 
1461 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1462  bool SetFlags, bool WantResult, bool IsZExt) {
1463  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1464  IsZExt);
1465 }
1466 
1467 /// \brief This method is a wrapper to simplify add emission.
1468 ///
1469 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1470 /// that fails, then try to materialize the immediate into a register and use
1471 /// emitAddSub_rr instead.
1472 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1473  int64_t Imm) {
1474  unsigned ResultReg;
1475  if (Imm < 0)
1476  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1477  else
1478  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1479 
1480  if (ResultReg)
1481  return ResultReg;
1482 
1483  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1484  if (!CReg)
1485  return 0;
1486 
1487  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1488  return ResultReg;
1489 }
1490 
1491 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1492  bool SetFlags, bool WantResult, bool IsZExt) {
1493  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1494  IsZExt);
1495 }
1496 
1497 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1498  bool LHSIsKill, unsigned RHSReg,
1499  bool RHSIsKill, bool WantResult) {
1500  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1501  RHSIsKill, /*SetFlags=*/true, WantResult);
1502 }
1503 
1504 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1505  bool LHSIsKill, unsigned RHSReg,
1506  bool RHSIsKill,
1507  AArch64_AM::ShiftExtendType ShiftType,
1508  uint64_t ShiftImm, bool WantResult) {
1509  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1510  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1511  WantResult);
1512 }
1513 
1514 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1515  const Value *LHS, const Value *RHS) {
1516  // Canonicalize immediates to the RHS first.
1517  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1518  std::swap(LHS, RHS);
1519 
1520  // Canonicalize mul by power-of-2 to the RHS.
1521  if (LHS->hasOneUse() && isValueAvailable(LHS))
1522  if (isMulPowOf2(LHS))
1523  std::swap(LHS, RHS);
1524 
1525  // Canonicalize shift immediate to the RHS.
1526  if (LHS->hasOneUse() && isValueAvailable(LHS))
1527  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1528  if (isa<ConstantInt>(SI->getOperand(1)))
1529  std::swap(LHS, RHS);
1530 
1531  unsigned LHSReg = getRegForValue(LHS);
1532  if (!LHSReg)
1533  return 0;
1534  bool LHSIsKill = hasTrivialKill(LHS);
1535 
1536  unsigned ResultReg = 0;
1537  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1538  uint64_t Imm = C->getZExtValue();
1539  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1540  }
1541  if (ResultReg)
1542  return ResultReg;
1543 
1544  // Check if the mul can be folded into the instruction.
1545  if (RHS->hasOneUse() && isValueAvailable(RHS))
1546  if (isMulPowOf2(RHS)) {
1547  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1548  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1549 
1550  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1551  if (C->getValue().isPowerOf2())
1552  std::swap(MulLHS, MulRHS);
1553 
1554  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1555  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1556 
1557  unsigned RHSReg = getRegForValue(MulLHS);
1558  if (!RHSReg)
1559  return 0;
1560  bool RHSIsKill = hasTrivialKill(MulLHS);
1561  return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1562  RHSIsKill, ShiftVal);
1563  }
1564 
1565  // Check if the shift can be folded into the instruction.
1566  if (RHS->hasOneUse() && isValueAvailable(RHS))
1567  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1568  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1569  uint64_t ShiftVal = C->getZExtValue();
1570  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1571  if (!RHSReg)
1572  return 0;
1573  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1574  return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1575  RHSIsKill, ShiftVal);
1576  }
1577 
1578  unsigned RHSReg = getRegForValue(RHS);
1579  if (!RHSReg)
1580  return 0;
1581  bool RHSIsKill = hasTrivialKill(RHS);
1582 
1583  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1584  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1585  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1586  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1587  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1588  }
1589  return ResultReg;
1590 }
1591 
1592 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1593  unsigned LHSReg, bool LHSIsKill,
1594  uint64_t Imm) {
1595  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1596  "ISD nodes are not consecutive!");
1597  static const unsigned OpcTable[3][2] = {
1598  { AArch64::ANDWri, AArch64::ANDXri },
1599  { AArch64::ORRWri, AArch64::ORRXri },
1600  { AArch64::EORWri, AArch64::EORXri }
1601  };
1602  const TargetRegisterClass *RC;
1603  unsigned Opc;
1604  unsigned RegSize;
1605  switch (RetVT.SimpleTy) {
1606  default:
1607  return 0;
1608  case MVT::i1:
1609  case MVT::i8:
1610  case MVT::i16:
1611  case MVT::i32: {
1612  unsigned Idx = ISDOpc - ISD::AND;
1613  Opc = OpcTable[Idx][0];
1614  RC = &AArch64::GPR32spRegClass;
1615  RegSize = 32;
1616  break;
1617  }
1618  case MVT::i64:
1619  Opc = OpcTable[ISDOpc - ISD::AND][1];
1620  RC = &AArch64::GPR64spRegClass;
1621  RegSize = 64;
1622  break;
1623  }
1624 
1625  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1626  return 0;
1627 
1628  unsigned ResultReg =
1629  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1630  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1631  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1632  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1633  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1634  }
1635  return ResultReg;
1636 }
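// Illustrative note (added, not part of the upstream source): only bitmask
// patterns are accepted above. For example, an AND of a 32-bit value with 0xff
// can be emitted directly as "and w0, w1, #0xff", whereas 0 and all-ones are
// not encodable as logical immediates, so isLogicalImmediate rejects them and
// the caller falls back to the register-register form.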
1637 
1638 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1639  unsigned LHSReg, bool LHSIsKill,
1640  unsigned RHSReg, bool RHSIsKill,
1641  uint64_t ShiftImm) {
1642  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1643  "ISD nodes are not consecutive!");
1644  static const unsigned OpcTable[3][2] = {
1645  { AArch64::ANDWrs, AArch64::ANDXrs },
1646  { AArch64::ORRWrs, AArch64::ORRXrs },
1647  { AArch64::EORWrs, AArch64::EORXrs }
1648  };
1649  const TargetRegisterClass *RC;
1650  unsigned Opc;
1651  switch (RetVT.SimpleTy) {
1652  default:
1653  return 0;
1654  case MVT::i1:
1655  case MVT::i8:
1656  case MVT::i16:
1657  case MVT::i32:
1658  Opc = OpcTable[ISDOpc - ISD::AND][0];
1659  RC = &AArch64::GPR32RegClass;
1660  break;
1661  case MVT::i64:
1662  Opc = OpcTable[ISDOpc - ISD::AND][1];
1663  RC = &AArch64::GPR64RegClass;
1664  break;
1665  }
1666  unsigned ResultReg =
1667  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1668  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1669  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1670  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1671  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1672  }
1673  return ResultReg;
1674 }
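// Example (added sketch): for IR such as "or i32 %a, (shl i32 %b, 2)" the
// caller folds the shift into the logical operation, and the code above emits
// a single "orr w0, w0, w1, lsl #2" via the *rs (shifted-register) opcodes.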
1675 
1676 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1677  uint64_t Imm) {
1678  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1679 }
1680 
1681 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1682  bool WantZExt, MachineMemOperand *MMO) {
1683  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1684  return 0;
1685 
1686  // Simplify this down to something we can handle.
1687  if (!simplifyAddress(Addr, VT))
1688  return 0;
1689 
1690  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1691  if (!ScaleFactor)
1692  llvm_unreachable("Unexpected value type.");
1693 
1694  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1695  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1696  bool UseScaled = true;
1697  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1698  UseScaled = false;
1699  ScaleFactor = 1;
1700  }
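 // Example (added for clarity): "ldr w0, [x1, #8]" uses the scaled, unsigned
 // 12-bit immediate form (the offset must be a multiple of the access size),
 // whereas "ldur w0, [x1, #-4]" uses the unscaled, signed 9-bit form needed
 // for negative or unaligned offsets, which is why ScaleFactor is reset to 1.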
1701 
1702  static const unsigned GPOpcTable[2][8][4] = {
1703  // Sign-extend.
1704  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1705  AArch64::LDURXi },
1706  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1707  AArch64::LDURXi },
1708  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1709  AArch64::LDRXui },
1710  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1711  AArch64::LDRXui },
1712  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1713  AArch64::LDRXroX },
1714  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1715  AArch64::LDRXroX },
1716  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1717  AArch64::LDRXroW },
1718  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1719  AArch64::LDRXroW }
1720  },
1721  // Zero-extend.
1722  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1723  AArch64::LDURXi },
1724  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1725  AArch64::LDURXi },
1726  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1727  AArch64::LDRXui },
1728  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1729  AArch64::LDRXui },
1730  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1731  AArch64::LDRXroX },
1732  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1733  AArch64::LDRXroX },
1734  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1735  AArch64::LDRXroW },
1736  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1737  AArch64::LDRXroW }
1738  }
1739  };
1740 
1741  static const unsigned FPOpcTable[4][2] = {
1742  { AArch64::LDURSi, AArch64::LDURDi },
1743  { AArch64::LDRSui, AArch64::LDRDui },
1744  { AArch64::LDRSroX, AArch64::LDRDroX },
1745  { AArch64::LDRSroW, AArch64::LDRDroW }
1746  };
1747 
1748  unsigned Opc;
1749  const TargetRegisterClass *RC;
1750  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1751  Addr.getOffsetReg();
1752  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1753  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1754  Addr.getExtendType() == AArch64_AM::SXTW)
1755  Idx++;
1756 
1757  bool IsRet64Bit = RetVT == MVT::i64;
1758  switch (VT.SimpleTy) {
1759  default:
1760  llvm_unreachable("Unexpected value type.");
1761  case MVT::i1: // Intentional fall-through.
1762  case MVT::i8:
1763  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1764  RC = (IsRet64Bit && !WantZExt) ?
1765  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1766  break;
1767  case MVT::i16:
1768  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1769  RC = (IsRet64Bit && !WantZExt) ?
1770  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1771  break;
1772  case MVT::i32:
1773  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1774  RC = (IsRet64Bit && !WantZExt) ?
1775  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1776  break;
1777  case MVT::i64:
1778  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1779  RC = &AArch64::GPR64RegClass;
1780  break;
1781  case MVT::f32:
1782  Opc = FPOpcTable[Idx][0];
1783  RC = &AArch64::FPR32RegClass;
1784  break;
1785  case MVT::f64:
1786  Opc = FPOpcTable[Idx][1];
1787  RC = &AArch64::FPR64RegClass;
1788  break;
1789  }
1790 
1791  // Create the base instruction, then add the operands.
1792  unsigned ResultReg = createResultReg(RC);
1793  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1794  TII.get(Opc), ResultReg);
1795  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1796 
1797  // Loading an i1 requires special handling.
1798  if (VT == MVT::i1) {
1799  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1800  assert(ANDReg && "Unexpected AND instruction emission failure.");
1801  ResultReg = ANDReg;
1802  }
1803 
1804  // For zero-extending loads to 64-bit we emit a 32-bit load and then convert
1805  // the 32-bit reg to a 64-bit reg.
1806  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1807  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1808  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1809  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1810  .addImm(0)
1811  .addReg(ResultReg, getKillRegState(true))
1812  .addImm(AArch64::sub_32);
1813  ResultReg = Reg64;
1814  }
1815  return ResultReg;
1816 }
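// Example (added sketch): an i8 load that must be zero-extended to i64 is
// emitted as "ldrb w0, [x1]" followed by the SUBREG_TO_REG above, relying on
// the fact that writing a W register already clears the upper 32 bits of the
// corresponding X register.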
1817 
1818 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1819  MVT VT;
1820  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1821  return false;
1822 
1823  if (VT.isVector())
1824  return selectOperator(I, I->getOpcode());
1825 
1826  unsigned ResultReg;
1827  switch (I->getOpcode()) {
1828  default:
1829  llvm_unreachable("Unexpected instruction.");
1830  case Instruction::Add:
1831  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1832  break;
1833  case Instruction::Sub:
1834  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1835  break;
1836  }
1837  if (!ResultReg)
1838  return false;
1839 
1840  updateValueMap(I, ResultReg);
1841  return true;
1842 }
1843 
1844 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1845  MVT VT;
1846  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1847  return false;
1848 
1849  if (VT.isVector())
1850  return selectOperator(I, I->getOpcode());
1851 
1852  unsigned ResultReg;
1853  switch (I->getOpcode()) {
1854  default:
1855  llvm_unreachable("Unexpected instruction.");
1856  case Instruction::And:
1857  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1858  break;
1859  case Instruction::Or:
1860  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1861  break;
1862  case Instruction::Xor:
1863  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1864  break;
1865  }
1866  if (!ResultReg)
1867  return false;
1868 
1869  updateValueMap(I, ResultReg);
1870  return true;
1871 }
1872 
1873 bool AArch64FastISel::selectLoad(const Instruction *I) {
1874  MVT VT;
1875  // Verify we have a legal type before going any further. Currently, we handle
1876  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1877  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1878  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1879  cast<LoadInst>(I)->isAtomic())
1880  return false;
1881 
1882  // See if we can handle this address.
1883  Address Addr;
1884  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1885  return false;
1886 
1887  // Fold the following sign-/zero-extend into the load instruction.
1888  bool WantZExt = true;
1889  MVT RetVT = VT;
1890  const Value *IntExtVal = nullptr;
1891  if (I->hasOneUse()) {
1892  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1893  if (isTypeSupported(ZE->getType(), RetVT))
1894  IntExtVal = ZE;
1895  else
1896  RetVT = VT;
1897  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1898  if (isTypeSupported(SE->getType(), RetVT))
1899  IntExtVal = SE;
1900  else
1901  RetVT = VT;
1902  WantZExt = false;
1903  }
1904  }
1905 
1906  unsigned ResultReg =
1907  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1908  if (!ResultReg)
1909  return false;
1910 
1911  // There are a few different cases we have to handle, because the load or the
1912  // sign-/zero-extend might not be selected by FastISel if we fall back to
1913  // SelectionDAG. There is also an ordering issue when both instructions are in
1914  // different basic blocks.
1915  // 1.) The load instruction is selected by FastISel, but the integer extend
1916  // not. This usually happens when the integer extend is in a different
1917  // basic block and SelectionDAG took over for that basic block.
1918  // 2.) The load instruction is selected before the integer extend. This only
1919  // happens when the integer extend is in a different basic block.
1920  // 3.) The load instruction is selected by SelectionDAG and the integer extend
1921  // by FastISel. This happens if there are instructions between the load
1922  // and the integer extend that couldn't be selected by FastISel.
1923  if (IntExtVal) {
1924  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1925  // could select it. Emit a copy to subreg if necessary. FastISel will remove
1926  // it when it selects the integer extend.
1927  unsigned Reg = lookUpRegForValue(IntExtVal);
1928  auto *MI = MRI.getUniqueVRegDef(Reg);
1929  if (!MI) {
1930  if (RetVT == MVT::i64 && VT <= MVT::i32) {
1931  if (WantZExt) {
1932  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
1933  std::prev(FuncInfo.InsertPt)->eraseFromParent();
1934  ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
1935  } else
1936  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
1937  /*IsKill=*/true,
1938  AArch64::sub_32);
1939  }
1940  updateValueMap(I, ResultReg);
1941  return true;
1942  }
1943 
1944  // The integer extend has already been emitted - delete all the instructions
1945  // that have been emitted by the integer extend lowering code and use the
1946  // result from the load instruction directly.
1947  while (MI) {
1948  Reg = 0;
1949  for (auto &Opnd : MI->uses()) {
1950  if (Opnd.isReg()) {
1951  Reg = Opnd.getReg();
1952  break;
1953  }
1954  }
1955  MI->eraseFromParent();
1956  MI = nullptr;
1957  if (Reg)
1958  MI = MRI.getUniqueVRegDef(Reg);
1959  }
1960  updateValueMap(IntExtVal, ResultReg);
1961  return true;
1962  }
1963 
1964  updateValueMap(I, ResultReg);
1965  return true;
1966 }
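// Example (added for clarity): for "%v = load i8, i8* %p" whose only use is
// "zext i8 %v to i64", the extend is folded into the load above and the zext
// simply reuses the load's result register instead of a separate extend
// instruction.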
1967 
1968 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1969  MachineMemOperand *MMO) {
1970  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1971  return false;
1972 
1973  // Simplify this down to something we can handle.
1974  if (!simplifyAddress(Addr, VT))
1975  return false;
1976 
1977  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1978  if (!ScaleFactor)
1979  llvm_unreachable("Unexpected value type.");
1980 
1981  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1982  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1983  bool UseScaled = true;
1984  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1985  UseScaled = false;
1986  ScaleFactor = 1;
1987  }
1988 
1989  static const unsigned OpcTable[4][6] = {
1990  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
1991  AArch64::STURSi, AArch64::STURDi },
1992  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
1993  AArch64::STRSui, AArch64::STRDui },
1994  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1995  AArch64::STRSroX, AArch64::STRDroX },
1996  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1997  AArch64::STRSroW, AArch64::STRDroW }
1998  };
1999 
2000  unsigned Opc;
2001  bool VTIsi1 = false;
2002  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2003  Addr.getOffsetReg();
2004  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2005  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2006  Addr.getExtendType() == AArch64_AM::SXTW)
2007  Idx++;
2008 
2009  switch (VT.SimpleTy) {
2010  default: llvm_unreachable("Unexpected value type.");
2011  case MVT::i1: VTIsi1 = true;
2012  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2013  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2014  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2015  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2016  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2017  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2018  }
2019 
2020  // Storing an i1 requires special handling.
2021  if (VTIsi1 && SrcReg != AArch64::WZR) {
2022  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2023  assert(ANDReg && "Unexpected AND instruction emission failure.");
2024  SrcReg = ANDReg;
2025  }
2026  // Create the base instruction, then add the operands.
2027  const MCInstrDesc &II = TII.get(Opc);
2028  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2029  MachineInstrBuilder MIB =
2030  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2031  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2032 
2033  return true;
2034 }
2035 
2036 bool AArch64FastISel::selectStore(const Instruction *I) {
2037  MVT VT;
2038  const Value *Op0 = I->getOperand(0);
2039  // Verify we have a legal type before going any further. Currently, we handle
2040  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2041  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2042  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
2043  cast<StoreInst>(I)->isAtomic())
2044  return false;
2045 
2046  // Get the value to be stored into a register. Use the zero register directly
2047  // when possible to avoid an unnecessary copy and a wasted register.
2048  unsigned SrcReg = 0;
2049  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2050  if (CI->isZero())
2051  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2052  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2053  if (CF->isZero() && !CF->isNegative()) {
2054  VT = MVT::getIntegerVT(VT.getSizeInBits());
2055  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2056  }
2057  }
2058 
2059  if (!SrcReg)
2060  SrcReg = getRegForValue(Op0);
2061 
2062  if (!SrcReg)
2063  return false;
2064 
2065  // See if we can handle this address.
2066  Address Addr;
2067  if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
2068  return false;
2069 
2070  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2071  return false;
2072  return true;
2073 }
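// Example (added sketch): "store i32 0, i32* %p" is emitted as
// "str wzr, [x0]" and "store i64 0, i64* %p" as "str xzr, [x0]", avoiding a
// constant materialization; a positive floating-point zero is handled the same
// way by reinterpreting it as an integer zero of the same width.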
2074 
2075 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2076  switch (Pred) {
2077  case CmpInst::FCMP_ONE:
2078  case CmpInst::FCMP_UEQ:
2079  default:
2080  // AL is our "false" for now. The other two need more compares.
2081  return AArch64CC::AL;
2082  case CmpInst::ICMP_EQ:
2083  case CmpInst::FCMP_OEQ:
2084  return AArch64CC::EQ;
2085  case CmpInst::ICMP_SGT:
2086  case CmpInst::FCMP_OGT:
2087  return AArch64CC::GT;
2088  case CmpInst::ICMP_SGE:
2089  case CmpInst::FCMP_OGE:
2090  return AArch64CC::GE;
2091  case CmpInst::ICMP_UGT:
2092  case CmpInst::FCMP_UGT:
2093  return AArch64CC::HI;
2094  case CmpInst::FCMP_OLT:
2095  return AArch64CC::MI;
2096  case CmpInst::ICMP_ULE:
2097  case CmpInst::FCMP_OLE:
2098  return AArch64CC::LS;
2099  case CmpInst::FCMP_ORD:
2100  return AArch64CC::VC;
2101  case CmpInst::FCMP_UNO:
2102  return AArch64CC::VS;
2103  case CmpInst::FCMP_UGE:
2104  return AArch64CC::PL;
2105  case CmpInst::ICMP_SLT:
2106  case CmpInst::FCMP_ULT:
2107  return AArch64CC::LT;
2108  case CmpInst::ICMP_SLE:
2109  case CmpInst::FCMP_ULE:
2110  return AArch64CC::LE;
2111  case CmpInst::FCMP_UNE:
2112  case CmpInst::ICMP_NE:
2113  return AArch64CC::NE;
2114  case CmpInst::ICMP_UGE:
2115  return AArch64CC::HS;
2116  case CmpInst::ICMP_ULT:
2117  return AArch64CC::LO;
2118  }
2119 }
2120 
2121 /// \brief Try to emit a combined compare-and-branch instruction.
2122 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2123  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2124  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2125  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2126 
2127  const Value *LHS = CI->getOperand(0);
2128  const Value *RHS = CI->getOperand(1);
2129 
2130  MVT VT;
2131  if (!isTypeSupported(LHS->getType(), VT))
2132  return false;
2133 
2134  unsigned BW = VT.getSizeInBits();
2135  if (BW > 64)
2136  return false;
2137 
2138  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2139  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2140 
2141  // Try to take advantage of fallthrough opportunities.
2142  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2143  std::swap(TBB, FBB);
2144  Predicate = CmpInst::getInversePredicate(Predicate);
2145  }
2146 
2147  int TestBit = -1;
2148  bool IsCmpNE;
2149  switch (Predicate) {
2150  default:
2151  return false;
2152  case CmpInst::ICMP_EQ:
2153  case CmpInst::ICMP_NE:
2154  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2155  std::swap(LHS, RHS);
2156 
2157  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2158  return false;
2159 
2160  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2161  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2162  const Value *AndLHS = AI->getOperand(0);
2163  const Value *AndRHS = AI->getOperand(1);
2164 
2165  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2166  if (C->getValue().isPowerOf2())
2167  std::swap(AndLHS, AndRHS);
2168 
2169  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2170  if (C->getValue().isPowerOf2()) {
2171  TestBit = C->getValue().logBase2();
2172  LHS = AndLHS;
2173  }
2174  }
2175 
2176  if (VT == MVT::i1)
2177  TestBit = 0;
2178 
2179  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2180  break;
2181  case CmpInst::ICMP_SLT:
2182  case CmpInst::ICMP_SGE:
2183  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2184  return false;
2185 
2186  TestBit = BW - 1;
2187  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2188  break;
2189  case CmpInst::ICMP_SGT:
2190  case CmpInst::ICMP_SLE:
2191  if (!isa<ConstantInt>(RHS))
2192  return false;
2193 
2194  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2195  return false;
2196 
2197  TestBit = BW - 1;
2198  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2199  break;
2200  } // end switch
2201 
2202  static const unsigned OpcTable[2][2][2] = {
2203  { {AArch64::CBZW, AArch64::CBZX },
2204  {AArch64::CBNZW, AArch64::CBNZX} },
2205  { {AArch64::TBZW, AArch64::TBZX },
2206  {AArch64::TBNZW, AArch64::TBNZX} }
2207  };
2208 
2209  bool IsBitTest = TestBit != -1;
2210  bool Is64Bit = BW == 64;
2211  if (TestBit < 32 && TestBit >= 0)
2212  Is64Bit = false;
2213 
2214  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2215  const MCInstrDesc &II = TII.get(Opc);
2216 
2217  unsigned SrcReg = getRegForValue(LHS);
2218  if (!SrcReg)
2219  return false;
2220  bool SrcIsKill = hasTrivialKill(LHS);
2221 
2222  if (BW == 64 && !Is64Bit)
2223  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2224  AArch64::sub_32);
2225 
2226  if ((BW < 32) && !IsBitTest)
2227  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2228 
2229  // Emit the combined compare and branch instruction.
2230  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2231  MachineInstrBuilder MIB =
2232  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2233  .addReg(SrcReg, getKillRegState(SrcIsKill));
2234  if (IsBitTest)
2235  MIB.addImm(TestBit);
2236  MIB.addMBB(TBB);
2237 
2238  // Obtain the branch weight and add the TrueBB to the successor list.
2239  uint32_t BranchWeight = 0;
2240  if (FuncInfo.BPI)
2241  BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2242  TBB->getBasicBlock());
2243  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2244  fastEmitBranch(FBB, DbgLoc);
2245 
2246  return true;
2247 }
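// Example (added for clarity): for "%a = and i32 %x, 8" followed by
// "icmp ne i32 %a, 0" and a conditional branch, the code above emits a single
// "tbnz w0, #3, <bb>"; a plain "icmp eq i32 %x, 0" feeding a branch becomes
// "cbz w0, <bb>".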
2248 
2249 bool AArch64FastISel::selectBranch(const Instruction *I) {
2250  const BranchInst *BI = cast<BranchInst>(I);
2251  if (BI->isUnconditional()) {
2252  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2253  fastEmitBranch(MSucc, BI->getDebugLoc());
2254  return true;
2255  }
2256 
2257  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2258  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2259 
2260  AArch64CC::CondCode CC = AArch64CC::NE;
2261  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2262  if (CI->hasOneUse() && isValueAvailable(CI)) {
2263  // Try to optimize or fold the cmp.
2264  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2265  switch (Predicate) {
2266  default:
2267  break;
2268  case CmpInst::FCMP_FALSE:
2269  fastEmitBranch(FBB, DbgLoc);
2270  return true;
2271  case CmpInst::FCMP_TRUE:
2272  fastEmitBranch(TBB, DbgLoc);
2273  return true;
2274  }
2275 
2276  // Try to emit a combined compare-and-branch first.
2277  if (emitCompareAndBranch(BI))
2278  return true;
2279 
2280  // Try to take advantage of fallthrough opportunities.
2281  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2282  std::swap(TBB, FBB);
2283  Predicate = CmpInst::getInversePredicate(Predicate);
2284  }
2285 
2286  // Emit the cmp.
2287  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2288  return false;
2289 
2290  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2291  // instruction.
2292  CC = getCompareCC(Predicate);
2293  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2294  switch (Predicate) {
2295  default:
2296  break;
2297  case CmpInst::FCMP_UEQ:
2298  ExtraCC = AArch64CC::EQ;
2299  CC = AArch64CC::VS;
2300  break;
2301  case CmpInst::FCMP_ONE:
2302  ExtraCC = AArch64CC::MI;
2303  CC = AArch64CC::GT;
2304  break;
2305  }
2306  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2307 
2308  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2309  if (ExtraCC != AArch64CC::AL) {
2310  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2311  .addImm(ExtraCC)
2312  .addMBB(TBB);
2313  }
2314 
2315  // Emit the branch.
2316  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2317  .addImm(CC)
2318  .addMBB(TBB);
2319 
2320  // Obtain the branch weight and add the TrueBB to the successor list.
2321  uint32_t BranchWeight = 0;
2322  if (FuncInfo.BPI)
2323  BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2324  TBB->getBasicBlock());
2325  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2326 
2327  fastEmitBranch(FBB, DbgLoc);
2328  return true;
2329  }
2330  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2331  MVT SrcVT;
2332  if (TI->hasOneUse() && isValueAvailable(TI) &&
2333  isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2334  unsigned CondReg = getRegForValue(TI->getOperand(0));
2335  if (!CondReg)
2336  return false;
2337  bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2338 
2339  // Issue an extract_subreg to get the lower 32-bits.
2340  if (SrcVT == MVT::i64) {
2341  CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2342  AArch64::sub_32);
2343  CondIsKill = true;
2344  }
2345 
2346  unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2347  assert(ANDReg && "Unexpected AND instruction emission failure.");
2348  emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2349 
2350  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2351  std::swap(TBB, FBB);
2352  CC = AArch64CC::EQ;
2353  }
2354  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2355  .addImm(CC)
2356  .addMBB(TBB);
2357 
2358  // Obtain the branch weight and add the TrueBB to the successor list.
2359  uint32_t BranchWeight = 0;
2360  if (FuncInfo.BPI)
2361  BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2362  TBB->getBasicBlock());
2363  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2364 
2365  fastEmitBranch(FBB, DbgLoc);
2366  return true;
2367  }
2368  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2369  uint64_t Imm = CI->getZExtValue();
2370  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2371  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2372  .addMBB(Target);
2373 
2374  // Obtain the branch weight and add the target to the successor list.
2375  uint32_t BranchWeight = 0;
2376  if (FuncInfo.BPI)
2377  BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2378  Target->getBasicBlock());
2379  FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2380  return true;
2381  } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2382  // Fake-request the condition; otherwise the intrinsic might be completely
2383  // optimized away.
2384  unsigned CondReg = getRegForValue(BI->getCondition());
2385  if (!CondReg)
2386  return false;
2387 
2388  // Emit the branch.
2389  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2390  .addImm(CC)
2391  .addMBB(TBB);
2392 
2393  // Obtain the branch weight and add the TrueBB to the successor list.
2394  uint32_t BranchWeight = 0;
2395  if (FuncInfo.BPI)
2396  BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2397  TBB->getBasicBlock());
2398  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2399 
2400  fastEmitBranch(FBB, DbgLoc);
2401  return true;
2402  }
2403 
2404  unsigned CondReg = getRegForValue(BI->getCondition());
2405  if (CondReg == 0)
2406  return false;
2407  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2408 
2409  // We've been divorced from our compare! Our block was split, and
2410  // now our compare lives in a predecessor block. We mustn't
2411  // re-compare here, as the children of the compare aren't guaranteed
2412  // live across the block boundary (we *could* check for this).
2413  // Regardless, the compare has been done in the predecessor block,
2414  // and it left a value for us in a virtual register. Ergo, we test
2415  // the one-bit value left in the virtual register.
2416  emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2417 
2418  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2419  std::swap(TBB, FBB);
2420  CC = AArch64CC::EQ;
2421  }
2422 
2423  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2424  .addImm(CC)
2425  .addMBB(TBB);
2426 
2427  // Obtain the branch weight and add the TrueBB to the successor list.
2428  uint32_t BranchWeight = 0;
2429  if (FuncInfo.BPI)
2430  BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2431  TBB->getBasicBlock());
2432  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2433 
2434  fastEmitBranch(FBB, DbgLoc);
2435  return true;
2436 }
2437 
2438 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2439  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2440  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2441  if (AddrReg == 0)
2442  return false;
2443 
2444  // Emit the indirect branch.
2445  const MCInstrDesc &II = TII.get(AArch64::BR);
2446  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2447  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2448 
2449  // Make sure the CFG is up-to-date.
2450  for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2451  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2452 
2453  return true;
2454 }
2455 
2456 bool AArch64FastISel::selectCmp(const Instruction *I) {
2457  const CmpInst *CI = cast<CmpInst>(I);
2458 
2459  // Try to optimize or fold the cmp.
2460  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2461  unsigned ResultReg = 0;
2462  switch (Predicate) {
2463  default:
2464  break;
2465  case CmpInst::FCMP_FALSE:
2466  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2467  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2468  TII.get(TargetOpcode::COPY), ResultReg)
2469  .addReg(AArch64::WZR, getKillRegState(true));
2470  break;
2471  case CmpInst::FCMP_TRUE:
2472  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2473  break;
2474  }
2475 
2476  if (ResultReg) {
2477  updateValueMap(I, ResultReg);
2478  return true;
2479  }
2480 
2481  // Emit the cmp.
2482  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2483  return false;
2484 
2485  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2486 
2487  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2488  // condition codes are inverted, because they are used by CSINC.
2489  static unsigned CondCodeTable[2][2] = {
2490  { AArch64CC::NE, AArch64CC::VC },
2491  { AArch64CC::PL, AArch64CC::LE }
2492  };
2493  unsigned *CondCodes = nullptr;
2494  switch (Predicate) {
2495  default:
2496  break;
2497  case CmpInst::FCMP_UEQ:
2498  CondCodes = &CondCodeTable[0][0];
2499  break;
2500  case CmpInst::FCMP_ONE:
2501  CondCodes = &CondCodeTable[1][0];
2502  break;
2503  }
2504 
2505  if (CondCodes) {
2506  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2507  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2508  TmpReg1)
2509  .addReg(AArch64::WZR, getKillRegState(true))
2510  .addReg(AArch64::WZR, getKillRegState(true))
2511  .addImm(CondCodes[0]);
2512  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2513  ResultReg)
2514  .addReg(TmpReg1, getKillRegState(true))
2515  .addReg(AArch64::WZR, getKillRegState(true))
2516  .addImm(CondCodes[1]);
2517 
2518  updateValueMap(I, ResultReg);
2519  return true;
2520  }
2521 
2522  // Now set a register based on the comparison.
2523  AArch64CC::CondCode CC = getCompareCC(Predicate);
2524  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2525  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2526  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2527  ResultReg)
2528  .addReg(AArch64::WZR, getKillRegState(true))
2529  .addReg(AArch64::WZR, getKillRegState(true))
2530  .addImm(invertedCC);
2531 
2532  updateValueMap(I, ResultReg);
2533  return true;
2534 }
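// Note (added, not part of the upstream source): the CSINC with the inverted
// condition above is the expansion of the "cset w0, <cc>" alias, i.e.
// "csinc w0, wzr, wzr, <inverted cc>" materializes 1 when <cc> holds and 0
// otherwise.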
2535 
2536 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2537 /// value.
2538 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2539  if (!SI->getType()->isIntegerTy(1))
2540  return false;
2541 
2542  const Value *Src1Val, *Src2Val;
2543  unsigned Opc = 0;
2544  bool NeedExtraOp = false;
2545  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2546  if (CI->isOne()) {
2547  Src1Val = SI->getCondition();
2548  Src2Val = SI->getFalseValue();
2549  Opc = AArch64::ORRWrr;
2550  } else {
2551  assert(CI->isZero());
2552  Src1Val = SI->getFalseValue();
2553  Src2Val = SI->getCondition();
2554  Opc = AArch64::BICWrr;
2555  }
2556  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2557  if (CI->isOne()) {
2558  Src1Val = SI->getCondition();
2559  Src2Val = SI->getTrueValue();
2560  Opc = AArch64::ORRWrr;
2561  NeedExtraOp = true;
2562  } else {
2563  assert(CI->isZero());
2564  Src1Val = SI->getCondition();
2565  Src2Val = SI->getTrueValue();
2566  Opc = AArch64::ANDWrr;
2567  }
2568  }
2569 
2570  if (!Opc)
2571  return false;
2572 
2573  unsigned Src1Reg = getRegForValue(Src1Val);
2574  if (!Src1Reg)
2575  return false;
2576  bool Src1IsKill = hasTrivialKill(Src1Val);
2577 
2578  unsigned Src2Reg = getRegForValue(Src2Val);
2579  if (!Src2Reg)
2580  return false;
2581  bool Src2IsKill = hasTrivialKill(Src2Val);
2582 
2583  if (NeedExtraOp) {
2584  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2585  Src1IsKill = true;
2586  }
2587  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2588  Src1IsKill, Src2Reg, Src2IsKill);
2589  updateValueMap(SI, ResultReg);
2590  return true;
2591 }
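// Example (added sketch, register names illustrative): "select i1 %c, i1 true,
// i1 %b" becomes "orr w0, wc, wb" and "select i1 %c, i1 false, i1 %b" becomes
// "bic w0, wb, wc", so no conditional select is needed for these i1 patterns.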
2592 
2593 bool AArch64FastISel::selectSelect(const Instruction *I) {
2594  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2595  MVT VT;
2596  if (!isTypeSupported(I->getType(), VT))
2597  return false;
2598 
2599  unsigned Opc;
2600  const TargetRegisterClass *RC;
2601  switch (VT.SimpleTy) {
2602  default:
2603  return false;
2604  case MVT::i1:
2605  case MVT::i8:
2606  case MVT::i16:
2607  case MVT::i32:
2608  Opc = AArch64::CSELWr;
2609  RC = &AArch64::GPR32RegClass;
2610  break;
2611  case MVT::i64:
2612  Opc = AArch64::CSELXr;
2613  RC = &AArch64::GPR64RegClass;
2614  break;
2615  case MVT::f32:
2616  Opc = AArch64::FCSELSrrr;
2617  RC = &AArch64::FPR32RegClass;
2618  break;
2619  case MVT::f64:
2620  Opc = AArch64::FCSELDrrr;
2621  RC = &AArch64::FPR64RegClass;
2622  break;
2623  }
2624 
2625  const SelectInst *SI = cast<SelectInst>(I);
2626  const Value *Cond = SI->getCondition();
2627  AArch64CC::CondCode CC = AArch64CC::AL;
2628  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2629 
2630  if (optimizeSelect(SI))
2631  return true;
2632 
2633  // Try to pickup the flags, so we don't have to emit another compare.
2634  if (foldXALUIntrinsic(CC, I, Cond)) {
2635  // Fake request the condition to force emission of the XALU intrinsic.
2636  unsigned CondReg = getRegForValue(Cond);
2637  if (!CondReg)
2638  return false;
2639  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2640  isValueAvailable(Cond)) {
2641  const auto *Cmp = cast<CmpInst>(Cond);
2642  // Try to optimize or fold the cmp.
2643  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2644  const Value *FoldSelect = nullptr;
2645  switch (Predicate) {
2646  default:
2647  break;
2648  case CmpInst::FCMP_FALSE:
2649  FoldSelect = SI->getFalseValue();
2650  break;
2651  case CmpInst::FCMP_TRUE:
2652  FoldSelect = SI->getTrueValue();
2653  break;
2654  }
2655 
2656  if (FoldSelect) {
2657  unsigned SrcReg = getRegForValue(FoldSelect);
2658  if (!SrcReg)
2659  return false;
2660  unsigned UseReg = lookUpRegForValue(SI);
2661  if (UseReg)
2662  MRI.clearKillFlags(UseReg);
2663 
2664  updateValueMap(I, SrcReg);
2665  return true;
2666  }
2667 
2668  // Emit the cmp.
2669  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2670  return false;
2671 
2672  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2673  CC = getCompareCC(Predicate);
2674  switch (Predicate) {
2675  default:
2676  break;
2677  case CmpInst::FCMP_UEQ:
2678  ExtraCC = AArch64CC::EQ;
2679  CC = AArch64CC::VS;
2680  break;
2681  case CmpInst::FCMP_ONE:
2682  ExtraCC = AArch64CC::MI;
2683  CC = AArch64CC::GT;
2684  break;
2685  }
2686  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2687  } else {
2688  unsigned CondReg = getRegForValue(Cond);
2689  if (!CondReg)
2690  return false;
2691  bool CondIsKill = hasTrivialKill(Cond);
2692 
2693  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2694  CondReg = constrainOperandRegClass(II, CondReg, 1);
2695 
2696  // Emit a TST instruction (ANDS wzr, reg, #imm).
2697  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2698  AArch64::WZR)
2699  .addReg(CondReg, getKillRegState(CondIsKill))
2700  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2701  }
2702 
2703  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2704  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2705 
2706  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2707  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2708 
2709  if (!Src1Reg || !Src2Reg)
2710  return false;
2711 
2712  if (ExtraCC != AArch64CC::AL) {
2713  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2714  Src2IsKill, ExtraCC);
2715  Src2IsKill = true;
2716  }
2717  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2718  Src2IsKill, CC);
2719  updateValueMap(I, ResultReg);
2720  return true;
2721 }
2722 
2723 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2724  Value *V = I->getOperand(0);
2725  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2726  return false;
2727 
2728  unsigned Op = getRegForValue(V);
2729  if (Op == 0)
2730  return false;
2731 
2732  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2733  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2734  ResultReg).addReg(Op);
2735  updateValueMap(I, ResultReg);
2736  return true;
2737 }
2738 
2739 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2740  Value *V = I->getOperand(0);
2741  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2742  return false;
2743 
2744  unsigned Op = getRegForValue(V);
2745  if (Op == 0)
2746  return false;
2747 
2748  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2749  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2750  ResultReg).addReg(Op);
2751  updateValueMap(I, ResultReg);
2752  return true;
2753 }
2754 
2755 // FPToUI and FPToSI
2756 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2757  MVT DestVT;
2758  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2759  return false;
2760 
2761  unsigned SrcReg = getRegForValue(I->getOperand(0));
2762  if (SrcReg == 0)
2763  return false;
2764 
2765  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2766  if (SrcVT == MVT::f128)
2767  return false;
2768 
2769  unsigned Opc;
2770  if (SrcVT == MVT::f64) {
2771  if (Signed)
2772  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2773  else
2774  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2775  } else {
2776  if (Signed)
2777  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2778  else
2779  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2780  }
2781  unsigned ResultReg = createResultReg(
2782  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2783  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2784  .addReg(SrcReg);
2785  updateValueMap(I, ResultReg);
2786  return true;
2787 }
2788 
2789 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2790  MVT DestVT;
2791  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2792  return false;
2793  assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2794  "Unexpected value type.");
2795 
2796  unsigned SrcReg = getRegForValue(I->getOperand(0));
2797  if (!SrcReg)
2798  return false;
2799  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2800 
2801  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2802 
2803  // Handle sign-extension.
2804  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2805  SrcReg =
2806  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2807  if (!SrcReg)
2808  return false;
2809  SrcIsKill = true;
2810  }
2811 
2812  unsigned Opc;
2813  if (SrcVT == MVT::i64) {
2814  if (Signed)
2815  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2816  else
2817  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2818  } else {
2819  if (Signed)
2820  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2821  else
2822  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2823  }
2824 
2825  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2826  SrcIsKill);
2827  updateValueMap(I, ResultReg);
2828  return true;
2829 }
2830 
2831 bool AArch64FastISel::fastLowerArguments() {
2832  if (!FuncInfo.CanLowerReturn)
2833  return false;
2834 
2835  const Function *F = FuncInfo.Fn;
2836  if (F->isVarArg())
2837  return false;
2838 
2839  CallingConv::ID CC = F->getCallingConv();
2840  if (CC != CallingConv::C)
2841  return false;
2842 
2843  // Only handle simple cases of up to 8 GPR and FPR each.
2844  unsigned GPRCnt = 0;
2845  unsigned FPRCnt = 0;
2846  unsigned Idx = 0;
2847  for (auto const &Arg : F->args()) {
2848  // The first argument is at index 1.
2849  ++Idx;
2850  if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2851  F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2852  F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2853  F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2854  return false;
2855 
2856  Type *ArgTy = Arg.getType();
2857  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2858  return false;
2859 
2860  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2861  if (!ArgVT.isSimple())
2862  return false;
2863 
2864  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2865  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2866  return false;
2867 
2868  if (VT.isVector() &&
2869  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2870  return false;
2871 
2872  if (VT >= MVT::i1 && VT <= MVT::i64)
2873  ++GPRCnt;
2874  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2875  VT.is128BitVector())
2876  ++FPRCnt;
2877  else
2878  return false;
2879 
2880  if (GPRCnt > 8 || FPRCnt > 8)
2881  return false;
2882  }
2883 
2884  static const MCPhysReg Registers[6][8] = {
2885  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2886  AArch64::W5, AArch64::W6, AArch64::W7 },
2887  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2888  AArch64::X5, AArch64::X6, AArch64::X7 },
2889  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2890  AArch64::H5, AArch64::H6, AArch64::H7 },
2891  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2892  AArch64::S5, AArch64::S6, AArch64::S7 },
2893  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2894  AArch64::D5, AArch64::D6, AArch64::D7 },
2895  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2896  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2897  };
2898 
2899  unsigned GPRIdx = 0;
2900  unsigned FPRIdx = 0;
2901  for (auto const &Arg : F->args()) {
2902  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2903  unsigned SrcReg;
2904  const TargetRegisterClass *RC;
2905  if (VT >= MVT::i1 && VT <= MVT::i32) {
2906  SrcReg = Registers[0][GPRIdx++];
2907  RC = &AArch64::GPR32RegClass;
2908  VT = MVT::i32;
2909  } else if (VT == MVT::i64) {
2910  SrcReg = Registers[1][GPRIdx++];
2911  RC = &AArch64::GPR64RegClass;
2912  } else if (VT == MVT::f16) {
2913  SrcReg = Registers[2][FPRIdx++];
2914  RC = &AArch64::FPR16RegClass;
2915  } else if (VT == MVT::f32) {
2916  SrcReg = Registers[3][FPRIdx++];
2917  RC = &AArch64::FPR32RegClass;
2918  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2919  SrcReg = Registers[4][FPRIdx++];
2920  RC = &AArch64::FPR64RegClass;
2921  } else if (VT.is128BitVector()) {
2922  SrcReg = Registers[5][FPRIdx++];
2923  RC = &AArch64::FPR128RegClass;
2924  } else
2925  llvm_unreachable("Unexpected value type.");
2926 
2927  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2928  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2929  // Without this, EmitLiveInCopies may eliminate the livein if its only
2930  // use is a bitcast (which isn't turned into an instruction).
2931  unsigned ResultReg = createResultReg(RC);
2932  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2933  TII.get(TargetOpcode::COPY), ResultReg)
2934  .addReg(DstReg, getKillRegState(true));
2935  updateValueMap(&Arg, ResultReg);
2936  }
2937  return true;
2938 }
2939 
2940 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2941  SmallVectorImpl<MVT> &OutVTs,
2942  unsigned &NumBytes) {
2943  CallingConv::ID CC = CLI.CallConv;
2944  SmallVector<CCValAssign, 16> ArgLocs;
2945  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2946  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2947 
2948  // Get a count of how many bytes are to be pushed on the stack.
2949  NumBytes = CCInfo.getNextStackOffset();
2950 
2951  // Issue CALLSEQ_START
2952  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2953  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2954  .addImm(NumBytes);
2955 
2956  // Process the args.
2957  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2958  CCValAssign &VA = ArgLocs[i];
2959  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2960  MVT ArgVT = OutVTs[VA.getValNo()];
2961 
2962  unsigned ArgReg = getRegForValue(ArgVal);
2963  if (!ArgReg)
2964  return false;
2965 
2966  // Handle arg promotion: SExt, ZExt, AExt.
2967  switch (VA.getLocInfo()) {
2968  case CCValAssign::Full:
2969  break;
2970  case CCValAssign::SExt: {
2971  MVT DestVT = VA.getLocVT();
2972  MVT SrcVT = ArgVT;
2973  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2974  if (!ArgReg)
2975  return false;
2976  break;
2977  }
2978  case CCValAssign::AExt:
2979  // Intentional fall-through.
2980  case CCValAssign::ZExt: {
2981  MVT DestVT = VA.getLocVT();
2982  MVT SrcVT = ArgVT;
2983  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2984  if (!ArgReg)
2985  return false;
2986  break;
2987  }
2988  default:
2989  llvm_unreachable("Unknown arg promotion!");
2990  }
2991 
2992  // Now copy/store arg to correct locations.
2993  if (VA.isRegLoc() && !VA.needsCustom()) {
2994  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2995  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2996  CLI.OutRegs.push_back(VA.getLocReg());
2997  } else if (VA.needsCustom()) {
2998  // FIXME: Handle custom args.
2999  return false;
3000  } else {
3001  assert(VA.isMemLoc() && "Assuming store on stack.");
3002 
3003  // Don't emit stores for undef values.
3004  if (isa<UndefValue>(ArgVal))
3005  continue;
3006 
3007  // Need to store on the stack.
3008  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3009 
3010  unsigned BEAlign = 0;
3011  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3012  BEAlign = 8 - ArgSize;
3013 
3014  Address Addr;
3015  Addr.setKind(Address::RegBase);
3016  Addr.setReg(AArch64::SP);
3017  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3018 
3019  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3020  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3021  MachinePointerInfo::getStack(Addr.getOffset()),
3022  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3023 
3024  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3025  return false;
3026  }
3027  }
3028  return true;
3029 }
3030 
3031 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3032  unsigned NumBytes) {
3033  CallingConv::ID CC = CLI.CallConv;
3034 
3035  // Issue CALLSEQ_END
3036  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3037  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3038  .addImm(NumBytes).addImm(0);
3039 
3040  // Now the return value.
3041  if (RetVT != MVT::isVoid) {
3042  SmallVector<CCValAssign, 16> RVLocs;
3043  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3044  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3045 
3046  // Only handle a single return value.
3047  if (RVLocs.size() != 1)
3048  return false;
3049 
3050  // Copy all of the result registers out of their specified physreg.
3051  MVT CopyVT = RVLocs[0].getValVT();
3052 
3053  // TODO: Handle big-endian results
3054  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3055  return false;
3056 
3057  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3058  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3059  TII.get(TargetOpcode::COPY), ResultReg)
3060  .addReg(RVLocs[0].getLocReg());
3061  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3062 
3063  CLI.ResultReg = ResultReg;
3064  CLI.NumResultRegs = 1;
3065  }
3066 
3067  return true;
3068 }
3069 
3070 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3071  CallingConv::ID CC = CLI.CallConv;
3072  bool IsTailCall = CLI.IsTailCall;
3073  bool IsVarArg = CLI.IsVarArg;
3074  const Value *Callee = CLI.Callee;
3075  MCSymbol *Symbol = CLI.Symbol;
3076 
3077  if (!Callee && !Symbol)
3078  return false;
3079 
3080  // Allow SelectionDAG isel to handle tail calls.
3081  if (IsTailCall)
3082  return false;
3083 
3084  CodeModel::Model CM = TM.getCodeModel();
3085  // Only support the small and large code model.
3086  if (CM != CodeModel::Small && CM != CodeModel::Large)
3087  return false;
3088 
3089  // FIXME: Add large code model support for ELF.
3090  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3091  return false;
3092 
3093  // Let SDISel handle vararg functions.
3094  if (IsVarArg)
3095  return false;
3096 
3097  // FIXME: Only handle *simple* calls for now.
3098  MVT RetVT;
3099  if (CLI.RetTy->isVoidTy())
3100  RetVT = MVT::isVoid;
3101  else if (!isTypeLegal(CLI.RetTy, RetVT))
3102  return false;
3103 
3104  for (auto Flag : CLI.OutFlags)
3105  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
3106  return false;
3107 
3108  // Set up the argument vectors.
3109  SmallVector<MVT, 16> OutVTs;
3110  OutVTs.reserve(CLI.OutVals.size());
3111 
3112  for (auto *Val : CLI.OutVals) {
3113  MVT VT;
3114  if (!isTypeLegal(Val->getType(), VT) &&
3115  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3116  return false;
3117 
3118  // We don't handle vector parameters yet.
3119  if (VT.isVector() || VT.getSizeInBits() > 64)
3120  return false;
3121 
3122  OutVTs.push_back(VT);
3123  }
3124 
3125  Address Addr;
3126  if (Callee && !computeCallAddress(Callee, Addr))
3127  return false;
3128 
3129  // Handle the arguments now that we've gotten them.
3130  unsigned NumBytes;
3131  if (!processCallArgs(CLI, OutVTs, NumBytes))
3132  return false;
3133 
3134  // Issue the call.
3135  MachineInstrBuilder MIB;
3136  if (CM == CodeModel::Small) {
3137  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3138  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3139  if (Symbol)
3140  MIB.addSym(Symbol, 0);
3141  else if (Addr.getGlobalValue())
3142  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3143  else if (Addr.getReg()) {
3144  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3145  MIB.addReg(Reg);
3146  } else
3147  return false;
3148  } else {
3149  unsigned CallReg = 0;
3150  if (Symbol) {
3151  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3152  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3153  ADRPReg)
3154  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3155 
3156  CallReg = createResultReg(&AArch64::GPR64RegClass);
3157  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3158  TII.get(AArch64::LDRXui), CallReg)
3159  .addReg(ADRPReg)
3160  .addSym(Symbol,
3161  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3162  } else if (Addr.getGlobalValue())
3163  CallReg = materializeGV(Addr.getGlobalValue());
3164  else if (Addr.getReg())
3165  CallReg = Addr.getReg();
3166 
3167  if (!CallReg)
3168  return false;
3169 
3170  const MCInstrDesc &II = TII.get(AArch64::BLR);
3171  CallReg = constrainOperandRegClass(II, CallReg, 0);
3172  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3173  }
3174 
3175  // Add implicit physical register uses to the call.
3176  for (auto Reg : CLI.OutRegs)
3177  MIB.addReg(Reg, RegState::Implicit);
3178 
3179  // Add a register mask with the call-preserved registers.
3180  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3181  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3182 
3183  CLI.Call = MIB;
3184 
3185  // Finish off the call including any return values.
3186  return finishCall(CLI, RetVT, NumBytes);
3187 }
3188 
3189 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3190  if (Alignment)
3191  return Len / Alignment <= 4;
3192  else
3193  return Len < 32;
3194 }
3195 
3196 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3197  uint64_t Len, unsigned Alignment) {
3198  // Make sure we don't bloat code by inlining very large memcpys.
3199  if (!isMemCpySmall(Len, Alignment))
3200  return false;
3201 
3202  int64_t UnscaledOffset = 0;
3203  Address OrigDest = Dest;
3204  Address OrigSrc = Src;
3205 
3206  while (Len) {
3207  MVT VT;
3208  if (!Alignment || Alignment >= 8) {
3209  if (Len >= 8)
3210  VT = MVT::i64;
3211  else if (Len >= 4)
3212  VT = MVT::i32;
3213  else if (Len >= 2)
3214  VT = MVT::i16;
3215  else {
3216  VT = MVT::i8;
3217  }
3218  } else {
3219  // Bound based on alignment.
3220  if (Len >= 4 && Alignment == 4)
3221  VT = MVT::i32;
3222  else if (Len >= 2 && Alignment == 2)
3223  VT = MVT::i16;
3224  else {
3225  VT = MVT::i8;
3226  }
3227  }
3228 
3229  unsigned ResultReg = emitLoad(VT, VT, Src);
3230  if (!ResultReg)
3231  return false;
3232 
3233  if (!emitStore(VT, ResultReg, Dest))
3234  return false;
3235 
3236  int64_t Size = VT.getSizeInBits() / 8;
3237  Len -= Size;
3238  UnscaledOffset += Size;
3239 
3240  // We need to recompute the unscaled offset for each iteration.
3241  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3242  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3243  }
3244 
3245  return true;
3246 }
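// Example (added for clarity, register choice illustrative): a 16-byte memcpy
// with 8-byte alignment is expanded by the loop above into two i64 load/store
// pairs, e.g. "ldr x8, [x1]; str x8, [x0]; ldr x8, [x1, #8]; str x8, [x0, #8]",
// instead of a call to memcpy.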
3247 
3248 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3249 /// into the user. The condition code will only be updated on success.
3250 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3251  const Instruction *I,
3252  const Value *Cond) {
3253  if (!isa<ExtractValueInst>(Cond))
3254  return false;
3255 
3256  const auto *EV = cast<ExtractValueInst>(Cond);
3257  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3258  return false;
3259 
3260  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3261  MVT RetVT;
3262  const Function *Callee = II->getCalledFunction();
3263  Type *RetTy =
3264  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3265  if (!isTypeLegal(RetTy, RetVT))
3266  return false;
3267 
3268  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3269  return false;
3270 
3271  const Value *LHS = II->getArgOperand(0);
3272  const Value *RHS = II->getArgOperand(1);
3273 
3274  // Canonicalize immediate to the RHS.
3275  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3276  isCommutativeIntrinsic(II))
3277  std::swap(LHS, RHS);
3278 
3279  // Simplify multiplies.
3280  Intrinsic::ID IID = II->getIntrinsicID();
3281  switch (IID) {
3282  default:
3283  break;
3284  case Intrinsic::smul_with_overflow:
3285  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3286  if (C->getValue() == 2)
3287  IID = Intrinsic::sadd_with_overflow;
3288  break;
3289  case Intrinsic::umul_with_overflow:
3290  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3291  if (C->getValue() == 2)
3292  IID = Intrinsic::uadd_with_overflow;
3293  break;
3294  }
3295 
3296  AArch64CC::CondCode TmpCC;
3297  switch (IID) {
3298  default:
3299  return false;
3300  case Intrinsic::sadd_with_overflow:
3301  case Intrinsic::ssub_with_overflow:
3302  TmpCC = AArch64CC::VS;
3303  break;
3304  case Intrinsic::uadd_with_overflow:
3305  TmpCC = AArch64CC::HS;
3306  break;
3307  case Intrinsic::usub_with_overflow:
3308  TmpCC = AArch64CC::LO;
3309  break;
3310  case Intrinsic::smul_with_overflow:
3311  case Intrinsic::umul_with_overflow:
3312  TmpCC = AArch64CC::NE;
3313  break;
3314  }
3315 
3316  // Check if both instructions are in the same basic block.
3317  if (!isValueAvailable(II))
3318  return false;
3319 
3320  // Make sure nothing is in the way.

3321  BasicBlock::const_iterator Start = I;
3322  BasicBlock::const_iterator End = II;
3323  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3324  // We only expect extractvalue instructions between the intrinsic and the
3325  // instruction to be selected.
3326  if (!isa<ExtractValueInst>(Itr))
3327  return false;
3328 
3329  // Check that the extractvalue operand comes from the intrinsic.
3330  const auto *EVI = cast<ExtractValueInst>(Itr);
3331  if (EVI->getAggregateOperand() != II)
3332  return false;
3333  }
3334 
3335  CC = TmpCC;
3336  return true;
3337 }
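// A typical IR pattern that the routine above can fold (sketch, names are
// illustrative):
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %val  = extractvalue { i32, i1 } %res, 0
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %cont
// The overflow bit feeding the user is folded into the flag-setting add, so the
// caller can test the VS condition directly instead of materializing %obit.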
3338 
3339 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3340  // FIXME: Handle more intrinsics.
3341  switch (II->getIntrinsicID()) {
3342  default: return false;
3343  case Intrinsic::frameaddress: {
3344  MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3345  MFI->setFrameAddressIsTaken(true);
3346 
3347  const AArch64RegisterInfo *RegInfo =
3348  static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
3349  unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3350  unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3351  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3352  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3353  // Recursively load frame address
3354  // ldr x0, [fp]
3355  // ldr x0, [x0]
3356  // ldr x0, [x0]
3357  // ...
3358  unsigned DestReg;
3359  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3360  while (Depth--) {
3361  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3362  SrcReg, /*IsKill=*/true, 0);
3363  assert(DestReg && "Unexpected LDR instruction emission failure.");
3364  SrcReg = DestReg;
3365  }
3366 
3367  updateValueMap(II, SrcReg);
3368  return true;
3369  }
3370  case Intrinsic::memcpy:
3371  case Intrinsic::memmove: {
3372  const auto *MTI = cast<MemTransferInst>(II);
3373  // Don't handle volatile.
3374  if (MTI->isVolatile())
3375  return false;
3376 
3377  // Disable inlining for memmove before calls to computeAddress. Otherwise,
3378  // we would emit dead code because we don't currently handle memmoves.
3379  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3380  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3381  // Small memcpy's are common enough that we want to do them without a call
3382  // if possible.
3383  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3384  unsigned Alignment = MTI->getAlignment();
3385  if (isMemCpySmall(Len, Alignment)) {
3386  Address Dest, Src;
3387  if (!computeAddress(MTI->getRawDest(), Dest) ||
3388  !computeAddress(MTI->getRawSource(), Src))
3389  return false;
3390  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3391  return true;
3392  }
3393  }
3394 
3395  if (!MTI->getLength()->getType()->isIntegerTy(64))
3396  return false;
3397 
3398  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3399  // Fast instruction selection doesn't support the special
3400  // address spaces.
3401  return false;
3402 
3403  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3404  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3405  }
3406  case Intrinsic::memset: {
3407  const MemSetInst *MSI = cast<MemSetInst>(II);
3408  // Don't handle volatile.
3409  if (MSI->isVolatile())
3410  return false;
3411 
3412  if (!MSI->getLength()->getType()->isIntegerTy(64))
3413  return false;
3414 
3415  if (MSI->getDestAddressSpace() > 255)
3416  // Fast instruction selection doesn't support the special
3417  // address spaces.
3418  return false;
3419 
3420  return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3421  }
3422  case Intrinsic::sin:
3423  case Intrinsic::cos:
3424  case Intrinsic::pow: {
3425  MVT RetVT;
3426  if (!isTypeLegal(II->getType(), RetVT))
3427  return false;
3428 
3429  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3430  return false;
3431 
3432  static const RTLIB::Libcall LibCallTable[3][2] = {
3433  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3434  { RTLIB::COS_F32, RTLIB::COS_F64 },
3435  { RTLIB::POW_F32, RTLIB::POW_F64 }
3436  };
3437  RTLIB::Libcall LC;
3438  bool Is64Bit = RetVT == MVT::f64;
3439  switch (II->getIntrinsicID()) {
3440  default:
3441  llvm_unreachable("Unexpected intrinsic.");
3442  case Intrinsic::sin:
3443  LC = LibCallTable[0][Is64Bit];
3444  break;
3445  case Intrinsic::cos:
3446  LC = LibCallTable[1][Is64Bit];
3447  break;
3448  case Intrinsic::pow:
3449  LC = LibCallTable[2][Is64Bit];
3450  break;
3451  }
3452 
3453  ArgListTy Args;
3454  Args.reserve(II->getNumArgOperands());
3455 
3456  // Populate the argument list.
3457  for (auto &Arg : II->arg_operands()) {
3458  ArgListEntry Entry;
3459  Entry.Val = Arg;
3460  Entry.Ty = Arg->getType();
3461  Args.push_back(Entry);
3462  }
3463 
3464  CallLoweringInfo CLI;
3465  MCContext &Ctx = MF->getContext();
3466  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3467  TLI.getLibcallName(LC), std::move(Args));
3468  if (!lowerCallTo(CLI))
3469  return false;
3470  updateValueMap(II, CLI.ResultReg);
3471  return true;
3472  }
3473  case Intrinsic::fabs: {
3474  MVT VT;
3475  if (!isTypeLegal(II->getType(), VT))
3476  return false;
3477 
3478  unsigned Opc;
3479  switch (VT.SimpleTy) {
3480  default:
3481  return false;
3482  case MVT::f32:
3483  Opc = AArch64::FABSSr;
3484  break;
3485  case MVT::f64:
3486  Opc = AArch64::FABSDr;
3487  break;
3488  }
3489  unsigned SrcReg = getRegForValue(II->getOperand(0));
3490  if (!SrcReg)
3491  return false;
3492  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3493  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3494  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3495  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3496  updateValueMap(II, ResultReg);
3497  return true;
3498  }
3499  case Intrinsic::trap: {
3500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3501  .addImm(1);
3502  return true;
3503  }
3504  case Intrinsic::sqrt: {
3505  Type *RetTy = II->getCalledFunction()->getReturnType();
3506 
3507  MVT VT;
3508  if (!isTypeLegal(RetTy, VT))
3509  return false;
3510 
3511  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3512  if (!Op0Reg)
3513  return false;
3514  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3515 
3516  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3517  if (!ResultReg)
3518  return false;
3519 
3520  updateValueMap(II, ResultReg);
3521  return true;
3522  }
3523  case Intrinsic::sadd_with_overflow:
3524  case Intrinsic::uadd_with_overflow:
3525  case Intrinsic::ssub_with_overflow:
3526  case Intrinsic::usub_with_overflow:
3527  case Intrinsic::smul_with_overflow:
3528  case Intrinsic::umul_with_overflow: {
3529  // This implements the basic lowering of the xalu with overflow intrinsics.
3530  const Function *Callee = II->getCalledFunction();
3531  auto *Ty = cast<StructType>(Callee->getReturnType());
3532  Type *RetTy = Ty->getTypeAtIndex(0U);
3533 
3534  MVT VT;
3535  if (!isTypeLegal(RetTy, VT))
3536  return false;
3537 
3538  if (VT != MVT::i32 && VT != MVT::i64)
3539  return false;
3540 
3541  const Value *LHS = II->getArgOperand(0);
3542  const Value *RHS = II->getArgOperand(1);
3543  // Canonicalize immediate to the RHS.
3544  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3545  isCommutativeIntrinsic(II))
3546  std::swap(LHS, RHS);
3547 
3548  // Simplify multiplies.
3549  Intrinsic::ID IID = II->getIntrinsicID();
3550  switch (IID) {
3551  default:
3552  break;
3553  case Intrinsic::smul_with_overflow:
3554  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3555  if (C->getValue() == 2) {
3556  IID = Intrinsic::sadd_with_overflow;
3557  RHS = LHS;
3558  }
3559  break;
3560  case Intrinsic::umul_with_overflow:
3561  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3562  if (C->getValue() == 2) {
3563  IID = Intrinsic::uadd_with_overflow;
3564  RHS = LHS;
3565  }
3566  break;
3567  }
3568 
3569  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3570  AArch64CC::CondCode CC = AArch64CC::Invalid;
3571  switch (IID) {
3572  default: llvm_unreachable("Unexpected intrinsic!");
3573  case Intrinsic::sadd_with_overflow:
3574  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3575  CC = AArch64CC::VS;
3576  break;
3577  case Intrinsic::uadd_with_overflow:
3578  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3579  CC = AArch64CC::HS;
3580  break;
3581  case Intrinsic::ssub_with_overflow:
3582  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3583  CC = AArch64CC::VS;
3584  break;
3585  case Intrinsic::usub_with_overflow:
3586  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3587  CC = AArch64CC::LO;
3588  break;
3589  case Intrinsic::smul_with_overflow: {
3590  CC = AArch64CC::NE;
3591  unsigned LHSReg = getRegForValue(LHS);
3592  if (!LHSReg)
3593  return false;
3594  bool LHSIsKill = hasTrivialKill(LHS);
3595 
3596  unsigned RHSReg = getRegForValue(RHS);
3597  if (!RHSReg)
3598  return false;
3599  bool RHSIsKill = hasTrivialKill(RHS);
3600 
3601  if (VT == MVT::i32) {
3602  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3603  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3604  /*IsKill=*/false, 32);
3605  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3606  AArch64::sub_32);
3607  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3608  AArch64::sub_32);
3609  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3610  AArch64_AM::ASR, 31, /*WantResult=*/false);
3611  } else {
3612  assert(VT == MVT::i64 && "Unexpected value type.");
3613  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3614  // reused in the next instruction.
3615  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3616  /*IsKill=*/false);
3617  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3618  RHSReg, RHSIsKill);
3619  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3620  AArch64_AM::ASR, 63, /*WantResult=*/false);
3621  }
3622  break;
3623  }
3624  case Intrinsic::umul_with_overflow: {
3625  CC = AArch64CC::NE;
3626  unsigned LHSReg = getRegForValue(LHS);
3627  if (!LHSReg)
3628  return false;
3629  bool LHSIsKill = hasTrivialKill(LHS);
3630 
3631  unsigned RHSReg = getRegForValue(RHS);
3632  if (!RHSReg)
3633  return false;
3634  bool RHSIsKill = hasTrivialKill(RHS);
3635 
3636  if (VT == MVT::i32) {
3637  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3638  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3639  /*IsKill=*/false, AArch64_AM::LSR, 32,
3640  /*WantResult=*/false);
3641  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3642  AArch64::sub_32);
3643  } else {
3644  assert(VT == MVT::i64 && "Unexpected value type.");
3645  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3646  // reused in the next instruction.
3647  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3648  /*IsKill=*/false);
3649  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3650  RHSReg, RHSIsKill);
3651  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3652  /*IsKill=*/false, /*WantResult=*/false);
3653  }
3654  break;
3655  }
3656  }
3657 
3658  if (MulReg) {
3659  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3660  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3661  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3662  }
3663 
3664  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3665  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3666  /*IsKill=*/true, getInvertedCondCode(CC));
3667  (void)ResultReg2;
3668  assert((ResultReg1 + 1) == ResultReg2 &&
3669  "Nonconsecutive result registers.");
3670  updateValueMap(II, ResultReg1, 2);
3671  return true;
3672  }
3673  }
3674  return false;
3675 }
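// For reference, the overflow-intrinsic lowering above emits a flag-setting
// operation followed by a CSINC on the inverted condition. For
// llvm.sadd.with.overflow.i32 this comes out roughly as (registers are
// illustrative only):
//   adds w8, w0, w1        // result value, sets flags
//   cset w9, vs            // overflow bit (CSINC wzr, wzr, inverted cc)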
3676 
3677 bool AArch64FastISel::selectRet(const Instruction *I) {
3678  const ReturnInst *Ret = cast<ReturnInst>(I);
3679  const Function &F = *I->getParent()->getParent();
3680 
3681  if (!FuncInfo.CanLowerReturn)
3682  return false;
3683 
3684  if (F.isVarArg())
3685  return false;
3686 
3687  // Build a list of return value registers.
3688  SmallVector<unsigned, 4> RetRegs;
3689 
3690  if (Ret->getNumOperands() > 0) {
3691  CallingConv::ID CC = F.getCallingConv();
3692  SmallVector<ISD::OutputArg, 4> Outs;
3693  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3694 
3695  // Analyze operands of the call, assigning locations to each operand.
3696  SmallVector<CCValAssign, 16> ValLocs;
3697  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3698  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3699  : RetCC_AArch64_AAPCS;
3700  CCInfo.AnalyzeReturn(Outs, RetCC);
3701 
3702  // Only handle a single return value for now.
3703  if (ValLocs.size() != 1)
3704  return false;
3705 
3706  CCValAssign &VA = ValLocs[0];
3707  const Value *RV = Ret->getOperand(0);
3708 
3709  // Don't bother handling odd stuff for now.
3710  if ((VA.getLocInfo() != CCValAssign::Full) &&
3711  (VA.getLocInfo() != CCValAssign::BCvt))
3712  return false;
3713 
3714  // Only handle register returns for now.
3715  if (!VA.isRegLoc())
3716  return false;
3717 
3718  unsigned Reg = getRegForValue(RV);
3719  if (Reg == 0)
3720  return false;
3721 
3722  unsigned SrcReg = Reg + VA.getValNo();
3723  unsigned DestReg = VA.getLocReg();
3724  // Avoid a cross-class copy. This is very unlikely.
3725  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3726  return false;
3727 
3728  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3729  if (!RVEVT.isSimple())
3730  return false;
3731 
3732  // Vectors (of > 1 lane) in big endian need tricky handling.
3733  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3734  !Subtarget->isLittleEndian())
3735  return false;
3736 
3737  MVT RVVT = RVEVT.getSimpleVT();
3738  if (RVVT == MVT::f128)
3739  return false;
3740 
3741  MVT DestVT = VA.getValVT();
3742  // Special handling for extended integers.
3743  if (RVVT != DestVT) {
3744  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3745  return false;
3746 
3747  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3748  return false;
3749 
3750  bool IsZExt = Outs[0].Flags.isZExt();
3751  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3752  if (SrcReg == 0)
3753  return false;
3754  }
3755 
3756  // Make the copy.
3757  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3758  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3759 
3760  // Add register to return instruction.
3761  RetRegs.push_back(VA.getLocReg());
3762  }
3763 
3764  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3765  TII.get(AArch64::RET_ReallyLR));
3766  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3767  MIB.addReg(RetRegs[i], RegState::Implicit);
3768  return true;
3769 }
3770 
3771 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3772  Type *DestTy = I->getType();
3773  Value *Op = I->getOperand(0);
3774  Type *SrcTy = Op->getType();
3775 
3776  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3777  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3778  if (!SrcEVT.isSimple())
3779  return false;
3780  if (!DestEVT.isSimple())
3781  return false;
3782 
3783  MVT SrcVT = SrcEVT.getSimpleVT();
3784  MVT DestVT = DestEVT.getSimpleVT();
3785 
3786  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3787  SrcVT != MVT::i8)
3788  return false;
3789  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3790  DestVT != MVT::i1)
3791  return false;
3792 
3793  unsigned SrcReg = getRegForValue(Op);
3794  if (!SrcReg)
3795  return false;
3796  bool SrcIsKill = hasTrivialKill(Op);
3797 
3798  // If we're truncating from i64 to a smaller non-legal type then generate an
3799  // AND. Otherwise, we know the high bits are undefined and a truncate only
3800  // generates a COPY. We cannot also mark the source register as the result
3801  // register, because this can incorrectly transfer the kill flag onto the
3802  // source register.
3803  unsigned ResultReg;
3804  if (SrcVT == MVT::i64) {
3805  uint64_t Mask = 0;
3806  switch (DestVT.SimpleTy) {
3807  default:
3808  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3809  return false;
3810  case MVT::i1:
3811  Mask = 0x1;
3812  break;
3813  case MVT::i8:
3814  Mask = 0xff;
3815  break;
3816  case MVT::i16:
3817  Mask = 0xffff;
3818  break;
3819  }
3820  // Issue an extract_subreg to get the lower 32-bits.
3821  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3822  AArch64::sub_32);
3823  // Create the AND instruction which performs the actual truncation.
3824  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3825  assert(ResultReg && "Unexpected AND instruction emission failure.");
3826  } else {
3827  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3828  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3829  TII.get(TargetOpcode::COPY), ResultReg)
3830  .addReg(SrcReg, getKillRegState(SrcIsKill));
3831  }
3832 
3833  updateValueMap(I, ResultReg);
3834  return true;
3835 }
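// Example of the i64 truncation path above: for "trunc i64 %x to i8" the low 32
// bits are first renamed as a W register via extract_subreg (a no-op at the
// machine-code level) and then masked, roughly:
//   and w0, w8, #0xff       // %x assumed to live in x8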
3836 
3837 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3838  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3839  DestVT == MVT::i64) &&
3840  "Unexpected value type.");
3841  // Handle i8 and i16 as i32.
3842  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3843  DestVT = MVT::i32;
3844 
3845  if (IsZExt) {
3846  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3847  assert(ResultReg && "Unexpected AND instruction emission failure.");
3848  if (DestVT == MVT::i64) {
3849  // We're zero-extending i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3850  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3851  unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3852  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3853  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3854  .addImm(0)
3855  .addReg(ResultReg)
3856  .addImm(AArch64::sub_32);
3857  ResultReg = Reg64;
3858  }
3859  return ResultReg;
3860  } else {
3861  if (DestVT == MVT::i64) {
3862  // FIXME: We're sign-extending i1 to i64.
3863  return 0;
3864  }
3865  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3866  /*TODO:IsKill=*/false, 0, 0);
3867  }
3868 }
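// Example for the zero-extension path above: an i1 in w8 zero-extended to i64
// becomes roughly
//   and w8, w8, #0x1
// followed by a SUBREG_TO_REG, which costs nothing at the machine-code level
// because the 32-bit AND already clears the upper 32 bits of x8.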
3869 
3870 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3871  unsigned Op1, bool Op1IsKill) {
3872  unsigned Opc, ZReg;
3873  switch (RetVT.SimpleTy) {
3874  default: return 0;
3875  case MVT::i8:
3876  case MVT::i16:
3877  case MVT::i32:
3878  RetVT = MVT::i32;
3879  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3880  case MVT::i64:
3881  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3882  }
3883 
3884  const TargetRegisterClass *RC =
3885  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3886  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3887  ZReg, /*IsKill=*/true);
3888 }
3889 
3890 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3891  unsigned Op1, bool Op1IsKill) {
3892  if (RetVT != MVT::i64)
3893  return 0;
3894 
3895  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3896  Op0, Op0IsKill, Op1, Op1IsKill,
3897  AArch64::XZR, /*IsKill=*/true);
3898 }
3899 
3900 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3901  unsigned Op1, bool Op1IsKill) {
3902  if (RetVT != MVT::i64)
3903  return 0;
3904 
3905  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3906  Op0, Op0IsKill, Op1, Op1IsKill,
3907  AArch64::XZR, /*IsKill=*/true);
3908 }
3909 
3910 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3911  unsigned Op1Reg, bool Op1IsKill) {
3912  unsigned Opc = 0;
3913  bool NeedTrunc = false;
3914  uint64_t Mask = 0;
3915  switch (RetVT.SimpleTy) {
3916  default: return 0;
3917  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
3918  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3919  case MVT::i32: Opc = AArch64::LSLVWr; break;
3920  case MVT::i64: Opc = AArch64::LSLVXr; break;
3921  }
3922 
3923  const TargetRegisterClass *RC =
3924  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3925  if (NeedTrunc) {
3926  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3927  Op1IsKill = true;
3928  }
3929  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3930  Op1IsKill);
3931  if (NeedTrunc)
3932  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3933  return ResultReg;
3934 }
3935 
3936 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3937  bool Op0IsKill, uint64_t Shift,
3938  bool IsZExt) {
3939  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3940  "Unexpected source/return type pair.");
3941  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3942  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3943  "Unexpected source value type.");
3944  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3945  RetVT == MVT::i64) && "Unexpected return value type.");
3946 
3947  bool Is64Bit = (RetVT == MVT::i64);
3948  unsigned RegSize = Is64Bit ? 64 : 32;
3949  unsigned DstBits = RetVT.getSizeInBits();
3950  unsigned SrcBits = SrcVT.getSizeInBits();
3951  const TargetRegisterClass *RC =
3952  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3953 
3954  // Just emit a copy for "zero" shifts.
3955  if (Shift == 0) {
3956  if (RetVT == SrcVT) {
3957  unsigned ResultReg = createResultReg(RC);
3958  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3959  TII.get(TargetOpcode::COPY), ResultReg)
3960  .addReg(Op0, getKillRegState(Op0IsKill));
3961  return ResultReg;
3962  } else
3963  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3964  }
3965 
3966  // Don't deal with undefined shifts.
3967  if (Shift >= DstBits)
3968  return 0;
3969 
3970  // For immediate shifts we can fold the zero-/sign-extension into the shift.
3971  // {S|U}BFM Wd, Wn, #r, #s
3972  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3973 
3974  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3975  // %2 = shl i16 %1, 4
3976  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3977  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3978  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3979  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3980 
3981  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3982  // %2 = shl i16 %1, 8
3983  // Wd<32+7-24,32-24> = Wn<7:0>
3984  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3985  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3986  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3987 
3988  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3989  // %2 = shl i16 %1, 12
3990  // Wd<32+3-20,32-20> = Wn<3:0>
3991  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3992  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3993  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3994 
3995  unsigned ImmR = RegSize - Shift;
3996  // Limit the width to the length of the source type.
3997  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3998  static const unsigned OpcTable[2][2] = {
3999  {AArch64::SBFMWri, AArch64::SBFMXri},
4000  {AArch64::UBFMWri, AArch64::UBFMXri}
4001  };
4002  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4003  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4004  unsigned TmpReg = MRI.createVirtualRegister(RC);
4005  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4006  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4007  .addImm(0)
4008  .addReg(Op0, getKillRegState(Op0IsKill))
4009  .addImm(AArch64::sub_32);
4010  Op0 = TmpReg;
4011  Op0IsKill = true;
4012  }
4013  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4014 }
4015 
4016 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4017  unsigned Op1Reg, bool Op1IsKill) {
4018  unsigned Opc = 0;
4019  bool NeedTrunc = false;
4020  uint64_t Mask = 0;
4021  switch (RetVT.SimpleTy) {
4022  default: return 0;
4023  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4024  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4025  case MVT::i32: Opc = AArch64::LSRVWr; break;
4026  case MVT::i64: Opc = AArch64::LSRVXr; break;
4027  }
4028 
4029  const TargetRegisterClass *RC =
4030  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4031  if (NeedTrunc) {
4032  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4033  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4034  Op0IsKill = Op1IsKill = true;
4035  }
4036  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4037  Op1IsKill);
4038  if (NeedTrunc)
4039  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4040  return ResultReg;
4041 }
4042 
4043 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4044  bool Op0IsKill, uint64_t Shift,
4045  bool IsZExt) {
4046  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4047  "Unexpected source/return type pair.");
4048  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4049  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4050  "Unexpected source value type.");
4051  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4052  RetVT == MVT::i64) && "Unexpected return value type.");
4053 
4054  bool Is64Bit = (RetVT == MVT::i64);
4055  unsigned RegSize = Is64Bit ? 64 : 32;
4056  unsigned DstBits = RetVT.getSizeInBits();
4057  unsigned SrcBits = SrcVT.getSizeInBits();
4058  const TargetRegisterClass *RC =
4059  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4060 
4061  // Just emit a copy for "zero" shifts.
4062  if (Shift == 0) {
4063  if (RetVT == SrcVT) {
4064  unsigned ResultReg = createResultReg(RC);
4065  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4066  TII.get(TargetOpcode::COPY), ResultReg)
4067  .addReg(Op0, getKillRegState(Op0IsKill));
4068  return ResultReg;
4069  } else
4070  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4071  }
4072 
4073  // Don't deal with undefined shifts.
4074  if (Shift >= DstBits)
4075  return 0;
4076 
4077  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4078  // {S|U}BFM Wd, Wn, #r, #s
4079  // Wd<s-r:0> = Wn<s:r> when r <= s
4080 
4081  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4082  // %2 = lshr i16 %1, 4
4083  // Wd<7-4:0> = Wn<7:4>
4084  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4085  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4086  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4087 
4088  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4089  // %2 = lshr i16 %1, 8
4090  // Wd<7-7,0> = Wn<7:7>
4091  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4092  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4093  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4094 
4095  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4096  // %2 = lshr i16 %1, 12
4097  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4098  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4099  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4100  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4101 
4102  if (Shift >= SrcBits && IsZExt)
4103  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4104 
4105  // It is not possible to fold a sign-extend into the LShr instruction. In this
4106  // case emit a sign-extend.
4107  if (!IsZExt) {
4108  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4109  if (!Op0)
4110  return 0;
4111  Op0IsKill = true;
4112  SrcVT = RetVT;
4113  SrcBits = SrcVT.getSizeInBits();
4114  IsZExt = true;
4115  }
4116 
4117  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4118  unsigned ImmS = SrcBits - 1;
4119  static const unsigned OpcTable[2][2] = {
4120  {AArch64::SBFMWri, AArch64::SBFMXri},
4121  {AArch64::UBFMWri, AArch64::UBFMXri}
4122  };
4123  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4124  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4125  unsigned TmpReg = MRI.createVirtualRegister(RC);
4126  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4127  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4128  .addImm(0)
4129  .addReg(Op0, getKillRegState(Op0IsKill))
4130  .addImm(AArch64::sub_32);
4131  Op0 = TmpReg;
4132  Op0IsKill = true;
4133  }
4134  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4135 }
4136 
4137 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4138  unsigned Op1Reg, bool Op1IsKill) {
4139  unsigned Opc = 0;
4140  bool NeedTrunc = false;
4141  uint64_t Mask = 0;
4142  switch (RetVT.SimpleTy) {
4143  default: return 0;
4144  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4145  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4146  case MVT::i32: Opc = AArch64::ASRVWr; break;
4147  case MVT::i64: Opc = AArch64::ASRVXr; break;
4148  }
4149 
4150  const TargetRegisterClass *RC =
4151  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4152  if (NeedTrunc) {
4153  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4154  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4155  Op0IsKill = Op1IsKill = true;
4156  }
4157  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4158  Op1IsKill);
4159  if (NeedTrunc)
4160  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4161  return ResultReg;
4162 }
4163 
4164 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4165  bool Op0IsKill, uint64_t Shift,
4166  bool IsZExt) {
4167  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4168  "Unexpected source/return type pair.");
4169  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4170  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4171  "Unexpected source value type.");
4172  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4173  RetVT == MVT::i64) && "Unexpected return value type.");
4174 
4175  bool Is64Bit = (RetVT == MVT::i64);
4176  unsigned RegSize = Is64Bit ? 64 : 32;
4177  unsigned DstBits = RetVT.getSizeInBits();
4178  unsigned SrcBits = SrcVT.getSizeInBits();
4179  const TargetRegisterClass *RC =
4180  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4181 
4182  // Just emit a copy for "zero" shifts.
4183  if (Shift == 0) {
4184  if (RetVT == SrcVT) {
4185  unsigned ResultReg = createResultReg(RC);
4186  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4187  TII.get(TargetOpcode::COPY), ResultReg)
4188  .addReg(Op0, getKillRegState(Op0IsKill));
4189  return ResultReg;
4190  } else
4191  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4192  }
4193 
4194  // Don't deal with undefined shifts.
4195  if (Shift >= DstBits)
4196  return 0;
4197 
4198  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4199  // {S|U}BFM Wd, Wn, #r, #s
4200  // Wd<s-r:0> = Wn<s:r> when r <= s
4201 
4202  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4203  // %2 = ashr i16 %1, 4
4204  // Wd<7-4:0> = Wn<7:4>
4205  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4206  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4207  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4208 
4209  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4210  // %2 = ashr i16 %1, 8
4211  // Wd<7-7,0> = Wn<7:7>
4212  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4213  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4214  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4215 
4216  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4217  // %2 = ashr i16 %1, 12
4218  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4219  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4220  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4221  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4222 
4223  if (Shift >= SrcBits && IsZExt)
4224  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4225 
4226  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4227  unsigned ImmS = SrcBits - 1;
4228  static const unsigned OpcTable[2][2] = {
4229  {AArch64::SBFMWri, AArch64::SBFMXri},
4230  {AArch64::UBFMWri, AArch64::UBFMXri}
4231  };
4232  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4233  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4234  unsigned TmpReg = MRI.createVirtualRegister(RC);
4235  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4236  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4237  .addImm(0)
4238  .addReg(Op0, getKillRegState(Op0IsKill))
4239  .addImm(AArch64::sub_32);
4240  Op0 = TmpReg;
4241  Op0IsKill = true;
4242  }
4243  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4244 }
4245 
4246 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4247  bool IsZExt) {
4248  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4249 
4250  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4251  // DestVT are odd things, so test to make sure that they are both types we can
4252  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4253  // bail out to SelectionDAG.
4254  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4255  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4256  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4257  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4258  return 0;
4259 
4260  unsigned Opc;
4261  unsigned Imm = 0;
4262 
4263  switch (SrcVT.SimpleTy) {
4264  default:
4265  return 0;
4266  case MVT::i1:
4267  return emiti1Ext(SrcReg, DestVT, IsZExt);
4268  case MVT::i8:
4269  if (DestVT == MVT::i64)
4270  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4271  else
4272  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4273  Imm = 7;
4274  break;
4275  case MVT::i16:
4276  if (DestVT == MVT::i64)
4277  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4278  else
4279  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4280  Imm = 15;
4281  break;
4282  case MVT::i32:
4283  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4284  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4285  Imm = 31;
4286  break;
4287  }
4288 
4289  // Handle i8 and i16 as i32.
4290  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4291  DestVT = MVT::i32;
4292  else if (DestVT == MVT::i64) {
4293  unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4294  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4295  TII.get(AArch64::SUBREG_TO_REG), Src64)
4296  .addImm(0)
4297  .addReg(SrcReg)
4298  .addImm(AArch64::sub_32);
4299  SrcReg = Src64;
4300  }
4301 
4302  const TargetRegisterClass *RC =
4303  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4304  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4305 }
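// Example for the routine above: sign-extending an i16 to i64 widens the source
// with SUBREG_TO_REG and then emits SBFMXri with ImmR=0, ImmS=15, which the
// assembler prints as the SXTH alias, roughly:
//   sxth x8, w8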
4306 
4307 static bool isZExtLoad(const MachineInstr *LI) {
4308  switch (LI->getOpcode()) {
4309  default:
4310  return false;
4311  case AArch64::LDURBBi:
4312  case AArch64::LDURHHi:
4313  case AArch64::LDURWi:
4314  case AArch64::LDRBBui:
4315  case AArch64::LDRHHui:
4316  case AArch64::LDRWui:
4317  case AArch64::LDRBBroX:
4318  case AArch64::LDRHHroX:
4319  case AArch64::LDRWroX:
4320  case AArch64::LDRBBroW:
4321  case AArch64::LDRHHroW:
4322  case AArch64::LDRWroW:
4323  return true;
4324  }
4325 }
4326 
4327 static bool isSExtLoad(const MachineInstr *LI) {
4328  switch (LI->getOpcode()) {
4329  default:
4330  return false;
4331  case AArch64::LDURSBWi:
4332  case AArch64::LDURSHWi:
4333  case AArch64::LDURSBXi:
4334  case AArch64::LDURSHXi:
4335  case AArch64::LDURSWi:
4336  case AArch64::LDRSBWui:
4337  case AArch64::LDRSHWui:
4338  case AArch64::LDRSBXui:
4339  case AArch64::LDRSHXui:
4340  case AArch64::LDRSWui:
4341  case AArch64::LDRSBWroX:
4342  case AArch64::LDRSHWroX:
4343  case AArch64::LDRSBXroX:
4344  case AArch64::LDRSHXroX:
4345  case AArch64::LDRSWroX:
4346  case AArch64::LDRSBWroW:
4347  case AArch64::LDRSHWroW:
4348  case AArch64::LDRSBXroW:
4349  case AArch64::LDRSHXroW:
4350  case AArch64::LDRSWroW:
4351  return true;
4352  }
4353 }
4354 
4355 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4356  MVT SrcVT) {
4357  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4358  if (!LI || !LI->hasOneUse())
4359  return false;
4360 
4361  // Check if the load instruction has already been selected.
4362  unsigned Reg = lookUpRegForValue(LI);
4363  if (!Reg)
4364  return false;
4365 
4366  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4367  if (!MI)
4368  return false;
4369 
4370  // Check if the correct load instruction has been emitted - SelectionDAG might
4371  // have emitted a zero-extending load, but we need a sign-extending load.
4372  bool IsZExt = isa<ZExtInst>(I);
4373  const auto *LoadMI = MI;
4374  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4375  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4376  unsigned LoadReg = MI->getOperand(1).getReg();
4377  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4378  assert(LoadMI && "Expected valid instruction");
4379  }
4380  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4381  return false;
4382 
4383  // Nothing to be done.
4384  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4385  updateValueMap(I, Reg);
4386  return true;
4387  }
4388 
4389  if (IsZExt) {
4390  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4391  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4392  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4393  .addImm(0)
4394  .addReg(Reg, getKillRegState(true))
4395  .addImm(AArch64::sub_32);
4396  Reg = Reg64;
4397  } else {
4398  assert((MI->getOpcode() == TargetOpcode::COPY &&
4399  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4400  "Expected copy instruction");
4401  Reg = MI->getOperand(1).getReg();
4402  MI->eraseFromParent();
4403  }
4404  updateValueMap(I, Reg);
4405  return true;
4406 }
4407 
4408 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4409  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4410  "Unexpected integer extend instruction.");
4411  MVT RetVT;
4412  MVT SrcVT;
4413  if (!isTypeSupported(I->getType(), RetVT))
4414  return false;
4415 
4416  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4417  return false;
4418 
4419  // Try to optimize already sign-/zero-extended values from load instructions.
4420  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4421  return true;
4422 
4423  unsigned SrcReg = getRegForValue(I->getOperand(0));
4424  if (!SrcReg)
4425  return false;
4426  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4427 
4428  // Try to optimize already sign-/zero-extended values from function arguments.
4429  bool IsZExt = isa<ZExtInst>(I);
4430  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4431  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4432  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4433  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4434  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4435  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4436  .addImm(0)
4437  .addReg(SrcReg, getKillRegState(SrcIsKill))
4438  .addImm(AArch64::sub_32);
4439  SrcReg = ResultReg;
4440  }
4441  // Conservatively clear all kill flags from all uses, because we are
4442  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4443  // level. The result of the instruction at IR level might have been
4444  // trivially dead, which is no longer true.
4445  unsigned UseReg = lookUpRegForValue(I);
4446  if (UseReg)
4447  MRI.clearKillFlags(UseReg);
4448 
4449  updateValueMap(I, SrcReg);
4450  return true;
4451  }
4452  }
4453 
4454  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4455  if (!ResultReg)
4456  return false;
4457 
4458  updateValueMap(I, ResultReg);
4459  return true;
4460 }
4461 
4462 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4463  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4464  if (!DestEVT.isSimple())
4465  return false;
4466 
4467  MVT DestVT = DestEVT.getSimpleVT();
4468  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4469  return false;
4470 
4471  unsigned DivOpc;
4472  bool Is64bit = (DestVT == MVT::i64);
4473  switch (ISDOpcode) {
4474  default:
4475  return false;
4476  case ISD::SREM:
4477  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4478  break;
4479  case ISD::UREM:
4480  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4481  break;
4482  }
4483  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4484  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4485  if (!Src0Reg)
4486  return false;
4487  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4488 
4489  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4490  if (!Src1Reg)
4491  return false;
4492  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4493 
4494  const TargetRegisterClass *RC =
4495  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4496  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4497  Src1Reg, /*IsKill=*/false);
4498  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4499  // The remainder is computed as numerator - (quotient * denominator) using the
4500  // MSUB instruction.
4501  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4502  Src1Reg, Src1IsKill, Src0Reg,
4503  Src0IsKill);
4504  updateValueMap(I, ResultReg);
4505  return true;
4506 }
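// For illustration, "srem i32 %a, %b" is lowered by the code above to roughly
// (register assignments are illustrative only):
//   sdiv w8, w0, w1          // quotient
//   msub w0, w8, w1, w0      // remainder = %a - quotient * %b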
4507 
4508 bool AArch64FastISel::selectMul(const Instruction *I) {
4509  MVT VT;
4510  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4511  return false;
4512 
4513  if (VT.isVector())
4514  return selectBinaryOp(I, ISD::MUL);
4515 
4516  const Value *Src0 = I->getOperand(0);
4517  const Value *Src1 = I->getOperand(1);
4518  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4519  if (C->getValue().isPowerOf2())
4520  std::swap(Src0, Src1);
4521 
4522  // Try to simplify to a shift instruction.
4523  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4524  if (C->getValue().isPowerOf2()) {
4525  uint64_t ShiftVal = C->getValue().logBase2();
4526  MVT SrcVT = VT;
4527  bool IsZExt = true;
4528  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4529  if (!isIntExtFree(ZExt)) {
4530  MVT VT;
4531  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4532  SrcVT = VT;
4533  IsZExt = true;
4534  Src0 = ZExt->getOperand(0);
4535  }
4536  }
4537  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4538  if (!isIntExtFree(SExt)) {
4539  MVT VT;
4540  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4541  SrcVT = VT;
4542  IsZExt = false;
4543  Src0 = SExt->getOperand(0);
4544  }
4545  }
4546  }
4547 
4548  unsigned Src0Reg = getRegForValue(Src0);
4549  if (!Src0Reg)
4550  return false;
4551  bool Src0IsKill = hasTrivialKill(Src0);
4552 
4553  unsigned ResultReg =
4554  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4555 
4556  if (ResultReg) {
4557  updateValueMap(I, ResultReg);
4558  return true;
4559  }
4560  }
4561 
4562  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4563  if (!Src0Reg)
4564  return false;
4565  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4566 
4567  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4568  if (!Src1Reg)
4569  return false;
4570  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4571 
4572  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4573 
4574  if (!ResultReg)
4575  return false;
4576 
4577  updateValueMap(I, ResultReg);
4578  return true;
4579 }
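// For illustration, the power-of-two path above turns "mul i32 %x, 8" into a
// single left shift, roughly:
//   lsl w0, w0, #3
// while non-constant multiplies fall through to emitMul_rr, i.e. MADD with the
// zero register as the addend.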
4580 
4581 bool AArch64FastISel::selectShift(const Instruction *I) {
4582  MVT RetVT;
4583  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4584  return false;
4585 
4586  if (RetVT.isVector())
4587  return selectOperator(I, I->getOpcode());
4588 
4589  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4590  unsigned ResultReg = 0;
4591  uint64_t ShiftVal = C->getZExtValue();
4592  MVT SrcVT = RetVT;
4593  bool IsZExt = I->getOpcode() != Instruction::AShr;
4594  const Value *Op0 = I->getOperand(0);
4595  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4596  if (!isIntExtFree(ZExt)) {
4597  MVT TmpVT;
4598  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4599  SrcVT = TmpVT;
4600  IsZExt = true;
4601  Op0 = ZExt->getOperand(0);
4602  }
4603  }
4604  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4605  if (!isIntExtFree(SExt)) {
4606  MVT TmpVT;
4607  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4608  SrcVT = TmpVT;
4609  IsZExt = false;
4610  Op0 = SExt->getOperand(0);
4611  }
4612  }
4613  }
4614 
4615  unsigned Op0Reg = getRegForValue(Op0);
4616  if (!Op0Reg)
4617  return false;
4618  bool Op0IsKill = hasTrivialKill(Op0);
4619 
4620  switch (I->getOpcode()) {
4621  default: llvm_unreachable("Unexpected instruction.");
4622  case Instruction::Shl:
4623  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4624  break;
4625  case Instruction::AShr:
4626  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4627  break;
4628  case Instruction::LShr:
4629  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4630  break;
4631  }
4632  if (!ResultReg)
4633  return false;
4634 
4635  updateValueMap(I, ResultReg);
4636  return true;
4637  }
4638 
4639  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4640  if (!Op0Reg)
4641  return false;
4642  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4643 
4644  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4645  if (!Op1Reg)
4646  return false;
4647  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4648 
4649  unsigned ResultReg = 0;
4650  switch (I->getOpcode()) {
4651  default: llvm_unreachable("Unexpected instruction.");
4652  case Instruction::Shl:
4653  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4654  break;
4655  case Instruction::AShr:
4656  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4657  break;
4658  case Instruction::LShr:
4659  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4660  break;
4661  }
4662 
4663  if (!ResultReg)
4664  return false;
4665 
4666  updateValueMap(I, ResultReg);
4667  return true;
4668 }
4669 
4670 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4671  MVT RetVT, SrcVT;
4672 
4673  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4674  return false;
4675  if (!isTypeLegal(I->getType(), RetVT))
4676  return false;
4677 
4678  unsigned Opc;
4679  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4680  Opc = AArch64::FMOVWSr;
4681  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4682  Opc = AArch64::FMOVXDr;
4683  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4684  Opc = AArch64::FMOVSWr;
4685  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4686  Opc = AArch64::FMOVDXr;
4687  else
4688  return false;
4689 
4690  const TargetRegisterClass *RC = nullptr;
4691  switch (RetVT.SimpleTy) {
4692  default: llvm_unreachable("Unexpected value type.");
4693  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4694  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4695  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4696  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4697  }
4698  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4699  if (!Op0Reg)
4700  return false;
4701  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4702  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4703 
4704  if (!ResultReg)
4705  return false;
4706 
4707  updateValueMap(I, ResultReg);
4708  return true;
4709 }
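// The bitcasts handled above map directly onto FMOV between the integer and FP
// register files, e.g. "bitcast i32 %x to float" becomes roughly:
//   fmov s0, w0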
4710 
4711 bool AArch64FastISel::selectFRem(const Instruction *I) {
4712  MVT RetVT;
4713  if (!isTypeLegal(I->getType(), RetVT))
4714  return false;
4715 
4716  RTLIB::Libcall LC;
4717  switch (RetVT.SimpleTy) {
4718  default:
4719  return false;
4720  case MVT::f32:
4721  LC = RTLIB::REM_F32;
4722  break;
4723  case MVT::f64:
4724  LC = RTLIB::REM_F64;
4725  break;
4726  }
4727 
4728  ArgListTy Args;
4729  Args.reserve(I->getNumOperands());
4730 
4731  // Populate the argument list.
4732  for (auto &Arg : I->operands()) {
4733  ArgListEntry Entry;
4734  Entry.Val = Arg;
4735  Entry.Ty = Arg->getType();
4736  Args.push_back(Entry);
4737  }
4738 
4739  CallLoweringInfo CLI;
4740  MCContext &Ctx = MF->getContext();
4741  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4742  TLI.getLibcallName(LC), std::move(Args));
4743  if (!lowerCallTo(CLI))
4744  return false;
4745  updateValueMap(I, CLI.ResultReg);
4746  return true;
4747 }
4748 
4749 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4750  MVT VT;
4751  if (!isTypeLegal(I->getType(), VT))
4752  return false;
4753 
4754  if (!isa<ConstantInt>(I->getOperand(1)))
4755  return selectBinaryOp(I, ISD::SDIV);
4756 
4757  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4758  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4759  !(C.isPowerOf2() || (-C).isPowerOf2()))
4760  return selectBinaryOp(I, ISD::SDIV);
4761 
4762  unsigned Lg2 = C.countTrailingZeros();
4763  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4764  if (!Src0Reg)
4765  return false;
4766  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4767 
4768  if (cast<BinaryOperator>(I)->isExact()) {
4769  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4770  if (!ResultReg)
4771  return false;
4772  updateValueMap(I, ResultReg);
4773  return true;
4774  }
4775 
4776  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4777  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4778  if (!AddReg)
4779  return false;
4780 
4781  // (Src0 < 0) ? Pow2 - 1 : 0;
4782  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4783  return false;
4784 
4785  unsigned SelectOpc;
4786  const TargetRegisterClass *RC;
4787  if (VT == MVT::i64) {
4788  SelectOpc = AArch64::CSELXr;
4789  RC = &AArch64::GPR64RegClass;
4790  } else {
4791  SelectOpc = AArch64::CSELWr;
4792  RC = &AArch64::GPR32RegClass;
4793  }
4794  unsigned SelectReg =
4795  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4796  Src0IsKill, AArch64CC::LT);
4797  if (!SelectReg)
4798  return false;
4799 
4800  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4801  // negate the result.
4802  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4803  unsigned ResultReg;
4804  if (C.isNegative())
4805  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4806  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4807  else
4808  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4809 
4810  if (!ResultReg)
4811  return false;
4812 
4813  updateValueMap(I, ResultReg);
4814  return true;
4815 }
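// For illustration, the non-exact power-of-two path above lowers
// "sdiv i32 %x, 4" to roughly (registers are illustrative only):
//   add  w8, w0, #3          // x + (4 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      // use the biased value only for negative x
//   asr  w0, w8, #2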
4816 
4817 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4818 /// have to duplicate it for AArch64, because otherwise we would fail during the
4819 /// sign-extend emission.
4820 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4821  unsigned IdxN = getRegForValue(Idx);
4822  if (IdxN == 0)
4823  // Unhandled operand. Halt "fast" selection and bail.
4824  return std::pair<unsigned, bool>(0, false);
4825 
4826  bool IdxNIsKill = hasTrivialKill(Idx);
4827 
4828  // If the index is smaller or larger than intptr_t, truncate or extend it.
4829  MVT PtrVT = TLI.getPointerTy(DL);
4830  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4831  if (IdxVT.bitsLT(PtrVT)) {
4832  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4833  IdxNIsKill = true;
4834  } else if (IdxVT.bitsGT(PtrVT))
4835  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4836  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4837 }
4838 
4839 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4840 /// duplicate it for AArch64, because otherwise we would bail out even for
4841 /// simple cases. This is because the standard fastEmit functions don't cover
4842 /// MUL at all and ADD is lowered very inefficiently.
4843 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4844  unsigned N = getRegForValue(I->getOperand(0));
4845  if (!N)
4846  return false;
4847  bool NIsKill = hasTrivialKill(I->getOperand(0));
4848 
4849  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4850  // into a single N = N + TotalOffset.
4851  uint64_t TotalOffs = 0;
4852  Type *Ty = I->getOperand(0)->getType();
4853  MVT VT = TLI.getPointerTy(DL);
4854  for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
4855  const Value *Idx = *OI;
4856  if (auto *StTy = dyn_cast<StructType>(Ty)) {
4857  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4858  // N = N + Offset
4859  if (Field)
4860  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4861  Ty = StTy->getElementType(Field);
4862  } else {
4863  Ty = cast<SequentialType>(Ty)->getElementType();
4864  // If this is a constant subscript, handle it quickly.
4865  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4866  if (CI->isZero())
4867  continue;
4868  // N = N + Offset
4869  TotalOffs +=
4870  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4871  continue;
4872  }
4873  if (TotalOffs) {
4874  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4875  if (!N)
4876  return false;
4877  NIsKill = true;
4878  TotalOffs = 0;
4879  }
4880 
4881  // N = N + Idx * ElementSize;
4882  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4883  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4884  unsigned IdxN = Pair.first;
4885  bool IdxNIsKill = Pair.second;
4886  if (!IdxN)
4887  return false;
4888 
4889  if (ElementSize != 1) {
4890  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4891  if (!C)
4892  return false;
4893  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4894  if (!IdxN)
4895  return false;
4896  IdxNIsKill = true;
4897  }
4898  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4899  if (!N)
4900  return false;
4901  }
4902  }
4903  if (TotalOffs) {
4904  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4905  if (!N)
4906  return false;
4907  }
4908  updateValueMap(I, N);
4909  return true;
4910 }
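// For illustration, a GEP whose indices are all constants is folded by the code
// above into a single immediate add of the accumulated byte offset, e.g.
// "getelementptr i32, i32* %p, i64 3" becomes roughly:
//   add x0, x0, #12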
4911 
4912 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4913  switch (I->getOpcode()) {
4914  default:
4915  break;
4916  case Instruction::Add:
4917  case Instruction::Sub:
4918  return selectAddSub(I);
4919  case Instruction::Mul:
4920  return selectMul(I);
4921  case Instruction::SDiv:
4922  return selectSDiv(I);
4923  case Instruction::SRem:
4924  if (!selectBinaryOp(I, ISD::SREM))
4925  return selectRem(I, ISD::SREM);
4926  return true;
4927  case Instruction::URem:
4928  if (!selectBinaryOp(I, ISD::UREM))
4929  return selectRem(I, ISD::UREM);
4930  return true;
4931  case Instruction::Shl:
4932  case Instruction::LShr:
4933  case Instruction::AShr:
4934  return selectShift(I);
4935  case Instruction::And:
4936  case Instruction::Or:
4937  case Instruction::Xor:
4938  return selectLogicalOp(I);
4939  case Instruction::Br:
4940  return selectBranch(I);
4941  case Instruction::IndirectBr:
4942  return selectIndirectBr(I);
4943  case Instruction::BitCast:
4944  if (!FastISel::selectBitCast(I))
4945  return selectBitCast(I);
4946  return true;
4947  case Instruction::FPToSI:
4948  if (!selectCast(I, ISD::FP_TO_SINT))
4949  return selectFPToInt(I, /*Signed=*/true);
4950  return true;
4951  case Instruction::FPToUI:
4952  return selectFPToInt(I, /*Signed=*/false);
4953  case Instruction::ZExt:
4954  case Instruction::SExt:
4955  return selectIntExt(I);
4956  case Instruction::Trunc:
4957  if (!selectCast(I, ISD::TRUNCATE))
4958  return selectTrunc(I);
4959  return true;
4960  case Instruction::FPExt:
4961  return selectFPExt(I);
4962  case Instruction::FPTrunc:
4963  return selectFPTrunc(I);
4964  case Instruction::SIToFP:
4965  if (!selectCast(I, ISD::SINT_TO_FP))
4966  return selectIntToFP(I, /*Signed=*/true);
4967  return true;
4968  case Instruction::UIToFP:
4969  return selectIntToFP(I, /*Signed=*/false);
4970  case Instruction::Load:
4971  return selectLoad(I);
4972  case Instruction::Store:
4973  return selectStore(I);
4974  case Instruction::FCmp:
4975  case Instruction::ICmp:
4976  return selectCmp(I);
4977  case Instruction::Select:
4978  return selectSelect(I);
4979  case Instruction::Ret:
4980  return selectRet(I);
4981  case Instruction::FRem:
4982  return selectFRem(I);
4983  case Instruction::GetElementPtr:
4984  return selectGetElementPtr(I);
4985  }
4986 
4987  // Fall back to target-independent instruction selection.
4988  return selectOperator(I, I->getOpcode());
4989  // Reference CC_AArch64_DarwinPCS_VarArg to silence the unused-function warning.
4990  (void)&CC_AArch64_DarwinPCS_VarArg;
4991 }
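
// A brief sketch of the two dispatch shapes used in the switch above: most
// opcodes call the AArch64-specific routine directly, while SRem, URem,
// BitCast, FPToSI, Trunc and SIToFP try the generic FastISel helper first and
// fall back to the target routine only if that fails. The std::function
// parameters below stand in for those calls and are illustrative assumptions.
#include <functional>

static bool selectDirect(const std::function<bool()> &TargetSelect) {
  return TargetSelect(); // e.g. selectMul(I), selectShift(I), ...
}

static bool selectGenericFirst(const std::function<bool()> &GenericSelect,
                               const std::function<bool()> &TargetFallback) {
  if (GenericSelect())     // e.g. selectBinaryOp(I, ISD::SREM)
    return true;           // the generic path already emitted code
  return TargetFallback(); // e.g. selectRem(I, ISD::SREM)
}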
4992 
4993 namespace llvm {
4994 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4995                                   const TargetLibraryInfo *LibInfo) {
4996  return new AArch64FastISel(FuncInfo, LibInfo);
4997 }
4998 }
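
// How this factory is reached in practice: the selection pipeline asks the
// target for a FastISel instance, and the AArch64 TargetLowering override
// simply forwards here. Sketch of the usual pattern (assumed to live in
// AArch64ISelLowering.cpp, shown for context only):
//
//   FastISel *
//   AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
//                                         const TargetLibraryInfo *libInfo) const {
//     return AArch64::createFastISel(funcInfo, libInfo);
//   }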