LLVM 7.0.0svn
AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/CodeGen/FastISel.h"
41 #include "llvm/IR/Argument.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CallingConv.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DerivedTypes.h"
49 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GlobalValue.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/Operator.h"
58 #include "llvm/IR/Type.h"
59 #include "llvm/IR/User.h"
60 #include "llvm/IR/Value.h"
61 #include "llvm/MC/MCInstrDesc.h"
62 #include "llvm/MC/MCRegisterInfo.h"
63 #include "llvm/MC/MCSymbol.h"
65 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/CodeGen.h"
67 #include "llvm/Support/Compiler.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81  class Address {
82  public:
83  using BaseKind = enum {
84  RegBase,
85  FrameIndexBase
86  };
87 
88  private:
89  BaseKind Kind = RegBase;
90  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91  union {
92  unsigned Reg;
93  int FI;
94  } Base;
95  unsigned OffsetReg = 0;
96  unsigned Shift = 0;
97  int64_t Offset = 0;
98  const GlobalValue *GV = nullptr;
99 
100  public:
101  Address() { Base.Reg = 0; }
102 
103  void setKind(BaseKind K) { Kind = K; }
104  BaseKind getKind() const { return Kind; }
105  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107  bool isRegBase() const { return Kind == RegBase; }
108  bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110  void setReg(unsigned Reg) {
111  assert(isRegBase() && "Invalid base register access!");
112  Base.Reg = Reg;
113  }
114 
115  unsigned getReg() const {
116  assert(isRegBase() && "Invalid base register access!");
117  return Base.Reg;
118  }
119 
120  void setOffsetReg(unsigned Reg) {
121  OffsetReg = Reg;
122  }
123 
124  unsigned getOffsetReg() const {
125  return OffsetReg;
126  }
127 
128  void setFI(unsigned FI) {
129  assert(isFIBase() && "Invalid base frame index access!");
130  Base.FI = FI;
131  }
132 
133  unsigned getFI() const {
134  assert(isFIBase() && "Invalid base frame index access!");
135  return Base.FI;
136  }
137 
138  void setOffset(int64_t O) { Offset = O; }
139  int64_t getOffset() { return Offset; }
140  void setShift(unsigned S) { Shift = S; }
141  unsigned getShift() { return Shift; }
142 
143  void setGlobalValue(const GlobalValue *G) { GV = G; }
144  const GlobalValue *getGlobalValue() { return GV; }
145  };
146 
147  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148  /// make the right decision when generating code for different targets.
149  const AArch64Subtarget *Subtarget;
150  LLVMContext *Context;
151 
152  bool fastLowerArguments() override;
153  bool fastLowerCall(CallLoweringInfo &CLI) override;
154  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157  // Selection routines.
158  bool selectAddSub(const Instruction *I);
159  bool selectLogicalOp(const Instruction *I);
160  bool selectLoad(const Instruction *I);
161  bool selectStore(const Instruction *I);
162  bool selectBranch(const Instruction *I);
163  bool selectIndirectBr(const Instruction *I);
164  bool selectCmp(const Instruction *I);
165  bool selectSelect(const Instruction *I);
166  bool selectFPExt(const Instruction *I);
167  bool selectFPTrunc(const Instruction *I);
168  bool selectFPToInt(const Instruction *I, bool Signed);
169  bool selectIntToFP(const Instruction *I, bool Signed);
170  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171  bool selectRet(const Instruction *I);
172  bool selectTrunc(const Instruction *I);
173  bool selectIntExt(const Instruction *I);
174  bool selectMul(const Instruction *I);
175  bool selectShift(const Instruction *I);
176  bool selectBitCast(const Instruction *I);
177  bool selectFRem(const Instruction *I);
178  bool selectSDiv(const Instruction *I);
179  bool selectGetElementPtr(const Instruction *I);
180  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182  // Utility helper routines.
183  bool isTypeLegal(Type *Ty, MVT &VT);
184  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185  bool isValueAvailable(const Value *V) const;
186  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187  bool computeCallAddress(const Value *V, Address &Addr);
188  bool simplifyAddress(Address &Addr, MVT VT);
189  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  MachineMemOperand::Flags Flags,
191  unsigned ScaleFactor, MachineMemOperand *MMO);
192  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194  unsigned Alignment);
195  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196  const Value *Cond);
197  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198  bool optimizeSelect(const SelectInst *SI);
199  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200 
201  // Emit helper routines.
202  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203  const Value *RHS, bool SetFlags = false,
204  bool WantResult = true, bool IsZExt = false);
205  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207  bool SetFlags = false, bool WantResult = true);
208  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210  bool WantResult = true);
211  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213  AArch64_AM::ShiftExtendType ShiftType,
214  uint64_t ShiftImm, bool SetFlags = false,
215  bool WantResult = true);
216  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218  AArch64_AM::ShiftExtendType ExtType,
219  uint64_t ShiftImm, bool SetFlags = false,
220  bool WantResult = true);
221 
222  // Emit functions.
223  bool emitCompareAndBranch(const BranchInst *BI);
224  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229  MachineMemOperand *MMO = nullptr);
230  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231  MachineMemOperand *MMO = nullptr);
232  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233  MachineMemOperand *MMO = nullptr);
234  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237  bool SetFlags = false, bool WantResult = true,
238  bool IsZExt = false);
239  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241  bool SetFlags = false, bool WantResult = true,
242  bool IsZExt = false);
243  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246  unsigned RHSReg, bool RHSIsKill,
247  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248  bool WantResult = true);
249  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250  const Value *RHS);
251  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252  bool LHSIsKill, uint64_t Imm);
253  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255  uint64_t ShiftImm);
256  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258  unsigned Op1, bool Op1IsKill);
259  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260  unsigned Op1, bool Op1IsKill);
261  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262  unsigned Op1, bool Op1IsKill);
263  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264  unsigned Op1Reg, bool Op1IsKill);
265  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266  uint64_t Imm, bool IsZExt = true);
267  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268  unsigned Op1Reg, bool Op1IsKill);
269  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270  uint64_t Imm, bool IsZExt = true);
271  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272  unsigned Op1Reg, bool Op1IsKill);
273  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274  uint64_t Imm, bool IsZExt = false);
275 
276  unsigned materializeInt(const ConstantInt *CI, MVT VT);
277  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278  unsigned materializeGV(const GlobalValue *GV);
279 
280  // Call handling routines.
281 private:
282  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284  unsigned &NumBytes);
285  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286 
287 public:
288  // Backend specific FastISel code.
289  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290  unsigned fastMaterializeConstant(const Constant *C) override;
291  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292 
293  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294  const TargetLibraryInfo *LibInfo)
295  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296  Subtarget =
297  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298  Context = &FuncInfo.Fn->getContext();
299  }
300 
301  bool fastSelectInstruction(const Instruction *I) override;
302 
303 #include "AArch64GenFastISel.inc"
304 };
305 
306 } // end anonymous namespace
307 
308 #include "AArch64GenCallingConv.inc"
309 
310 /// \brief Check if the sign-/zero-extend will be a noop.
311 static bool isIntExtFree(const Instruction *I) {
312  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313  "Unexpected integer extend instruction.");
314  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315  "Unexpected value type.");
316  bool IsZExt = isa<ZExtInst>(I);
317 
318  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319  if (LI->hasOneUse())
320  return true;
321 
322  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324  return true;
325 
326  return false;
327 }
328 
329 /// \brief Determine the implicit scale factor that is applied by a memory
330 /// operation for a given value type.
331 static unsigned getImplicitScaleFactor(MVT VT) {
332  switch (VT.SimpleTy) {
333  default:
334  return 0; // invalid
335  case MVT::i1: // fall-through
336  case MVT::i8:
337  return 1;
338  case MVT::i16:
339  return 2;
340  case MVT::i32: // fall-through
341  case MVT::f32:
342  return 4;
343  case MVT::i64: // fall-through
344  case MVT::f64:
345  return 8;
346  }
347 }
348 
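// Select the calling-convention assignment function for a call: WebKit_JS and
// GHC get their own conventions; everything else uses the Darwin PCS or AAPCS
// depending on the target.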
349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350  if (CC == CallingConv::WebKit_JS)
351  return CC_AArch64_WebKit_JS;
352  if (CC == CallingConv::GHC)
353  return CC_AArch64_GHC;
354  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355 }
356 
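// Materialize the address of a static alloca by adding an immediate of zero to
// its frame index. Dynamic allocas are rejected.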
357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359  "Alloca should always return a pointer.");
360 
361  // Don't handle dynamic allocas.
362  if (!FuncInfo.StaticAllocaMap.count(AI))
363  return 0;
364 
365  DenseMap<const AllocaInst *, int>::iterator SI =
366  FuncInfo.StaticAllocaMap.find(AI);
367 
368  if (SI != FuncInfo.StaticAllocaMap.end()) {
369  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371  ResultReg)
372  .addFrameIndex(SI->second)
373  .addImm(0)
374  .addImm(0);
375  return ResultReg;
376  }
377 
378  return 0;
379 }
380 
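// Materialize an integer constant, using a copy from WZR/XZR for zero and the
// generic constant materialization hook otherwise.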
381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382  if (VT > MVT::i64)
383  return 0;
384 
385  if (!CI->isZero())
386  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387 
388  // Create a copy from the zero register to materialize a "0" value.
389  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390  : &AArch64::GPR32RegClass;
391  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392  unsigned ResultReg = createResultReg(RC);
393  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394  ResultReg).addReg(ZeroReg, getKillRegState(true));
395  return ResultReg;
396 }
397 
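// Materialize a floating-point constant via an FMOV immediate when the value
// can be encoded, otherwise by building the bit pattern in a GPR (MachO large
// code model) or loading it from the constant pool.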
398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399  // Positive zero (+0.0) has to be materialized with a fmov from the zero
400  // register, because the immediate version of fmov cannot encode zero.
401  if (CFP->isNullValue())
402  return fastMaterializeFloatZero(CFP);
403 
404  if (VT != MVT::f32 && VT != MVT::f64)
405  return 0;
406 
407  const APFloat Val = CFP->getValueAPF();
408  bool Is64Bit = (VT == MVT::f64);
409  // This checks to see if we can use FMOV instructions to materialize
410  // a constant, otherwise we have to materialize via the constant pool.
411  if (TLI.isFPImmLegal(Val, VT)) {
412  int Imm =
413  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414  assert((Imm != -1) && "Cannot encode floating-point constant.");
415  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417  }
418 
419  // For the MachO large code model materialize the FP constant in code.
420  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422  const TargetRegisterClass *RC = Is64Bit ?
423  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424 
425  unsigned TmpReg = createResultReg(RC);
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428 
429  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431  TII.get(TargetOpcode::COPY), ResultReg)
432  .addReg(TmpReg, getKillRegState(true));
433 
434  return ResultReg;
435  }
436 
437  // Materialize via constant pool. MachineConstantPool wants an explicit
438  // alignment.
439  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440  if (Align == 0)
441  Align = DL.getTypeAllocSize(CFP->getType());
442 
443  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447 
448  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451  .addReg(ADRPReg)
452  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453  return ResultReg;
454 }
455 
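// Materialize the address of a global value: ADRP + LDRXui through the GOT, or
// ADRP + ADDXri for a direct page/pageoff pair. Thread-local globals and
// non-MachO targets outside the small code model are not handled here.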
456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457  // We can't handle thread-local variables quickly yet.
458  if (GV->isThreadLocal())
459  return 0;
460 
461  // MachO still uses GOT for large code-model accesses, but ELF requires
462  // movz/movk sequences, which FastISel doesn't handle yet.
463  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464  return 0;
465 
466  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467 
468  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469  if (!DestEVT.isSimple())
470  return 0;
471 
472  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473  unsigned ResultReg;
474 
475  if (OpFlags & AArch64II::MO_GOT) {
476  // ADRP + LDRX
477  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478  ADRPReg)
479  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
480 
481  ResultReg = createResultReg(&AArch64::GPR64RegClass);
482  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483  ResultReg)
484  .addReg(ADRPReg)
485  .addGlobalAddress(GV, 0,
486  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
487  } else {
488  // ADRP + ADDX
489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490  ADRPReg)
491  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
492 
493  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495  ResultReg)
496  .addReg(ADRPReg)
497  .addGlobalAddress(GV, 0,
498  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
499  .addImm(0);
500  }
501  return ResultReg;
502 }
503 
504 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
505  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
506 
507  // Only handle simple types.
508  if (!CEVT.isSimple())
509  return 0;
510  MVT VT = CEVT.getSimpleVT();
511 
512  if (const auto *CI = dyn_cast<ConstantInt>(C))
513  return materializeInt(CI, VT);
514  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
515  return materializeFP(CFP, VT);
516  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
517  return materializeGV(GV);
518 
519  return 0;
520 }
521 
522 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
523  assert(CFP->isNullValue() &&
524  "Floating-point constant is not a positive zero.");
525  MVT VT;
526  if (!isTypeLegal(CFP->getType(), VT))
527  return 0;
528 
529  if (VT != MVT::f32 && VT != MVT::f64)
530  return 0;
531 
532  bool Is64Bit = (VT == MVT::f64);
533  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
534  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
535  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
536 }
537 
538 /// \brief Check if the multiply is by a power-of-2 constant.
539 static bool isMulPowOf2(const Value *I) {
540  if (const auto *MI = dyn_cast<MulOperator>(I)) {
541  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
542  if (C->getValue().isPowerOf2())
543  return true;
544  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
545  if (C->getValue().isPowerOf2())
546  return true;
547  }
548  return false;
549 }
550 
551 // Computes the address to get to an object.
552 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
553 {
554  const User *U = nullptr;
555  unsigned Opcode = Instruction::UserOp1;
556  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
557  // Don't walk into other basic blocks unless the object is an alloca from
558  // another block, otherwise it may not have a virtual register assigned.
559  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
560  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
561  Opcode = I->getOpcode();
562  U = I;
563  }
564  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
565  Opcode = C->getOpcode();
566  U = C;
567  }
568 
569  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
570  if (Ty->getAddressSpace() > 255)
571  // Fast instruction selection doesn't support the special
572  // address spaces.
573  return false;
574 
575  switch (Opcode) {
576  default:
577  break;
578  case Instruction::BitCast:
579  // Look through bitcasts.
580  return computeAddress(U->getOperand(0), Addr, Ty);
581 
582  case Instruction::IntToPtr:
583  // Look past no-op inttoptrs.
584  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
585  TLI.getPointerTy(DL))
586  return computeAddress(U->getOperand(0), Addr, Ty);
587  break;
588 
589  case Instruction::PtrToInt:
590  // Look past no-op ptrtoints.
591  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
592  return computeAddress(U->getOperand(0), Addr, Ty);
593  break;
594 
595  case Instruction::GetElementPtr: {
596  Address SavedAddr = Addr;
597  uint64_t TmpOffset = Addr.getOffset();
598 
599  // Iterate through the GEP folding the constants into offsets where
600  // we can.
601  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
602  GTI != E; ++GTI) {
603  const Value *Op = GTI.getOperand();
604  if (StructType *STy = GTI.getStructTypeOrNull()) {
605  const StructLayout *SL = DL.getStructLayout(STy);
606  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
607  TmpOffset += SL->getElementOffset(Idx);
608  } else {
609  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
610  while (true) {
611  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
612  // Constant-offset addressing.
613  TmpOffset += CI->getSExtValue() * S;
614  break;
615  }
616  if (canFoldAddIntoGEP(U, Op)) {
617  // A compatible add with a constant operand. Fold the constant.
618  ConstantInt *CI =
619  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
620  TmpOffset += CI->getSExtValue() * S;
621  // Iterate on the other operand.
622  Op = cast<AddOperator>(Op)->getOperand(0);
623  continue;
624  }
625  // Unsupported
626  goto unsupported_gep;
627  }
628  }
629  }
630 
631  // Try to grab the base operand now.
632  Addr.setOffset(TmpOffset);
633  if (computeAddress(U->getOperand(0), Addr, Ty))
634  return true;
635 
636  // We failed, restore everything and try the other options.
637  Addr = SavedAddr;
638 
639  unsupported_gep:
640  break;
641  }
642  case Instruction::Alloca: {
643  const AllocaInst *AI = cast<AllocaInst>(Obj);
644  DenseMap<const AllocaInst *, int>::iterator SI =
645  FuncInfo.StaticAllocaMap.find(AI);
646  if (SI != FuncInfo.StaticAllocaMap.end()) {
647  Addr.setKind(Address::FrameIndexBase);
648  Addr.setFI(SI->second);
649  return true;
650  }
651  break;
652  }
653  case Instruction::Add: {
654  // Adds of constants are common and easy enough.
655  const Value *LHS = U->getOperand(0);
656  const Value *RHS = U->getOperand(1);
657 
658  if (isa<ConstantInt>(LHS))
659  std::swap(LHS, RHS);
660 
661  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
662  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
663  return computeAddress(LHS, Addr, Ty);
664  }
665 
666  Address Backup = Addr;
667  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
668  return true;
669  Addr = Backup;
670 
671  break;
672  }
673  case Instruction::Sub: {
674  // Subs of constants are common and easy enough.
675  const Value *LHS = U->getOperand(0);
676  const Value *RHS = U->getOperand(1);
677 
678  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
679  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
680  return computeAddress(LHS, Addr, Ty);
681  }
682  break;
683  }
684  case Instruction::Shl: {
685  if (Addr.getOffsetReg())
686  break;
687 
688  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
689  if (!CI)
690  break;
691 
692  unsigned Val = CI->getZExtValue();
693  if (Val < 1 || Val > 3)
694  break;
695 
696  uint64_t NumBytes = 0;
697  if (Ty && Ty->isSized()) {
698  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
699  NumBytes = NumBits / 8;
700  if (!isPowerOf2_64(NumBits))
701  NumBytes = 0;
702  }
703 
704  if (NumBytes != (1ULL << Val))
705  break;
706 
707  Addr.setShift(Val);
708  Addr.setExtendType(AArch64_AM::LSL);
709 
710  const Value *Src = U->getOperand(0);
711  if (const auto *I = dyn_cast<Instruction>(Src)) {
712  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
713  // Fold the zext or sext when it won't become a noop.
714  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
715  if (!isIntExtFree(ZE) &&
716  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
717  Addr.setExtendType(AArch64_AM::UXTW);
718  Src = ZE->getOperand(0);
719  }
720  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
721  if (!isIntExtFree(SE) &&
722  SE->getOperand(0)->getType()->isIntegerTy(32)) {
723  Addr.setExtendType(AArch64_AM::SXTW);
724  Src = SE->getOperand(0);
725  }
726  }
727  }
728  }
729 
730  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
731  if (AI->getOpcode() == Instruction::And) {
732  const Value *LHS = AI->getOperand(0);
733  const Value *RHS = AI->getOperand(1);
734 
735  if (const auto *C = dyn_cast<ConstantInt>(LHS))
736  if (C->getValue() == 0xffffffff)
737  std::swap(LHS, RHS);
738 
739  if (const auto *C = dyn_cast<ConstantInt>(RHS))
740  if (C->getValue() == 0xffffffff) {
741  Addr.setExtendType(AArch64_AM::UXTW);
742  unsigned Reg = getRegForValue(LHS);
743  if (!Reg)
744  return false;
745  bool RegIsKill = hasTrivialKill(LHS);
746  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
747  AArch64::sub_32);
748  Addr.setOffsetReg(Reg);
749  return true;
750  }
751  }
752 
753  unsigned Reg = getRegForValue(Src);
754  if (!Reg)
755  return false;
756  Addr.setOffsetReg(Reg);
757  return true;
758  }
759  case Instruction::Mul: {
760  if (Addr.getOffsetReg())
761  break;
762 
763  if (!isMulPowOf2(U))
764  break;
765 
766  const Value *LHS = U->getOperand(0);
767  const Value *RHS = U->getOperand(1);
768 
769  // Canonicalize power-of-2 value to the RHS.
770  if (const auto *C = dyn_cast<ConstantInt>(LHS))
771  if (C->getValue().isPowerOf2())
772  std::swap(LHS, RHS);
773 
774  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
775  const auto *C = cast<ConstantInt>(RHS);
776  unsigned Val = C->getValue().logBase2();
777  if (Val < 1 || Val > 3)
778  break;
779 
780  uint64_t NumBytes = 0;
781  if (Ty && Ty->isSized()) {
782  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
783  NumBytes = NumBits / 8;
784  if (!isPowerOf2_64(NumBits))
785  NumBytes = 0;
786  }
787 
788  if (NumBytes != (1ULL << Val))
789  break;
790 
791  Addr.setShift(Val);
792  Addr.setExtendType(AArch64_AM::LSL);
793 
794  const Value *Src = LHS;
795  if (const auto *I = dyn_cast<Instruction>(Src)) {
796  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
797  // Fold the zext or sext when it won't become a noop.
798  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
799  if (!isIntExtFree(ZE) &&
800  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
801  Addr.setExtendType(AArch64_AM::UXTW);
802  Src = ZE->getOperand(0);
803  }
804  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
805  if (!isIntExtFree(SE) &&
806  SE->getOperand(0)->getType()->isIntegerTy(32)) {
807  Addr.setExtendType(AArch64_AM::SXTW);
808  Src = SE->getOperand(0);
809  }
810  }
811  }
812  }
813 
814  unsigned Reg = getRegForValue(Src);
815  if (!Reg)
816  return false;
817  Addr.setOffsetReg(Reg);
818  return true;
819  }
820  case Instruction::And: {
821  if (Addr.getOffsetReg())
822  break;
823 
824  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
825  break;
826 
827  const Value *LHS = U->getOperand(0);
828  const Value *RHS = U->getOperand(1);
829 
830  if (const auto *C = dyn_cast<ConstantInt>(LHS))
831  if (C->getValue() == 0xffffffff)
832  std::swap(LHS, RHS);
833 
834  if (const auto *C = dyn_cast<ConstantInt>(RHS))
835  if (C->getValue() == 0xffffffff) {
836  Addr.setShift(0);
837  Addr.setExtendType(AArch64_AM::LSL);
838  Addr.setExtendType(AArch64_AM::UXTW);
839 
840  unsigned Reg = getRegForValue(LHS);
841  if (!Reg)
842  return false;
843  bool RegIsKill = hasTrivialKill(LHS);
844  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
845  AArch64::sub_32);
846  Addr.setOffsetReg(Reg);
847  return true;
848  }
849  break;
850  }
851  case Instruction::SExt:
852  case Instruction::ZExt: {
853  if (!Addr.getReg() || Addr.getOffsetReg())
854  break;
855 
856  const Value *Src = nullptr;
857  // Fold the zext or sext when it won't become a noop.
858  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
859  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
860  Addr.setExtendType(AArch64_AM::UXTW);
861  Src = ZE->getOperand(0);
862  }
863  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
864  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
865  Addr.setExtendType(AArch64_AM::SXTW);
866  Src = SE->getOperand(0);
867  }
868  }
869 
870  if (!Src)
871  break;
872 
873  Addr.setShift(0);
874  unsigned Reg = getRegForValue(Src);
875  if (!Reg)
876  return false;
877  Addr.setOffsetReg(Reg);
878  return true;
879  }
880  } // end switch
881 
882  if (Addr.isRegBase() && !Addr.getReg()) {
883  unsigned Reg = getRegForValue(Obj);
884  if (!Reg)
885  return false;
886  Addr.setReg(Reg);
887  return true;
888  }
889 
890  if (!Addr.getOffsetReg()) {
891  unsigned Reg = getRegForValue(Obj);
892  if (!Reg)
893  return false;
894  Addr.setOffsetReg(Reg);
895  return true;
896  }
897 
898  return false;
899 }
900 
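// Compute the callee address for a call. Looks through bitcasts and no-op
// int/ptr casts whose operand is in the current block, prefers a GlobalValue
// when one is found, and otherwise materializes the callee into a register.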
901 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
902  const User *U = nullptr;
903  unsigned Opcode = Instruction::UserOp1;
904  bool InMBB = true;
905 
906  if (const auto *I = dyn_cast<Instruction>(V)) {
907  Opcode = I->getOpcode();
908  U = I;
909  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
910  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
911  Opcode = C->getOpcode();
912  U = C;
913  }
914 
915  switch (Opcode) {
916  default: break;
917  case Instruction::BitCast:
918  // Look past bitcasts if its operand is in the same BB.
919  if (InMBB)
920  return computeCallAddress(U->getOperand(0), Addr);
921  break;
922  case Instruction::IntToPtr:
923  // Look past no-op inttoptrs if its operand is in the same BB.
924  if (InMBB &&
925  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
926  TLI.getPointerTy(DL))
927  return computeCallAddress(U->getOperand(0), Addr);
928  break;
929  case Instruction::PtrToInt:
930  // Look past no-op ptrtoints if its operand is in the same BB.
931  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
932  return computeCallAddress(U->getOperand(0), Addr);
933  break;
934  }
935 
936  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
937  Addr.setGlobalValue(GV);
938  return true;
939  }
940 
941  // If all else fails, try to materialize the value in a register.
942  if (!Addr.getGlobalValue()) {
943  Addr.setReg(getRegForValue(V));
944  return Addr.getReg() != 0;
945  }
946 
947  return false;
948 }
949 
950 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
951  EVT evt = TLI.getValueType(DL, Ty, true);
952 
953  // Only handle simple types.
954  if (evt == MVT::Other || !evt.isSimple())
955  return false;
956  VT = evt.getSimpleVT();
957 
958  // This is a legal type, but it's not something we handle in fast-isel.
959  if (VT == MVT::f128)
960  return false;
961 
962  // Handle all other legal types, i.e. a register that will directly hold this
963  // value.
964  return TLI.isTypeLegal(VT);
965 }
966 
967 /// \brief Determine if the value type is supported by FastISel.
968 ///
969 /// FastISel for AArch64 can handle more value types than are legal. This adds
970 /// simple value types such as i1, i8, and i16.
971 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
972  if (Ty->isVectorTy() && !IsVectorAllowed)
973  return false;
974 
975  if (isTypeLegal(Ty, VT))
976  return true;
977 
978  // If this is a type that can be sign- or zero-extended to a basic operation,
979  // go ahead and accept it now.
980  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
981  return true;
982 
983  return false;
984 }
985 
986 bool AArch64FastISel::isValueAvailable(const Value *V) const {
987  if (!isa<Instruction>(V))
988  return true;
989 
990  const auto *I = cast<Instruction>(V);
991  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
992 }
993 
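// Rewrite an Address into a form the load/store instruction can encode
// directly: fold an unencodable frame index or register+register base into an
// ADD, and lower immediate offsets that fit neither the scaled-unsigned nor
// the signed 9-bit form.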
994 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
995  unsigned ScaleFactor = getImplicitScaleFactor(VT);
996  if (!ScaleFactor)
997  return false;
998 
999  bool ImmediateOffsetNeedsLowering = false;
1000  bool RegisterOffsetNeedsLowering = false;
1001  int64_t Offset = Addr.getOffset();
1002  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1003  ImmediateOffsetNeedsLowering = true;
1004  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1005  !isUInt<12>(Offset / ScaleFactor))
1006  ImmediateOffsetNeedsLowering = true;
1007 
1008  // Cannot encode an offset register and an immediate offset in the same
1009  // instruction. Fold the immediate offset into the load/store instruction and
1010  // emit an additional add to take care of the offset register.
1011  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1012  RegisterOffsetNeedsLowering = true;
1013 
1014  // Cannot encode zero register as base.
1015  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1016  RegisterOffsetNeedsLowering = true;
1017 
1018  // If this is a stack pointer and the offset needs to be simplified then put
1019  // the alloca address into a register, set the base kind back to register, and
1020  // continue. This should almost never happen.
1021  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1022  {
1023  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1024  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1025  ResultReg)
1026  .addFrameIndex(Addr.getFI())
1027  .addImm(0)
1028  .addImm(0);
1029  Addr.setKind(Address::RegBase);
1030  Addr.setReg(ResultReg);
1031  }
1032 
1033  if (RegisterOffsetNeedsLowering) {
1034  unsigned ResultReg = 0;
1035  if (Addr.getReg()) {
1036  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1037  Addr.getExtendType() == AArch64_AM::UXTW )
1038  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1039  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1040  /*TODO:IsKill=*/false, Addr.getExtendType(),
1041  Addr.getShift());
1042  else
1043  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1044  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1045  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1046  Addr.getShift());
1047  } else {
1048  if (Addr.getExtendType() == AArch64_AM::UXTW)
1049  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050  /*Op0IsKill=*/false, Addr.getShift(),
1051  /*IsZExt=*/true);
1052  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1053  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1054  /*Op0IsKill=*/false, Addr.getShift(),
1055  /*IsZExt=*/false);
1056  else
1057  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1058  /*Op0IsKill=*/false, Addr.getShift());
1059  }
1060  if (!ResultReg)
1061  return false;
1062 
1063  Addr.setReg(ResultReg);
1064  Addr.setOffsetReg(0);
1065  Addr.setShift(0);
1066  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1067  }
1068 
1069  // Since the offset is too large for the load/store instruction, get the
1070  // reg+offset into a register.
1071  if (ImmediateOffsetNeedsLowering) {
1072  unsigned ResultReg;
1073  if (Addr.getReg())
1074  // Try to fold the immediate into the add instruction.
1075  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1076  else
1077  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1078 
1079  if (!ResultReg)
1080  return false;
1081  Addr.setReg(ResultReg);
1082  Addr.setOffset(0);
1083  }
1084  return true;
1085 }
1086 
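// Append the addressing-mode operands (frame index + offset, base register +
// scaled immediate, or base + extended/shifted offset register) and the memory
// operand to an already-built load/store instruction.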
1087 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088  const MachineInstrBuilder &MIB,
1089  MachineMemOperand::Flags Flags,
1090  unsigned ScaleFactor,
1091  MachineMemOperand *MMO) {
1092  int64_t Offset = Addr.getOffset() / ScaleFactor;
1093  // Frame base works a bit differently. Handle it separately.
1094  if (Addr.isFIBase()) {
1095  int FI = Addr.getFI();
1096  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1097  // and alignment should be based on the VT.
1098  MMO = FuncInfo.MF->getMachineMemOperand(
1099  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101  // Now add the rest of the operands.
1102  MIB.addFrameIndex(FI).addImm(Offset);
1103  } else {
1104  assert(Addr.isRegBase() && "Unexpected address kind.");
1105  const MCInstrDesc &II = MIB->getDesc();
1106  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107  Addr.setReg(
1108  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109  Addr.setOffsetReg(
1110  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111  if (Addr.getOffsetReg()) {
1112  assert(Addr.getOffset() == 0 && "Unexpected offset");
1113  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114  Addr.getExtendType() == AArch64_AM::SXTX;
1115  MIB.addReg(Addr.getReg());
1116  MIB.addReg(Addr.getOffsetReg());
1117  MIB.addImm(IsSigned);
1118  MIB.addImm(Addr.getShift() != 0);
1119  } else
1120  MIB.addReg(Addr.getReg()).addImm(Offset);
1121  }
1122 
1123  if (MMO)
1124  MIB.addMemOperand(MMO);
1125 }
1126 
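// Emit an add/subtract of two IR values, trying the immediate,
// extended-register, and shifted-register forms before falling back to a plain
// register-register instruction. Returns the result register, or 0 on failure.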
1127 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1128  const Value *RHS, bool SetFlags,
1129  bool WantResult, bool IsZExt) {
1130  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1131  bool NeedExtend = false;
1132  switch (RetVT.SimpleTy) {
1133  default:
1134  return 0;
1135  case MVT::i1:
1136  NeedExtend = true;
1137  break;
1138  case MVT::i8:
1139  NeedExtend = true;
1140  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1141  break;
1142  case MVT::i16:
1143  NeedExtend = true;
1144  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1145  break;
1146  case MVT::i32: // fall-through
1147  case MVT::i64:
1148  break;
1149  }
1150  MVT SrcVT = RetVT;
1151  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1152 
1153  // Canonicalize immediates to the RHS first.
1154  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1155  std::swap(LHS, RHS);
1156 
1157  // Canonicalize mul by power of 2 to the RHS.
1158  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1159  if (isMulPowOf2(LHS))
1160  std::swap(LHS, RHS);
1161 
1162  // Canonicalize shift immediate to the RHS.
1163  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1164  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1165  if (isa<ConstantInt>(SI->getOperand(1)))
1166  if (SI->getOpcode() == Instruction::Shl ||
1167  SI->getOpcode() == Instruction::LShr ||
1168  SI->getOpcode() == Instruction::AShr )
1169  std::swap(LHS, RHS);
1170 
1171  unsigned LHSReg = getRegForValue(LHS);
1172  if (!LHSReg)
1173  return 0;
1174  bool LHSIsKill = hasTrivialKill(LHS);
1175 
1176  if (NeedExtend)
1177  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1178 
1179  unsigned ResultReg = 0;
1180  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1181  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1182  if (C->isNegative())
1183  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1184  SetFlags, WantResult);
1185  else
1186  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1187  WantResult);
1188  } else if (const auto *C = dyn_cast<Constant>(RHS))
1189  if (C->isNullValue())
1190  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1191  WantResult);
1192 
1193  if (ResultReg)
1194  return ResultReg;
1195 
1196  // Only extend the RHS within the instruction if there is a valid extend type.
1197  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1198  isValueAvailable(RHS)) {
1199  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1200  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1201  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1202  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1203  if (!RHSReg)
1204  return 0;
1205  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1206  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1207  RHSIsKill, ExtendType, C->getZExtValue(),
1208  SetFlags, WantResult);
1209  }
1210  unsigned RHSReg = getRegForValue(RHS);
1211  if (!RHSReg)
1212  return 0;
1213  bool RHSIsKill = hasTrivialKill(RHS);
1214  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1215  ExtendType, 0, SetFlags, WantResult);
1216  }
1217 
1218  // Check if the mul can be folded into the instruction.
1219  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1220  if (isMulPowOf2(RHS)) {
1221  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1222  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1223 
1224  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1225  if (C->getValue().isPowerOf2())
1226  std::swap(MulLHS, MulRHS);
1227 
1228  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1229  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1230  unsigned RHSReg = getRegForValue(MulLHS);
1231  if (!RHSReg)
1232  return 0;
1233  bool RHSIsKill = hasTrivialKill(MulLHS);
1234  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1235  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1236  WantResult);
1237  if (ResultReg)
1238  return ResultReg;
1239  }
1240  }
1241 
1242  // Check if the shift can be folded into the instruction.
1243  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1245  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1246  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1247  switch (SI->getOpcode()) {
1248  default: break;
1249  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1250  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1251  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1252  }
1253  uint64_t ShiftVal = C->getZExtValue();
1254  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1255  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1256  if (!RHSReg)
1257  return 0;
1258  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1259  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1260  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1261  WantResult);
1262  if (ResultReg)
1263  return ResultReg;
1264  }
1265  }
1266  }
1267  }
1268 
1269  unsigned RHSReg = getRegForValue(RHS);
1270  if (!RHSReg)
1271  return 0;
1272  bool RHSIsKill = hasTrivialKill(RHS);
1273 
1274  if (NeedExtend)
1275  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1278  SetFlags, WantResult);
1279 }
1280 
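// Register-register form. OpcTable is indexed as [SetFlags][UseAdd][Is64Bit];
// when no result is wanted, the zero register is used as the destination so
// only the flags are updated.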
1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282  bool LHSIsKill, unsigned RHSReg,
1283  bool RHSIsKill, bool SetFlags,
1284  bool WantResult) {
1285  assert(LHSReg && RHSReg && "Invalid register number.");
1286 
1287  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289  return 0;
1290 
1291  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292  return 0;
1293 
1294  static const unsigned OpcTable[2][2][2] = {
1295  { { AArch64::SUBWrr, AArch64::SUBXrr },
1296  { AArch64::ADDWrr, AArch64::ADDXrr } },
1297  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1299  };
1300  bool Is64Bit = RetVT == MVT::i64;
1301  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302  const TargetRegisterClass *RC =
1303  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304  unsigned ResultReg;
1305  if (WantResult)
1306  ResultReg = createResultReg(RC);
1307  else
1308  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309 
1310  const MCInstrDesc &II = TII.get(Opc);
1311  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314  .addReg(LHSReg, getKillRegState(LHSIsKill))
1315  .addReg(RHSReg, getKillRegState(RHSIsKill));
1316  return ResultReg;
1317 }
1318 
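// Immediate form. ADD/SUB immediates are 12 bits, optionally shifted left by
// 12: e.g. Imm = 0x1000 is emitted as #1 with an LSL #12 shifter. Anything
// else returns 0 so the caller can fall back to another form.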
1319 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320  bool LHSIsKill, uint64_t Imm,
1321  bool SetFlags, bool WantResult) {
1322  assert(LHSReg && "Invalid register number.");
1323 
1324  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325  return 0;
1326 
1327  unsigned ShiftImm;
1328  if (isUInt<12>(Imm))
1329  ShiftImm = 0;
1330  else if ((Imm & 0xfff000) == Imm) {
1331  ShiftImm = 12;
1332  Imm >>= 12;
1333  } else
1334  return 0;
1335 
1336  static const unsigned OpcTable[2][2][2] = {
1337  { { AArch64::SUBWri, AArch64::SUBXri },
1338  { AArch64::ADDWri, AArch64::ADDXri } },
1339  { { AArch64::SUBSWri, AArch64::SUBSXri },
1340  { AArch64::ADDSWri, AArch64::ADDSXri } }
1341  };
1342  bool Is64Bit = RetVT == MVT::i64;
1343  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344  const TargetRegisterClass *RC;
1345  if (SetFlags)
1346  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347  else
1348  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349  unsigned ResultReg;
1350  if (WantResult)
1351  ResultReg = createResultReg(RC);
1352  else
1353  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354 
1355  const MCInstrDesc &II = TII.get(Opc);
1356  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358  .addReg(LHSReg, getKillRegState(LHSIsKill))
1359  .addImm(Imm)
1360  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361  return ResultReg;
1362 }
1363 
1364 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365  bool LHSIsKill, unsigned RHSReg,
1366  bool RHSIsKill,
1367  AArch64_AM::ShiftExtendType ShiftType,
1368  uint64_t ShiftImm, bool SetFlags,
1369  bool WantResult) {
1370  assert(LHSReg && RHSReg && "Invalid register number.");
1371  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373 
1374  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375  return 0;
1376 
1377  // Don't deal with undefined shifts.
1378  if (ShiftImm >= RetVT.getSizeInBits())
1379  return 0;
1380 
1381  static const unsigned OpcTable[2][2][2] = {
1382  { { AArch64::SUBWrs, AArch64::SUBXrs },
1383  { AArch64::ADDWrs, AArch64::ADDXrs } },
1384  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1386  };
1387  bool Is64Bit = RetVT == MVT::i64;
1388  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389  const TargetRegisterClass *RC =
1390  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391  unsigned ResultReg;
1392  if (WantResult)
1393  ResultReg = createResultReg(RC);
1394  else
1395  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396 
1397  const MCInstrDesc &II = TII.get(Opc);
1398  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401  .addReg(LHSReg, getKillRegState(LHSIsKill))
1402  .addReg(RHSReg, getKillRegState(RHSIsKill))
1403  .addImm(getShifterImm(ShiftType, ShiftImm));
1404  return ResultReg;
1405 }
1406 
1407 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408  bool LHSIsKill, unsigned RHSReg,
1409  bool RHSIsKill,
1410  AArch64_AM::ShiftExtendType ExtType,
1411  uint64_t ShiftImm, bool SetFlags,
1412  bool WantResult) {
1413  assert(LHSReg && RHSReg && "Invalid register number.");
1414  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416 
1417  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418  return 0;
1419 
1420  if (ShiftImm >= 4)
1421  return 0;
1422 
1423  static const unsigned OpcTable[2][2][2] = {
1424  { { AArch64::SUBWrx, AArch64::SUBXrx },
1425  { AArch64::ADDWrx, AArch64::ADDXrx } },
1426  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1428  };
1429  bool Is64Bit = RetVT == MVT::i64;
1430  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431  const TargetRegisterClass *RC = nullptr;
1432  if (SetFlags)
1433  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434  else
1435  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436  unsigned ResultReg;
1437  if (WantResult)
1438  ResultReg = createResultReg(RC);
1439  else
1440  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441 
1442  const MCInstrDesc &II = TII.get(Opc);
1443  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446  .addReg(LHSReg, getKillRegState(LHSIsKill))
1447  .addReg(RHSReg, getKillRegState(RHSIsKill))
1448  .addImm(getArithExtendImm(ExtType, ShiftImm));
1449  return ResultReg;
1450 }
1451 
1452 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453  Type *Ty = LHS->getType();
1454  EVT EVT = TLI.getValueType(DL, Ty, true);
1455  if (!EVT.isSimple())
1456  return false;
1457  MVT VT = EVT.getSimpleVT();
1458 
1459  switch (VT.SimpleTy) {
1460  default:
1461  return false;
1462  case MVT::i1:
1463  case MVT::i8:
1464  case MVT::i16:
1465  case MVT::i32:
1466  case MVT::i64:
1467  return emitICmp(VT, LHS, RHS, IsZExt);
1468  case MVT::f32:
1469  case MVT::f64:
1470  return emitFCmp(VT, LHS, RHS);
1471  }
1472 }
1473 
1474 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475  bool IsZExt) {
1476  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477  IsZExt) != 0;
1478 }
1479 
1480 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481  uint64_t Imm) {
1482  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 }
1485 
1486 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488  return false;
1489 
1490  // Check to see if the 2nd operand is a constant that we can encode directly
1491  // in the compare.
1492  bool UseImm = false;
1493  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494  if (CFP->isZero() && !CFP->isNegative())
1495  UseImm = true;
1496 
1497  unsigned LHSReg = getRegForValue(LHS);
1498  if (!LHSReg)
1499  return false;
1500  bool LHSIsKill = hasTrivialKill(LHS);
1501 
1502  if (UseImm) {
1503  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505  .addReg(LHSReg, getKillRegState(LHSIsKill));
1506  return true;
1507  }
1508 
1509  unsigned RHSReg = getRegForValue(RHS);
1510  if (!RHSReg)
1511  return false;
1512  bool RHSIsKill = hasTrivialKill(RHS);
1513 
1514  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516  .addReg(LHSReg, getKillRegState(LHSIsKill))
1517  .addReg(RHSReg, getKillRegState(RHSIsKill));
1518  return true;
1519 }
1520 
1521 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522  bool SetFlags, bool WantResult, bool IsZExt) {
1523  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524  IsZExt);
1525 }
1526 
1527 /// \brief This method is a wrapper to simplify add emission.
1528 ///
1529 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 /// that fails, then try to materialize the immediate into a register and use
1531 /// emitAddSub_rr instead.
1532 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533  int64_t Imm) {
1534  unsigned ResultReg;
1535  if (Imm < 0)
1536  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537  else
1538  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539 
1540  if (ResultReg)
1541  return ResultReg;
1542 
1543  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544  if (!CReg)
1545  return 0;
1546 
1547  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548  return ResultReg;
1549 }
1550 
1551 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552  bool SetFlags, bool WantResult, bool IsZExt) {
1553  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554  IsZExt);
1555 }
1556 
1557 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558  bool LHSIsKill, unsigned RHSReg,
1559  bool RHSIsKill, bool WantResult) {
1560  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561  RHSIsKill, /*SetFlags=*/true, WantResult);
1562 }
1563 
1564 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565  bool LHSIsKill, unsigned RHSReg,
1566  bool RHSIsKill,
1567  AArch64_AM::ShiftExtendType ShiftType,
1568  uint64_t ShiftImm, bool WantResult) {
1569  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571  WantResult);
1572 }
1573 
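// Emit an AND/OR/XOR of two IR values, folding a constant RHS into the
// immediate form and a multiply-by-power-of-2 or shift-left into the
// shifted-register form where possible.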
1574 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1575  const Value *LHS, const Value *RHS) {
1576  // Canonicalize immediates to the RHS first.
1577  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1578  std::swap(LHS, RHS);
1579 
1580  // Canonicalize mul by power-of-2 to the RHS.
1581  if (LHS->hasOneUse() && isValueAvailable(LHS))
1582  if (isMulPowOf2(LHS))
1583  std::swap(LHS, RHS);
1584 
1585  // Canonicalize shift immediate to the RHS.
1586  if (LHS->hasOneUse() && isValueAvailable(LHS))
1587  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1588  if (isa<ConstantInt>(SI->getOperand(1)))
1589  std::swap(LHS, RHS);
1590 
1591  unsigned LHSReg = getRegForValue(LHS);
1592  if (!LHSReg)
1593  return 0;
1594  bool LHSIsKill = hasTrivialKill(LHS);
1595 
1596  unsigned ResultReg = 0;
1597  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1598  uint64_t Imm = C->getZExtValue();
1599  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1600  }
1601  if (ResultReg)
1602  return ResultReg;
1603 
1604  // Check if the mul can be folded into the instruction.
1605  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1606  if (isMulPowOf2(RHS)) {
1607  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1608  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1609 
1610  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1611  if (C->getValue().isPowerOf2())
1612  std::swap(MulLHS, MulRHS);
1613 
1614  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1615  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1616 
1617  unsigned RHSReg = getRegForValue(MulLHS);
1618  if (!RHSReg)
1619  return 0;
1620  bool RHSIsKill = hasTrivialKill(MulLHS);
1621  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1622  RHSIsKill, ShiftVal);
1623  if (ResultReg)
1624  return ResultReg;
1625  }
1626  }
1627 
1628  // Check if the shift can be folded into the instruction.
1629  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1630  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1631  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1632  uint64_t ShiftVal = C->getZExtValue();
1633  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1634  if (!RHSReg)
1635  return 0;
1636  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1637  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1638  RHSIsKill, ShiftVal);
1639  if (ResultReg)
1640  return ResultReg;
1641  }
1642  }
1643 
1644  unsigned RHSReg = getRegForValue(RHS);
1645  if (!RHSReg)
1646  return 0;
1647  bool RHSIsKill = hasTrivialKill(RHS);
1648 
1649  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1650  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1651  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1652  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1653  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1654  }
1655  return ResultReg;
1656 }
1657 
1658 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659  unsigned LHSReg, bool LHSIsKill,
1660  uint64_t Imm) {
1661  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662  "ISD nodes are not consecutive!");
1663  static const unsigned OpcTable[3][2] = {
1664  { AArch64::ANDWri, AArch64::ANDXri },
1665  { AArch64::ORRWri, AArch64::ORRXri },
1666  { AArch64::EORWri, AArch64::EORXri }
1667  };
1668  const TargetRegisterClass *RC;
1669  unsigned Opc;
1670  unsigned RegSize;
1671  switch (RetVT.SimpleTy) {
1672  default:
1673  return 0;
1674  case MVT::i1:
1675  case MVT::i8:
1676  case MVT::i16:
1677  case MVT::i32: {
1678  unsigned Idx = ISDOpc - ISD::AND;
1679  Opc = OpcTable[Idx][0];
1680  RC = &AArch64::GPR32spRegClass;
1681  RegSize = 32;
1682  break;
1683  }
1684  case MVT::i64:
1685  Opc = OpcTable[ISDOpc - ISD::AND][1];
1686  RC = &AArch64::GPR64spRegClass;
1687  RegSize = 64;
1688  break;
1689  }
1690 
1691  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692  return 0;
1693 
1694  unsigned ResultReg =
1695  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700  }
1701  return ResultReg;
1702 }
1703 
1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705  unsigned LHSReg, bool LHSIsKill,
1706  unsigned RHSReg, bool RHSIsKill,
1707  uint64_t ShiftImm) {
1708  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709  "ISD nodes are not consecutive!");
1710  static const unsigned OpcTable[3][2] = {
1711  { AArch64::ANDWrs, AArch64::ANDXrs },
1712  { AArch64::ORRWrs, AArch64::ORRXrs },
1713  { AArch64::EORWrs, AArch64::EORXrs }
1714  };
1715 
1716  // Don't deal with undefined shifts.
1717  if (ShiftImm >= RetVT.getSizeInBits())
1718  return 0;
1719 
1720  const TargetRegisterClass *RC;
1721  unsigned Opc;
1722  switch (RetVT.SimpleTy) {
1723  default:
1724  return 0;
1725  case MVT::i1:
1726  case MVT::i8:
1727  case MVT::i16:
1728  case MVT::i32:
1729  Opc = OpcTable[ISDOpc - ISD::AND][0];
1730  RC = &AArch64::GPR32RegClass;
1731  break;
1732  case MVT::i64:
1733  Opc = OpcTable[ISDOpc - ISD::AND][1];
1734  RC = &AArch64::GPR64RegClass;
1735  break;
1736  }
1737  unsigned ResultReg =
 1738  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 1739  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
 1740  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743  }
1744  return ResultReg;
1745 }
1746 
1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748  uint64_t Imm) {
1749  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 }
1751 
1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753  bool WantZExt, MachineMemOperand *MMO) {
1754  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755  return 0;
1756 
1757  // Simplify this down to something we can handle.
1758  if (!simplifyAddress(Addr, VT))
1759  return 0;
1760 
1761  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762  if (!ScaleFactor)
1763  llvm_unreachable("Unexpected value type.");
1764 
1765  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767  bool UseScaled = true;
1768  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769  UseScaled = false;
1770  ScaleFactor = 1;
1771  }
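 // Example (illustrative): for an i32 load ScaleFactor is 4, so an offset of
 // +8 can use the scaled form (e.g. LDRWui with a scaled immediate of 2),
 // while a negative or unaligned offset falls back to the unscaled form
 // (e.g. LDURWi).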
1772 
1773  static const unsigned GPOpcTable[2][8][4] = {
1774  // Sign-extend.
1775  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1776  AArch64::LDURXi },
1777  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1778  AArch64::LDURXi },
1779  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1780  AArch64::LDRXui },
1781  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1782  AArch64::LDRXui },
1783  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784  AArch64::LDRXroX },
1785  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786  AArch64::LDRXroX },
1787  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788  AArch64::LDRXroW },
1789  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790  AArch64::LDRXroW }
1791  },
1792  // Zero-extend.
1793  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1794  AArch64::LDURXi },
1795  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796  AArch64::LDURXi },
1797  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1798  AArch64::LDRXui },
1799  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800  AArch64::LDRXui },
1801  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1802  AArch64::LDRXroX },
1803  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804  AArch64::LDRXroX },
1805  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1806  AArch64::LDRXroW },
1807  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808  AArch64::LDRXroW }
1809  }
1810  };
1811 
1812  static const unsigned FPOpcTable[4][2] = {
1813  { AArch64::LDURSi, AArch64::LDURDi },
1814  { AArch64::LDRSui, AArch64::LDRDui },
1815  { AArch64::LDRSroX, AArch64::LDRDroX },
1816  { AArch64::LDRSroW, AArch64::LDRDroW }
1817  };
1818 
1819  unsigned Opc;
1820  const TargetRegisterClass *RC;
1821  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822  Addr.getOffsetReg();
1823  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825  Addr.getExtendType() == AArch64_AM::SXTW)
1826  Idx++;
1827 
1828  bool IsRet64Bit = RetVT == MVT::i64;
1829  switch (VT.SimpleTy) {
1830  default:
1831  llvm_unreachable("Unexpected value type.");
1832  case MVT::i1: // Intentional fall-through.
1833  case MVT::i8:
1834  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835  RC = (IsRet64Bit && !WantZExt) ?
1836  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837  break;
1838  case MVT::i16:
1839  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840  RC = (IsRet64Bit && !WantZExt) ?
1841  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842  break;
1843  case MVT::i32:
1844  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845  RC = (IsRet64Bit && !WantZExt) ?
1846  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847  break;
1848  case MVT::i64:
1849  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850  RC = &AArch64::GPR64RegClass;
1851  break;
1852  case MVT::f32:
1853  Opc = FPOpcTable[Idx][0];
1854  RC = &AArch64::FPR32RegClass;
1855  break;
1856  case MVT::f64:
1857  Opc = FPOpcTable[Idx][1];
1858  RC = &AArch64::FPR64RegClass;
1859  break;
1860  }
1861 
1862  // Create the base instruction, then add the operands.
1863  unsigned ResultReg = createResultReg(RC);
1864  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1865  TII.get(Opc), ResultReg);
1866  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867 
1868  // Loading an i1 requires special handling.
1869  if (VT == MVT::i1) {
1870  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1871  assert(ANDReg && "Unexpected AND instruction emission failure.");
1872  ResultReg = ANDReg;
1873  }
1874 
1875  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876  // the 32bit reg to a 64bit reg.
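 // e.g. (illustrative) an i8 load zero-extended to i64 becomes roughly:
 //   %wN = LDRBBui ... ; %xM = SUBREG_TO_REG 0, %wN, sub_32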
1877  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881  .addImm(0)
1882  .addReg(ResultReg, getKillRegState(true))
1883  .addImm(AArch64::sub_32);
1884  ResultReg = Reg64;
1885  }
1886  return ResultReg;
1887 }
1888 
1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890  MVT VT;
1891  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892  return false;
1893 
1894  if (VT.isVector())
1895  return selectOperator(I, I->getOpcode());
1896 
1897  unsigned ResultReg;
1898  switch (I->getOpcode()) {
1899  default:
1900  llvm_unreachable("Unexpected instruction.");
1901  case Instruction::Add:
1902  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903  break;
1904  case Instruction::Sub:
1905  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906  break;
1907  }
1908  if (!ResultReg)
1909  return false;
1910 
1911  updateValueMap(I, ResultReg);
1912  return true;
1913 }
1914 
1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916  MVT VT;
1917  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918  return false;
1919 
1920  if (VT.isVector())
1921  return selectOperator(I, I->getOpcode());
1922 
1923  unsigned ResultReg;
1924  switch (I->getOpcode()) {
1925  default:
1926  llvm_unreachable("Unexpected instruction.");
1927  case Instruction::And:
1928  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929  break;
1930  case Instruction::Or:
1931  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932  break;
1933  case Instruction::Xor:
1934  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935  break;
1936  }
1937  if (!ResultReg)
1938  return false;
1939 
1940  updateValueMap(I, ResultReg);
1941  return true;
1942 }
1943 
1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945  MVT VT;
1946  // Verify we have a legal type before going any further. Currently, we handle
1947  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950  cast<LoadInst>(I)->isAtomic())
1951  return false;
1952 
1953  const Value *SV = I->getOperand(0);
1954  if (TLI.supportSwiftError()) {
1955  // Swifterror values can come from either a function parameter with
1956  // swifterror attribute or an alloca with swifterror attribute.
1957  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958  if (Arg->hasSwiftErrorAttr())
1959  return false;
1960  }
1961 
1962  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963  if (Alloca->isSwiftError())
1964  return false;
1965  }
1966  }
1967 
1968  // See if we can handle this address.
1969  Address Addr;
1970  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971  return false;
1972 
1973  // Fold the following sign-/zero-extend into the load instruction.
1974  bool WantZExt = true;
1975  MVT RetVT = VT;
1976  const Value *IntExtVal = nullptr;
1977  if (I->hasOneUse()) {
1978  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979  if (isTypeSupported(ZE->getType(), RetVT))
1980  IntExtVal = ZE;
1981  else
1982  RetVT = VT;
1983  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984  if (isTypeSupported(SE->getType(), RetVT))
1985  IntExtVal = SE;
1986  else
1987  RetVT = VT;
1988  WantZExt = false;
1989  }
1990  }
1991 
1992  unsigned ResultReg =
1993  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994  if (!ResultReg)
1995  return false;
1996 
1997  // There are a few different cases we have to handle, because the load or the
1998  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1999  // SelectionDAG. There is also an ordering issue when both instructions are in
2000  // different basic blocks.
2001  // 1.) The load instruction is selected by FastISel, but the integer extend
2002  // not. This usually happens when the integer extend is in a different
2003  // basic block and SelectionDAG took over for that basic block.
2004  // 2.) The load instruction is selected before the integer extend. This only
2005  // happens when the integer extend is in a different basic block.
2006  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007  // by FastISel. This happens if there are instructions between the load
2008  // and the integer extend that couldn't be selected by FastISel.
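 // A typical folded pattern (illustrative IR):
 //   %v = load i8, i8* %p
 //   %e = zext i8 %v to i64
 // Here the extension is folded into the load and %e is mapped to the load's
 // result register.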
2009  if (IntExtVal) {
2010  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012  // it when it selects the integer extend.
2013  unsigned Reg = lookUpRegForValue(IntExtVal);
2014  auto *MI = MRI.getUniqueVRegDef(Reg);
2015  if (!MI) {
2016  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017  if (WantZExt) {
2018  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019  std::prev(FuncInfo.InsertPt)->eraseFromParent();
2020  ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2021  } else
2022  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2023  /*IsKill=*/true,
2024  AArch64::sub_32);
2025  }
2026  updateValueMap(I, ResultReg);
2027  return true;
2028  }
2029 
2030  // The integer extend has already been emitted - delete all the instructions
2031  // that have been emitted by the integer extend lowering code and use the
2032  // result from the load instruction directly.
2033  while (MI) {
2034  Reg = 0;
2035  for (auto &Opnd : MI->uses()) {
2036  if (Opnd.isReg()) {
2037  Reg = Opnd.getReg();
2038  break;
2039  }
2040  }
2041  MI->eraseFromParent();
2042  MI = nullptr;
2043  if (Reg)
2044  MI = MRI.getUniqueVRegDef(Reg);
2045  }
2046  updateValueMap(IntExtVal, ResultReg);
2047  return true;
2048  }
2049 
2050  updateValueMap(I, ResultReg);
2051  return true;
2052 }
2053 
2054 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2055  unsigned AddrReg,
2056  MachineMemOperand *MMO) {
2057  unsigned Opc;
2058  switch (VT.SimpleTy) {
2059  default: return false;
2060  case MVT::i8: Opc = AArch64::STLRB; break;
2061  case MVT::i16: Opc = AArch64::STLRH; break;
2062  case MVT::i32: Opc = AArch64::STLRW; break;
2063  case MVT::i64: Opc = AArch64::STLRX; break;
2064  }
2065 
2066  const MCInstrDesc &II = TII.get(Opc);
2067  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2068  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2069  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2070  .addReg(SrcReg)
2071  .addReg(AddrReg)
2072  .addMemOperand(MMO);
2073  return true;
2074 }
2075 
2076 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2077  MachineMemOperand *MMO) {
2078  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2079  return false;
2080 
2081  // Simplify this down to something we can handle.
2082  if (!simplifyAddress(Addr, VT))
2083  return false;
2084 
2085  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2086  if (!ScaleFactor)
2087  llvm_unreachable("Unexpected value type.");
2088 
2089  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2090  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2091  bool UseScaled = true;
2092  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2093  UseScaled = false;
2094  ScaleFactor = 1;
2095  }
2096 
2097  static const unsigned OpcTable[4][6] = {
2098  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2099  AArch64::STURSi, AArch64::STURDi },
2100  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2101  AArch64::STRSui, AArch64::STRDui },
2102  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2103  AArch64::STRSroX, AArch64::STRDroX },
2104  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2105  AArch64::STRSroW, AArch64::STRDroW }
2106  };
2107 
2108  unsigned Opc;
2109  bool VTIsi1 = false;
2110  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2111  Addr.getOffsetReg();
2112  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2113  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2114  Addr.getExtendType() == AArch64_AM::SXTW)
2115  Idx++;
2116 
2117  switch (VT.SimpleTy) {
2118  default: llvm_unreachable("Unexpected value type.");
2119  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2120  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2121  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2122  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2123  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2124  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2125  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2126  }
2127 
2128  // Storing an i1 requires special handling.
2129  if (VTIsi1 && SrcReg != AArch64::WZR) {
2130  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2131  assert(ANDReg && "Unexpected AND instruction emission failure.");
2132  SrcReg = ANDReg;
2133  }
2134  // Create the base instruction, then add the operands.
2135  const MCInstrDesc &II = TII.get(Opc);
2136  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2137  MachineInstrBuilder MIB =
2138  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2139  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2140 
2141  return true;
2142 }
2143 
2144 bool AArch64FastISel::selectStore(const Instruction *I) {
2145  MVT VT;
2146  const Value *Op0 = I->getOperand(0);
2147  // Verify we have a legal type before going any further. Currently, we handle
2148  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2149  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2150  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2151  return false;
2152 
2153  const Value *PtrV = I->getOperand(1);
2154  if (TLI.supportSwiftError()) {
2155  // Swifterror values can come from either a function parameter with
2156  // swifterror attribute or an alloca with swifterror attribute.
2157  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2158  if (Arg->hasSwiftErrorAttr())
2159  return false;
2160  }
2161 
2162  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2163  if (Alloca->isSwiftError())
2164  return false;
2165  }
2166  }
2167 
2168  // Get the value to be stored into a register. Use the zero register directly
2169  // when possible to avoid an unnecessary copy and a wasted register.
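 // e.g. (illustrative) `store i64 0, i64* %p` can store XZR directly, and a
 // store of float +0.0 reuses WZR, but -0.0 still needs a real register.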
2170  unsigned SrcReg = 0;
2171  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2172  if (CI->isZero())
2173  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2175  if (CF->isZero() && !CF->isNegative()) {
2176  VT = MVT::getIntegerVT(VT.getSizeInBits());
2177  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2178  }
2179  }
2180 
2181  if (!SrcReg)
2182  SrcReg = getRegForValue(Op0);
2183 
2184  if (!SrcReg)
2185  return false;
2186 
2187  auto *SI = cast<StoreInst>(I);
2188 
2189  // Try to emit a STLR for seq_cst/release.
2190  if (SI->isAtomic()) {
2191  AtomicOrdering Ord = SI->getOrdering();
2192  // The non-atomic instructions are sufficient for relaxed stores.
2193  if (isReleaseOrStronger(Ord)) {
2194  // The STLR addressing mode only supports a base reg; pass that directly.
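 // e.g. (illustrative) a release store of an i32 is emitted as
 //   STLRW <src>, [<base>]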
2195  unsigned AddrReg = getRegForValue(PtrV);
2196  return emitStoreRelease(VT, SrcReg, AddrReg,
2197  createMachineMemOperandFor(I));
2198  }
2199  }
2200 
2201  // See if we can handle this address.
2202  Address Addr;
2203  if (!computeAddress(PtrV, Addr, Op0->getType()))
2204  return false;
2205 
2206  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2207  return false;
2208  return true;
2209 }
2210 
 2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
 2212  switch (Pred) {
2213  case CmpInst::FCMP_ONE:
2214  case CmpInst::FCMP_UEQ:
2215  default:
2216  // AL is our "false" for now. The other two need more compares.
2217  return AArch64CC::AL;
2218  case CmpInst::ICMP_EQ:
2219  case CmpInst::FCMP_OEQ:
2220  return AArch64CC::EQ;
2221  case CmpInst::ICMP_SGT:
2222  case CmpInst::FCMP_OGT:
2223  return AArch64CC::GT;
2224  case CmpInst::ICMP_SGE:
2225  case CmpInst::FCMP_OGE:
2226  return AArch64CC::GE;
2227  case CmpInst::ICMP_UGT:
2228  case CmpInst::FCMP_UGT:
2229  return AArch64CC::HI;
2230  case CmpInst::FCMP_OLT:
2231  return AArch64CC::MI;
2232  case CmpInst::ICMP_ULE:
2233  case CmpInst::FCMP_OLE:
2234  return AArch64CC::LS;
2235  case CmpInst::FCMP_ORD:
2236  return AArch64CC::VC;
2237  case CmpInst::FCMP_UNO:
2238  return AArch64CC::VS;
2239  case CmpInst::FCMP_UGE:
2240  return AArch64CC::PL;
2241  case CmpInst::ICMP_SLT:
2242  case CmpInst::FCMP_ULT:
2243  return AArch64CC::LT;
2244  case CmpInst::ICMP_SLE:
2245  case CmpInst::FCMP_ULE:
2246  return AArch64CC::LE;
2247  case CmpInst::FCMP_UNE:
2248  case CmpInst::ICMP_NE:
2249  return AArch64CC::NE;
2250  case CmpInst::ICMP_UGE:
2251  return AArch64CC::HS;
2252  case CmpInst::ICMP_ULT:
2253  return AArch64CC::LO;
2254  }
2255 }
2256 
2257 /// \brief Try to emit a combined compare-and-branch instruction.
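 /// Illustrative patterns this aims to handle (not exhaustive):
 ///   icmp ne i64 %x, 0  + br        ->  CBNZ %x
 ///   icmp eq (and %x, 4), 0 + br    ->  TBZ  %x, #2
 ///   icmp slt i32 %x, 0 + br        ->  TBNZ %x, #31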
2258 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2259  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2260  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2261  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2262 
2263  const Value *LHS = CI->getOperand(0);
2264  const Value *RHS = CI->getOperand(1);
2265 
2266  MVT VT;
2267  if (!isTypeSupported(LHS->getType(), VT))
2268  return false;
2269 
2270  unsigned BW = VT.getSizeInBits();
2271  if (BW > 64)
2272  return false;
2273 
2274  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2275  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2276 
2277  // Try to take advantage of fallthrough opportunities.
2278  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2279  std::swap(TBB, FBB);
2280  Predicate = CmpInst::getInversePredicate(Predicate);
2281  }
2282 
2283  int TestBit = -1;
2284  bool IsCmpNE;
2285  switch (Predicate) {
2286  default:
2287  return false;
2288  case CmpInst::ICMP_EQ:
2289  case CmpInst::ICMP_NE:
2290  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2291  std::swap(LHS, RHS);
2292 
2293  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2294  return false;
2295 
2296  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2297  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2298  const Value *AndLHS = AI->getOperand(0);
2299  const Value *AndRHS = AI->getOperand(1);
2300 
2301  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2302  if (C->getValue().isPowerOf2())
2303  std::swap(AndLHS, AndRHS);
2304 
2305  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2306  if (C->getValue().isPowerOf2()) {
2307  TestBit = C->getValue().logBase2();
2308  LHS = AndLHS;
2309  }
2310  }
2311 
2312  if (VT == MVT::i1)
2313  TestBit = 0;
2314 
2315  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2316  break;
2317  case CmpInst::ICMP_SLT:
2318  case CmpInst::ICMP_SGE:
2319  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2320  return false;
2321 
2322  TestBit = BW - 1;
2323  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2324  break;
2325  case CmpInst::ICMP_SGT:
2326  case CmpInst::ICMP_SLE:
2327  if (!isa<ConstantInt>(RHS))
2328  return false;
2329 
2330  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2331  return false;
2332 
2333  TestBit = BW - 1;
2334  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2335  break;
2336  } // end switch
2337 
2338  static const unsigned OpcTable[2][2][2] = {
2339  { {AArch64::CBZW, AArch64::CBZX },
2340  {AArch64::CBNZW, AArch64::CBNZX} },
2341  { {AArch64::TBZW, AArch64::TBZX },
2342  {AArch64::TBNZW, AArch64::TBNZX} }
2343  };
2344 
2345  bool IsBitTest = TestBit != -1;
2346  bool Is64Bit = BW == 64;
2347  if (TestBit < 32 && TestBit >= 0)
2348  Is64Bit = false;
2349 
2350  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2351  const MCInstrDesc &II = TII.get(Opc);
2352 
2353  unsigned SrcReg = getRegForValue(LHS);
2354  if (!SrcReg)
2355  return false;
2356  bool SrcIsKill = hasTrivialKill(LHS);
2357 
2358  if (BW == 64 && !Is64Bit)
2359  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2360  AArch64::sub_32);
2361 
2362  if ((BW < 32) && !IsBitTest)
2363  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2364 
2365  // Emit the combined compare and branch instruction.
2366  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2367  MachineInstrBuilder MIB =
2368  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2369  .addReg(SrcReg, getKillRegState(SrcIsKill));
2370  if (IsBitTest)
2371  MIB.addImm(TestBit);
2372  MIB.addMBB(TBB);
2373 
2374  finishCondBranch(BI->getParent(), TBB, FBB);
2375  return true;
2376 }
2377 
2378 bool AArch64FastISel::selectBranch(const Instruction *I) {
2379  const BranchInst *BI = cast<BranchInst>(I);
2380  if (BI->isUnconditional()) {
2381  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2382  fastEmitBranch(MSucc, BI->getDebugLoc());
2383  return true;
2384  }
2385 
2386  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2388 
2389  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2390  if (CI->hasOneUse() && isValueAvailable(CI)) {
2391  // Try to optimize or fold the cmp.
2392  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2393  switch (Predicate) {
2394  default:
2395  break;
2396  case CmpInst::FCMP_FALSE:
2397  fastEmitBranch(FBB, DbgLoc);
2398  return true;
2399  case CmpInst::FCMP_TRUE:
2400  fastEmitBranch(TBB, DbgLoc);
2401  return true;
2402  }
2403 
2404  // Try to emit a combined compare-and-branch first.
2405  if (emitCompareAndBranch(BI))
2406  return true;
2407 
2408  // Try to take advantage of fallthrough opportunities.
2409  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2410  std::swap(TBB, FBB);
2411  Predicate = CmpInst::getInversePredicate(Predicate);
2412  }
2413 
2414  // Emit the cmp.
2415  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2416  return false;
2417 
2418  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2419  // instruction.
2420  AArch64CC::CondCode CC = getCompareCC(Predicate);
 2421  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
 2422  switch (Predicate) {
2423  default:
2424  break;
2425  case CmpInst::FCMP_UEQ:
2426  ExtraCC = AArch64CC::EQ;
2427  CC = AArch64CC::VS;
2428  break;
2429  case CmpInst::FCMP_ONE:
2430  ExtraCC = AArch64CC::MI;
2431  CC = AArch64CC::GT;
2432  break;
2433  }
2434  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2435 
2436  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2437  if (ExtraCC != AArch64CC::AL) {
2438  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2439  .addImm(ExtraCC)
2440  .addMBB(TBB);
2441  }
2442 
2443  // Emit the branch.
2444  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2445  .addImm(CC)
2446  .addMBB(TBB);
2447 
2448  finishCondBranch(BI->getParent(), TBB, FBB);
2449  return true;
2450  }
2451  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2452  uint64_t Imm = CI->getZExtValue();
2453  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2455  .addMBB(Target);
2456 
2457  // Obtain the branch probability and add the target to the successor list.
2458  if (FuncInfo.BPI) {
2459  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2460  BI->getParent(), Target->getBasicBlock());
2461  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2462  } else
2463  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2464  return true;
2465  } else {
 2466  AArch64CC::CondCode CC = AArch64CC::AL;
 2467  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2468  // Fake request the condition, otherwise the intrinsic might be completely
2469  // optimized away.
2470  unsigned CondReg = getRegForValue(BI->getCondition());
2471  if (!CondReg)
2472  return false;
2473 
2474  // Emit the branch.
2475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2476  .addImm(CC)
2477  .addMBB(TBB);
2478 
2479  finishCondBranch(BI->getParent(), TBB, FBB);
2480  return true;
2481  }
2482  }
2483 
2484  unsigned CondReg = getRegForValue(BI->getCondition());
2485  if (CondReg == 0)
2486  return false;
2487  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2488 
2489  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2490  unsigned Opcode = AArch64::TBNZW;
2491  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2492  std::swap(TBB, FBB);
2493  Opcode = AArch64::TBZW;
2494  }
2495 
2496  const MCInstrDesc &II = TII.get(Opcode);
2497  unsigned ConstrainedCondReg
2498  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2499  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2500  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2501  .addImm(0)
2502  .addMBB(TBB);
2503 
2504  finishCondBranch(BI->getParent(), TBB, FBB);
2505  return true;
2506 }
2507 
2508 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2509  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2510  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2511  if (AddrReg == 0)
2512  return false;
2513 
2514  // Emit the indirect branch.
2515  const MCInstrDesc &II = TII.get(AArch64::BR);
2516  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2517  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2518 
2519  // Make sure the CFG is up-to-date.
2520  for (auto *Succ : BI->successors())
2521  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2522 
2523  return true;
2524 }
2525 
2526 bool AArch64FastISel::selectCmp(const Instruction *I) {
2527  const CmpInst *CI = cast<CmpInst>(I);
2528 
2529  // Vectors of i1 are weird: bail out.
2530  if (CI->getType()->isVectorTy())
2531  return false;
2532 
2533  // Try to optimize or fold the cmp.
2534  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2535  unsigned ResultReg = 0;
2536  switch (Predicate) {
2537  default:
2538  break;
2539  case CmpInst::FCMP_FALSE:
2540  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2541  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2542  TII.get(TargetOpcode::COPY), ResultReg)
2543  .addReg(AArch64::WZR, getKillRegState(true));
2544  break;
2545  case CmpInst::FCMP_TRUE:
2546  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2547  break;
2548  }
2549 
2550  if (ResultReg) {
2551  updateValueMap(I, ResultReg);
2552  return true;
2553  }
2554 
2555  // Emit the cmp.
2556  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2557  return false;
2558 
2559  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2560 
2561  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2562  // condition codes are inverted, because they are used by CSINC.
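 // e.g. (illustrative) FCMP_UEQ is materialized as "EQ or unordered (VS)":
 //   csinc w8, wzr, wzr, ne   // w8 = 1 iff EQ
 //   csinc w0, w8,  wzr, vc   // w0 = 1 iff VS, otherwise w8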
2563  static unsigned CondCodeTable[2][2] = {
 2564  { AArch64CC::NE, AArch64CC::VC },
 2565  { AArch64CC::PL, AArch64CC::LE }
 2566  };
2567  unsigned *CondCodes = nullptr;
2568  switch (Predicate) {
2569  default:
2570  break;
2571  case CmpInst::FCMP_UEQ:
2572  CondCodes = &CondCodeTable[0][0];
2573  break;
2574  case CmpInst::FCMP_ONE:
2575  CondCodes = &CondCodeTable[1][0];
2576  break;
2577  }
2578 
2579  if (CondCodes) {
2580  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2581  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2582  TmpReg1)
2583  .addReg(AArch64::WZR, getKillRegState(true))
2584  .addReg(AArch64::WZR, getKillRegState(true))
2585  .addImm(CondCodes[0]);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587  ResultReg)
2588  .addReg(TmpReg1, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(CondCodes[1]);
2591 
2592  updateValueMap(I, ResultReg);
2593  return true;
2594  }
2595 
2596  // Now set a register based on the comparison.
2597  AArch64CC::CondCode CC = getCompareCC(Predicate);
2598  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2599  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2600  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2601  ResultReg)
2602  .addReg(AArch64::WZR, getKillRegState(true))
2603  .addReg(AArch64::WZR, getKillRegState(true))
2604  .addImm(invertedCC);
2605 
2606  updateValueMap(I, ResultReg);
2607  return true;
2608 }
2609 
2610 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2611 /// value.
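 /// For example (illustrative):
 ///   select i1 %c, i1 true, i1 %b   -> orr %c, %b
 ///   select i1 %c, i1 %a, i1 false  -> and %c, %a
 ///   select i1 %c, i1 false, i1 %b  -> bic %b, %c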
2612 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2613  if (!SI->getType()->isIntegerTy(1))
2614  return false;
2615 
2616  const Value *Src1Val, *Src2Val;
2617  unsigned Opc = 0;
2618  bool NeedExtraOp = false;
2619  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2620  if (CI->isOne()) {
2621  Src1Val = SI->getCondition();
2622  Src2Val = SI->getFalseValue();
2623  Opc = AArch64::ORRWrr;
2624  } else {
2625  assert(CI->isZero());
2626  Src1Val = SI->getFalseValue();
2627  Src2Val = SI->getCondition();
2628  Opc = AArch64::BICWrr;
2629  }
2630  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2631  if (CI->isOne()) {
2632  Src1Val = SI->getCondition();
2633  Src2Val = SI->getTrueValue();
2634  Opc = AArch64::ORRWrr;
2635  NeedExtraOp = true;
2636  } else {
2637  assert(CI->isZero());
2638  Src1Val = SI->getCondition();
2639  Src2Val = SI->getTrueValue();
2640  Opc = AArch64::ANDWrr;
2641  }
2642  }
2643 
2644  if (!Opc)
2645  return false;
2646 
2647  unsigned Src1Reg = getRegForValue(Src1Val);
2648  if (!Src1Reg)
2649  return false;
2650  bool Src1IsKill = hasTrivialKill(Src1Val);
2651 
2652  unsigned Src2Reg = getRegForValue(Src2Val);
2653  if (!Src2Reg)
2654  return false;
2655  bool Src2IsKill = hasTrivialKill(Src2Val);
2656 
2657  if (NeedExtraOp) {
2658  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2659  Src1IsKill = true;
2660  }
2661  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2662  Src1IsKill, Src2Reg, Src2IsKill);
2663  updateValueMap(SI, ResultReg);
2664  return true;
2665 }
2666 
2667 bool AArch64FastISel::selectSelect(const Instruction *I) {
2668  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2669  MVT VT;
2670  if (!isTypeSupported(I->getType(), VT))
2671  return false;
2672 
2673  unsigned Opc;
2674  const TargetRegisterClass *RC;
2675  switch (VT.SimpleTy) {
2676  default:
2677  return false;
2678  case MVT::i1:
2679  case MVT::i8:
2680  case MVT::i16:
2681  case MVT::i32:
2682  Opc = AArch64::CSELWr;
2683  RC = &AArch64::GPR32RegClass;
2684  break;
2685  case MVT::i64:
2686  Opc = AArch64::CSELXr;
2687  RC = &AArch64::GPR64RegClass;
2688  break;
2689  case MVT::f32:
2690  Opc = AArch64::FCSELSrrr;
2691  RC = &AArch64::FPR32RegClass;
2692  break;
2693  case MVT::f64:
2694  Opc = AArch64::FCSELDrrr;
2695  RC = &AArch64::FPR64RegClass;
2696  break;
2697  }
2698 
2699  const SelectInst *SI = cast<SelectInst>(I);
2700  const Value *Cond = SI->getCondition();
 2701  AArch64CC::CondCode CC = AArch64CC::AL;
 2702  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
 2703 
2704  if (optimizeSelect(SI))
2705  return true;
2706 
2707  // Try to pickup the flags, so we don't have to emit another compare.
2708  if (foldXALUIntrinsic(CC, I, Cond)) {
2709  // Fake request the condition to force emission of the XALU intrinsic.
2710  unsigned CondReg = getRegForValue(Cond);
2711  if (!CondReg)
2712  return false;
2713  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2714  isValueAvailable(Cond)) {
2715  const auto *Cmp = cast<CmpInst>(Cond);
2716  // Try to optimize or fold the cmp.
2717  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2718  const Value *FoldSelect = nullptr;
2719  switch (Predicate) {
2720  default:
2721  break;
2722  case CmpInst::FCMP_FALSE:
2723  FoldSelect = SI->getFalseValue();
2724  break;
2725  case CmpInst::FCMP_TRUE:
2726  FoldSelect = SI->getTrueValue();
2727  break;
2728  }
2729 
2730  if (FoldSelect) {
2731  unsigned SrcReg = getRegForValue(FoldSelect);
2732  if (!SrcReg)
2733  return false;
2734  unsigned UseReg = lookUpRegForValue(SI);
2735  if (UseReg)
2736  MRI.clearKillFlags(UseReg);
2737 
2738  updateValueMap(I, SrcReg);
2739  return true;
2740  }
2741 
2742  // Emit the cmp.
2743  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2744  return false;
2745 
2746  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2747  CC = getCompareCC(Predicate);
2748  switch (Predicate) {
2749  default:
2750  break;
2751  case CmpInst::FCMP_UEQ:
2752  ExtraCC = AArch64CC::EQ;
2753  CC = AArch64CC::VS;
2754  break;
2755  case CmpInst::FCMP_ONE:
2756  ExtraCC = AArch64CC::MI;
2757  CC = AArch64CC::GT;
2758  break;
2759  }
2760  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2761  } else {
2762  unsigned CondReg = getRegForValue(Cond);
2763  if (!CondReg)
2764  return false;
2765  bool CondIsKill = hasTrivialKill(Cond);
2766 
2767  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2768  CondReg = constrainOperandRegClass(II, CondReg, 1);
2769 
2770  // Emit a TST instruction (ANDS wzr, reg, #imm).
2771  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2772  AArch64::WZR)
2773  .addReg(CondReg, getKillRegState(CondIsKill))
 2774  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
 2775  }
2776 
2777  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2778  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2779 
2780  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2781  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2782 
2783  if (!Src1Reg || !Src2Reg)
2784  return false;
2785 
2786  if (ExtraCC != AArch64CC::AL) {
2787  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2788  Src2IsKill, ExtraCC);
2789  Src2IsKill = true;
2790  }
2791  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2792  Src2IsKill, CC);
2793  updateValueMap(I, ResultReg);
2794  return true;
2795 }
2796 
2797 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2798  Value *V = I->getOperand(0);
2799  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2800  return false;
2801 
2802  unsigned Op = getRegForValue(V);
2803  if (Op == 0)
2804  return false;
2805 
2806  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2807  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2808  ResultReg).addReg(Op);
2809  updateValueMap(I, ResultReg);
2810  return true;
2811 }
2812 
2813 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2814  Value *V = I->getOperand(0);
2815  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2816  return false;
2817 
2818  unsigned Op = getRegForValue(V);
2819  if (Op == 0)
2820  return false;
2821 
2822  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2823  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2824  ResultReg).addReg(Op);
2825  updateValueMap(I, ResultReg);
2826  return true;
2827 }
2828 
2829 // FPToUI and FPToSI
2830 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2831  MVT DestVT;
2832  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2833  return false;
2834 
2835  unsigned SrcReg = getRegForValue(I->getOperand(0));
2836  if (SrcReg == 0)
2837  return false;
2838 
2839  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2840  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2841  return false;
2842 
2843  unsigned Opc;
2844  if (SrcVT == MVT::f64) {
2845  if (Signed)
2846  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2847  else
2848  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2849  } else {
2850  if (Signed)
2851  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2852  else
2853  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2854  }
2855  unsigned ResultReg = createResultReg(
2856  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2857  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2858  .addReg(SrcReg);
2859  updateValueMap(I, ResultReg);
2860  return true;
2861 }
2862 
2863 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2864  MVT DestVT;
2865  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2866  return false;
2867  // Let regular ISEL handle FP16
2868  if (DestVT == MVT::f16)
2869  return false;
2870 
2871  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2872  "Unexpected value type.");
2873 
2874  unsigned SrcReg = getRegForValue(I->getOperand(0));
2875  if (!SrcReg)
2876  return false;
2877  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2878 
2879  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2880 
2881  // Handle sign-extension.
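 // e.g. (illustrative) sitofp i8 %x to float first sign-extends %x to i32,
 // then converts with the 32-bit-source SCVTF form.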
2882  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2883  SrcReg =
2884  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2885  if (!SrcReg)
2886  return false;
2887  SrcIsKill = true;
2888  }
2889 
2890  unsigned Opc;
2891  if (SrcVT == MVT::i64) {
2892  if (Signed)
2893  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2894  else
2895  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2896  } else {
2897  if (Signed)
2898  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2899  else
2900  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2901  }
2902 
2903  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2904  SrcIsKill);
2905  updateValueMap(I, ResultReg);
2906  return true;
2907 }
2908 
2909 bool AArch64FastISel::fastLowerArguments() {
2910  if (!FuncInfo.CanLowerReturn)
2911  return false;
2912 
2913  const Function *F = FuncInfo.Fn;
2914  if (F->isVarArg())
2915  return false;
2916 
2917  CallingConv::ID CC = F->getCallingConv();
2918  if (CC != CallingConv::C && CC != CallingConv::Swift)
2919  return false;
2920 
2921  // Only handle simple cases of up to 8 GPR and FPR each.
2922  unsigned GPRCnt = 0;
2923  unsigned FPRCnt = 0;
2924  for (auto const &Arg : F->args()) {
2925  if (Arg.hasAttribute(Attribute::ByVal) ||
2926  Arg.hasAttribute(Attribute::InReg) ||
2927  Arg.hasAttribute(Attribute::StructRet) ||
2928  Arg.hasAttribute(Attribute::SwiftSelf) ||
2929  Arg.hasAttribute(Attribute::SwiftError) ||
2930  Arg.hasAttribute(Attribute::Nest))
2931  return false;
2932 
2933  Type *ArgTy = Arg.getType();
2934  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2935  return false;
2936 
2937  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2938  if (!ArgVT.isSimple())
2939  return false;
2940 
2941  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2942  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2943  return false;
2944 
2945  if (VT.isVector() &&
2946  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2947  return false;
2948 
2949  if (VT >= MVT::i1 && VT <= MVT::i64)
2950  ++GPRCnt;
2951  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2952  VT.is128BitVector())
2953  ++FPRCnt;
2954  else
2955  return false;
2956 
2957  if (GPRCnt > 8 || FPRCnt > 8)
2958  return false;
2959  }
2960 
2961  static const MCPhysReg Registers[6][8] = {
2962  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2963  AArch64::W5, AArch64::W6, AArch64::W7 },
2964  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2965  AArch64::X5, AArch64::X6, AArch64::X7 },
2966  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2967  AArch64::H5, AArch64::H6, AArch64::H7 },
2968  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2969  AArch64::S5, AArch64::S6, AArch64::S7 },
2970  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2971  AArch64::D5, AArch64::D6, AArch64::D7 },
2972  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2973  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2974  };
2975 
2976  unsigned GPRIdx = 0;
2977  unsigned FPRIdx = 0;
2978  for (auto const &Arg : F->args()) {
2979  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2980  unsigned SrcReg;
2981  const TargetRegisterClass *RC;
2982  if (VT >= MVT::i1 && VT <= MVT::i32) {
2983  SrcReg = Registers[0][GPRIdx++];
2984  RC = &AArch64::GPR32RegClass;
2985  VT = MVT::i32;
2986  } else if (VT == MVT::i64) {
2987  SrcReg = Registers[1][GPRIdx++];
2988  RC = &AArch64::GPR64RegClass;
2989  } else if (VT == MVT::f16) {
2990  SrcReg = Registers[2][FPRIdx++];
2991  RC = &AArch64::FPR16RegClass;
2992  } else if (VT == MVT::f32) {
2993  SrcReg = Registers[3][FPRIdx++];
2994  RC = &AArch64::FPR32RegClass;
2995  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2996  SrcReg = Registers[4][FPRIdx++];
2997  RC = &AArch64::FPR64RegClass;
2998  } else if (VT.is128BitVector()) {
2999  SrcReg = Registers[5][FPRIdx++];
3000  RC = &AArch64::FPR128RegClass;
3001  } else
3002  llvm_unreachable("Unexpected value type.");
3003 
3004  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3005  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3006  // Without this, EmitLiveInCopies may eliminate the livein if its only
3007  // use is a bitcast (which isn't turned into an instruction).
3008  unsigned ResultReg = createResultReg(RC);
3009  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3010  TII.get(TargetOpcode::COPY), ResultReg)
3011  .addReg(DstReg, getKillRegState(true));
3012  updateValueMap(&Arg, ResultReg);
3013  }
3014  return true;
3015 }
3016 
3017 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3018  SmallVectorImpl<MVT> &OutVTs,
3019  unsigned &NumBytes) {
3020  CallingConv::ID CC = CLI.CallConv;
 3021  SmallVector<CCValAssign, 16> ArgLocs;
 3022  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3023  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3024 
3025  // Get a count of how many bytes are to be pushed on the stack.
3026  NumBytes = CCInfo.getNextStackOffset();
3027 
3028  // Issue CALLSEQ_START
3029  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3030  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3031  .addImm(NumBytes).addImm(0);
3032 
3033  // Process the args.
3034  for (CCValAssign &VA : ArgLocs) {
3035  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3036  MVT ArgVT = OutVTs[VA.getValNo()];
3037 
3038  unsigned ArgReg = getRegForValue(ArgVal);
3039  if (!ArgReg)
3040  return false;
3041 
3042  // Handle arg promotion: SExt, ZExt, AExt.
3043  switch (VA.getLocInfo()) {
3044  case CCValAssign::Full:
3045  break;
3046  case CCValAssign::SExt: {
3047  MVT DestVT = VA.getLocVT();
3048  MVT SrcVT = ArgVT;
3049  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3050  if (!ArgReg)
3051  return false;
3052  break;
3053  }
3054  case CCValAssign::AExt:
3055  // Intentional fall-through.
3056  case CCValAssign::ZExt: {
3057  MVT DestVT = VA.getLocVT();
3058  MVT SrcVT = ArgVT;
3059  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3060  if (!ArgReg)
3061  return false;
3062  break;
3063  }
3064  default:
3065  llvm_unreachable("Unknown arg promotion!");
3066  }
3067 
3068  // Now copy/store arg to correct locations.
3069  if (VA.isRegLoc() && !VA.needsCustom()) {
3070  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3071  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3072  CLI.OutRegs.push_back(VA.getLocReg());
3073  } else if (VA.needsCustom()) {
3074  // FIXME: Handle custom args.
3075  return false;
3076  } else {
3077  assert(VA.isMemLoc() && "Assuming store on stack.");
3078 
3079  // Don't emit stores for undef values.
3080  if (isa<UndefValue>(ArgVal))
3081  continue;
3082 
3083  // Need to store on the stack.
3084  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3085 
3086  unsigned BEAlign = 0;
3087  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3088  BEAlign = 8 - ArgSize;
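 // e.g. (illustrative) on a big-endian target an i32 stack argument is placed
 // at offset +4 within its 8-byte slot so the callee reads the correct bytes.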
3089 
3090  Address Addr;
3091  Addr.setKind(Address::RegBase);
3092  Addr.setReg(AArch64::SP);
3093  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3094 
3095  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3096  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3097  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3098  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3099 
3100  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3101  return false;
3102  }
3103  }
3104  return true;
3105 }
3106 
3107 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3108  unsigned NumBytes) {
3109  CallingConv::ID CC = CLI.CallConv;
3110 
3111  // Issue CALLSEQ_END
3112  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3113  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3114  .addImm(NumBytes).addImm(0);
3115 
3116  // Now the return value.
3117  if (RetVT != MVT::isVoid) {
 3118  SmallVector<CCValAssign, 16> RVLocs;
 3119  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3120  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3121 
3122  // Only handle a single return value.
3123  if (RVLocs.size() != 1)
3124  return false;
3125 
3126  // Copy all of the result registers out of their specified physreg.
3127  MVT CopyVT = RVLocs[0].getValVT();
3128 
3129  // TODO: Handle big-endian results
3130  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3131  return false;
3132 
3133  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3134  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3135  TII.get(TargetOpcode::COPY), ResultReg)
3136  .addReg(RVLocs[0].getLocReg());
3137  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3138 
3139  CLI.ResultReg = ResultReg;
3140  CLI.NumResultRegs = 1;
3141  }
3142 
3143  return true;
3144 }
3145 
3146 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3147  CallingConv::ID CC = CLI.CallConv;
3148  bool IsTailCall = CLI.IsTailCall;
3149  bool IsVarArg = CLI.IsVarArg;
3150  const Value *Callee = CLI.Callee;
3151  MCSymbol *Symbol = CLI.Symbol;
3152 
3153  if (!Callee && !Symbol)
3154  return false;
3155 
3156  // Allow SelectionDAG isel to handle tail calls.
3157  if (IsTailCall)
3158  return false;
3159 
3160  CodeModel::Model CM = TM.getCodeModel();
3161  // Only support the small-addressing and large code models.
3162  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3163  return false;
3164 
3165  // FIXME: Add large code model support for ELF.
3166  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3167  return false;
3168 
3169  // Let SDISel handle vararg functions.
3170  if (IsVarArg)
3171  return false;
3172 
3173  // FIXME: Only handle *simple* calls for now.
3174  MVT RetVT;
3175  if (CLI.RetTy->isVoidTy())
3176  RetVT = MVT::isVoid;
3177  else if (!isTypeLegal(CLI.RetTy, RetVT))
3178  return false;
3179 
3180  for (auto Flag : CLI.OutFlags)
3181  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3182  Flag.isSwiftSelf() || Flag.isSwiftError())
3183  return false;
3184 
3185  // Set up the argument vectors.
3186  SmallVector<MVT, 16> OutVTs;
3187  OutVTs.reserve(CLI.OutVals.size());
3188 
3189  for (auto *Val : CLI.OutVals) {
3190  MVT VT;
3191  if (!isTypeLegal(Val->getType(), VT) &&
3192  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3193  return false;
3194 
3195  // We don't handle vector parameters yet.
3196  if (VT.isVector() || VT.getSizeInBits() > 64)
3197  return false;
3198 
3199  OutVTs.push_back(VT);
3200  }
3201 
3202  Address Addr;
3203  if (Callee && !computeCallAddress(Callee, Addr))
3204  return false;
3205 
3206  // Handle the arguments now that we've gotten them.
3207  unsigned NumBytes;
3208  if (!processCallArgs(CLI, OutVTs, NumBytes))
3209  return false;
3210 
3211  // Issue the call.
3212  MachineInstrBuilder MIB;
3213  if (Subtarget->useSmallAddressing()) {
3214  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3215  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3216  if (Symbol)
3217  MIB.addSym(Symbol, 0);
3218  else if (Addr.getGlobalValue())
3219  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3220  else if (Addr.getReg()) {
3221  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3222  MIB.addReg(Reg);
3223  } else
3224  return false;
3225  } else {
3226  unsigned CallReg = 0;
3227  if (Symbol) {
3228  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3229  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3230  ADRPReg)
3231  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3232 
3233  CallReg = createResultReg(&AArch64::GPR64RegClass);
3234  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3235  TII.get(AArch64::LDRXui), CallReg)
3236  .addReg(ADRPReg)
3237  .addSym(Symbol,
 3238  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 3239  } else if (Addr.getGlobalValue())
3240  CallReg = materializeGV(Addr.getGlobalValue());
3241  else if (Addr.getReg())
3242  CallReg = Addr.getReg();
3243 
3244  if (!CallReg)
3245  return false;
3246 
3247  const MCInstrDesc &II = TII.get(AArch64::BLR);
3248  CallReg = constrainOperandRegClass(II, CallReg, 0);
3249  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3250  }
3251 
3252  // Add implicit physical register uses to the call.
3253  for (auto Reg : CLI.OutRegs)
3254  MIB.addReg(Reg, RegState::Implicit);
3255 
3256  // Add a register mask with the call-preserved registers.
3257  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3258  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3259 
3260  CLI.Call = MIB;
3261 
3262  // Finish off the call including any return values.
3263  return finishCall(CLI, RetVT, NumBytes);
3264 }
3265 
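 // Heuristic sketch (illustrative): with a known alignment, inline up to four
 // aligned chunks (e.g. a 16-byte copy at 8-byte alignment); with unknown
 // alignment, only inline copies smaller than 32 bytes.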
3266 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3267  if (Alignment)
3268  return Len / Alignment <= 4;
3269  else
3270  return Len < 32;
3271 }
3272 
3273 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3274  uint64_t Len, unsigned Alignment) {
3275  // Make sure we don't bloat code by inlining very large memcpy's.
3276  if (!isMemCpySmall(Len, Alignment))
3277  return false;
3278 
3279  int64_t UnscaledOffset = 0;
3280  Address OrigDest = Dest;
3281  Address OrigSrc = Src;
3282 
3283  while (Len) {
3284  MVT VT;
3285  if (!Alignment || Alignment >= 8) {
3286  if (Len >= 8)
3287  VT = MVT::i64;
3288  else if (Len >= 4)
3289  VT = MVT::i32;
3290  else if (Len >= 2)
3291  VT = MVT::i16;
3292  else {
3293  VT = MVT::i8;
3294  }
3295  } else {
3296  // Bound based on alignment.
3297  if (Len >= 4 && Alignment == 4)
3298  VT = MVT::i32;
3299  else if (Len >= 2 && Alignment == 2)
3300  VT = MVT::i16;
3301  else {
3302  VT = MVT::i8;
3303  }
3304  }
3305 
3306  unsigned ResultReg = emitLoad(VT, VT, Src);
3307  if (!ResultReg)
3308  return false;
3309 
3310  if (!emitStore(VT, ResultReg, Dest))
3311  return false;
3312 
3313  int64_t Size = VT.getSizeInBits() / 8;
3314  Len -= Size;
3315  UnscaledOffset += Size;
3316 
3317  // We need to recompute the unscaled offset for each iteration.
3318  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3319  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3320  }
3321 
3322  return true;
3323 }
3324 
3325 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3326 /// into the user. The condition code will only be updated on success.
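 /// For example (illustrative IR):
 ///   %r  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
 ///   %ov = extractvalue { i32, i1 } %r, 1
 ///   br i1 %ov, label %ovf, label %cont
 /// Here the branch can reuse the VS flag set by the overflow intrinsic.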
3327 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3328  const Instruction *I,
3329  const Value *Cond) {
3330  if (!isa<ExtractValueInst>(Cond))
3331  return false;
3332 
3333  const auto *EV = cast<ExtractValueInst>(Cond);
3334  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3335  return false;
3336 
3337  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3338  MVT RetVT;
3339  const Function *Callee = II->getCalledFunction();
3340  Type *RetTy =
3341  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3342  if (!isTypeLegal(RetTy, RetVT))
3343  return false;
3344 
3345  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3346  return false;
3347 
3348  const Value *LHS = II->getArgOperand(0);
3349  const Value *RHS = II->getArgOperand(1);
3350 
3351  // Canonicalize immediate to the RHS.
3352  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3353  isCommutativeIntrinsic(II))
3354  std::swap(LHS, RHS);
3355 
3356  // Simplify multiplies.
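  // A multiply-with-overflow by 2 overflows exactly when the corresponding
  // add-with-overflow of the value with itself does, so use the add's
  // condition code instead.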
3357  Intrinsic::ID IID = II->getIntrinsicID();
3358  switch (IID) {
3359  default:
3360  break;
3361  case Intrinsic::smul_with_overflow:
3362  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3363  if (C->getValue() == 2)
3364  IID = Intrinsic::sadd_with_overflow;
3365  break;
3366  case Intrinsic::umul_with_overflow:
3367  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3368  if (C->getValue() == 2)
3369  IID = Intrinsic::uadd_with_overflow;
3370  break;
3371  }
3372 
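  // Map the intrinsic to the AArch64 condition that signals overflow: VS for
  // signed add/sub, HS/LO for unsigned carry/borrow, NE for the multiply
  // checks emitted elsewhere.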
3373  AArch64CC::CondCode TmpCC;
3374  switch (IID) {
3375  default:
3376  return false;
3377  case Intrinsic::sadd_with_overflow:
3378  case Intrinsic::ssub_with_overflow:
3379  TmpCC = AArch64CC::VS;
3380  break;
3381  case Intrinsic::uadd_with_overflow:
3382  TmpCC = AArch64CC::HS;
3383  break;
3384  case Intrinsic::usub_with_overflow:
3385  TmpCC = AArch64CC::LO;
3386  break;
3387  case Intrinsic::smul_with_overflow:
3388  case Intrinsic::umul_with_overflow:
3389  TmpCC = AArch64CC::NE;
3390  break;
3391  }
3392 
3393  // Check if both instructions are in the same basic block.
3394  if (!isValueAvailable(II))
3395  return false;
3396 
3397  // Make sure nothing is in the way
3398  BasicBlock::const_iterator Start(I);
3399  BasicBlock::const_iterator End(II);
3400  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3401  // We only expect extractvalue instructions between the intrinsic and the
3402  // instruction to be selected.
3403  if (!isa<ExtractValueInst>(Itr))
3404  return false;
3405 
3406  // Check that the extractvalue operand comes from the intrinsic.
3407  const auto *EVI = cast<ExtractValueInst>(Itr);
3408  if (EVI->getAggregateOperand() != II)
3409  return false;
3410  }
3411 
3412  CC = TmpCC;
3413  return true;
3414 }
3415 
3416 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3417  // FIXME: Handle more intrinsics.
3418  switch (II->getIntrinsicID()) {
3419  default: return false;
3420  case Intrinsic::frameaddress: {
3421  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3422  MFI.setFrameAddressIsTaken(true);
3423 
3424  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3425  unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3426  unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3427  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3428  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3429  // Recursively load frame address
3430  // ldr x0, [fp]
3431  // ldr x0, [x0]
3432  // ldr x0, [x0]
3433  // ...
3434  unsigned DestReg;
3435  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3436  while (Depth--) {
3437  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3438  SrcReg, /*IsKill=*/true, 0);
3439  assert(DestReg && "Unexpected LDR instruction emission failure.");
3440  SrcReg = DestReg;
3441  }
3442 
3443  updateValueMap(II, SrcReg);
3444  return true;
3445  }
3446  case Intrinsic::memcpy:
3447  case Intrinsic::memmove: {
3448  const auto *MTI = cast<MemTransferInst>(II);
3449  // Don't handle volatile.
3450  if (MTI->isVolatile())
3451  return false;
3452 
3453  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3454  // we would emit dead code because we don't currently handle memmoves.
3455  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3456  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3457  // Small memcpy's are common enough that we want to do them without a call
3458  // if possible.
3459  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3460  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3461  MTI->getSourceAlignment());
3462  if (isMemCpySmall(Len, Alignment)) {
3463  Address Dest, Src;
3464  if (!computeAddress(MTI->getRawDest(), Dest) ||
3465  !computeAddress(MTI->getRawSource(), Src))
3466  return false;
3467  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3468  return true;
3469  }
3470  }
3471 
3472  if (!MTI->getLength()->getType()->isIntegerTy(64))
3473  return false;
3474 
3475  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3476  // Fast instruction selection doesn't support the special
3477  // address spaces.
3478  return false;
3479 
3480  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3481  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3482  }
3483  case Intrinsic::memset: {
3484  const MemSetInst *MSI = cast<MemSetInst>(II);
3485  // Don't handle volatile.
3486  if (MSI->isVolatile())
3487  return false;
3488 
3489  if (!MSI->getLength()->getType()->isIntegerTy(64))
3490  return false;
3491 
3492  if (MSI->getDestAddressSpace() > 255)
3493  // Fast instruction selection doesn't support the special
3494  // address spaces.
3495  return false;
3496 
3497  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3498  }
3499  case Intrinsic::sin:
3500  case Intrinsic::cos:
3501  case Intrinsic::pow: {
3502  MVT RetVT;
3503  if (!isTypeLegal(II->getType(), RetVT))
3504  return false;
3505 
3506  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3507  return false;
3508 
3509  static const RTLIB::Libcall LibCallTable[3][2] = {
3510  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3511  { RTLIB::COS_F32, RTLIB::COS_F64 },
3512  { RTLIB::POW_F32, RTLIB::POW_F64 }
3513  };
3514  RTLIB::Libcall LC;
3515  bool Is64Bit = RetVT == MVT::f64;
3516  switch (II->getIntrinsicID()) {
3517  default:
3518  llvm_unreachable("Unexpected intrinsic.");
3519  case Intrinsic::sin:
3520  LC = LibCallTable[0][Is64Bit];
3521  break;
3522  case Intrinsic::cos:
3523  LC = LibCallTable[1][Is64Bit];
3524  break;
3525  case Intrinsic::pow:
3526  LC = LibCallTable[2][Is64Bit];
3527  break;
3528  }
3529 
3530  ArgListTy Args;
3531  Args.reserve(II->getNumArgOperands());
3532 
3533  // Populate the argument list.
3534  for (auto &Arg : II->arg_operands()) {
3535  ArgListEntry Entry;
3536  Entry.Val = Arg;
3537  Entry.Ty = Arg->getType();
3538  Args.push_back(Entry);
3539  }
3540 
3541  CallLoweringInfo CLI;
3542  MCContext &Ctx = MF->getContext();
3543  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3544  TLI.getLibcallName(LC), std::move(Args));
3545  if (!lowerCallTo(CLI))
3546  return false;
3547  updateValueMap(II, CLI.ResultReg);
3548  return true;
3549  }
3550  case Intrinsic::fabs: {
3551  MVT VT;
3552  if (!isTypeLegal(II->getType(), VT))
3553  return false;
3554 
3555  unsigned Opc;
3556  switch (VT.SimpleTy) {
3557  default:
3558  return false;
3559  case MVT::f32:
3560  Opc = AArch64::FABSSr;
3561  break;
3562  case MVT::f64:
3563  Opc = AArch64::FABSDr;
3564  break;
3565  }
3566  unsigned SrcReg = getRegForValue(II->getOperand(0));
3567  if (!SrcReg)
3568  return false;
3569  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3570  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3571  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3572  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3573  updateValueMap(II, ResultReg);
3574  return true;
3575  }
3576  case Intrinsic::trap:
3577  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3578  .addImm(1);
3579  return true;
3580 
3581  case Intrinsic::sqrt: {
3582  Type *RetTy = II->getCalledFunction()->getReturnType();
3583 
3584  MVT VT;
3585  if (!isTypeLegal(RetTy, VT))
3586  return false;
3587 
3588  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3589  if (!Op0Reg)
3590  return false;
3591  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3592 
3593  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3594  if (!ResultReg)
3595  return false;
3596 
3597  updateValueMap(II, ResultReg);
3598  return true;
3599  }
3600  case Intrinsic::sadd_with_overflow:
3601  case Intrinsic::uadd_with_overflow:
3602  case Intrinsic::ssub_with_overflow:
3603  case Intrinsic::usub_with_overflow:
3604  case Intrinsic::smul_with_overflow:
3605  case Intrinsic::umul_with_overflow: {
3606  // This implements the basic lowering of the xalu with overflow intrinsics.
3607  const Function *Callee = II->getCalledFunction();
3608  auto *Ty = cast<StructType>(Callee->getReturnType());
3609  Type *RetTy = Ty->getTypeAtIndex(0U);
3610 
3611  MVT VT;
3612  if (!isTypeLegal(RetTy, VT))
3613  return false;
3614 
3615  if (VT != MVT::i32 && VT != MVT::i64)
3616  return false;
3617 
3618  const Value *LHS = II->getArgOperand(0);
3619  const Value *RHS = II->getArgOperand(1);
3620  // Canonicalize immediate to the RHS.
3621  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3622  isCommutativeIntrinsic(II))
3623  std::swap(LHS, RHS);
3624 
3625  // Simplify multiplies.
3626  Intrinsic::ID IID = II->getIntrinsicID();
3627  switch (IID) {
3628  default:
3629  break;
3630  case Intrinsic::smul_with_overflow:
3631  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3632  if (C->getValue() == 2) {
3633  IID = Intrinsic::sadd_with_overflow;
3634  RHS = LHS;
3635  }
3636  break;
3637  case Intrinsic::umul_with_overflow:
3638  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3639  if (C->getValue() == 2) {
3640  IID = Intrinsic::uadd_with_overflow;
3641  RHS = LHS;
3642  }
3643  break;
3644  }
3645 
3646  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3647  AArch64CC::CondCode CC = AArch64CC::Invalid;
3648  switch (IID) {
3649  default: llvm_unreachable("Unexpected intrinsic!");
3650  case Intrinsic::sadd_with_overflow:
3651  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3652  CC = AArch64CC::VS;
3653  break;
3654  case Intrinsic::uadd_with_overflow:
3655  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3656  CC = AArch64CC::HS;
3657  break;
3658  case Intrinsic::ssub_with_overflow:
3659  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3660  CC = AArch64CC::VS;
3661  break;
3662  case Intrinsic::usub_with_overflow:
3663  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3664  CC = AArch64CC::LO;
3665  break;
3666  case Intrinsic::smul_with_overflow: {
3667  CC = AArch64CC::NE;
3668  unsigned LHSReg = getRegForValue(LHS);
3669  if (!LHSReg)
3670  return false;
3671  bool LHSIsKill = hasTrivialKill(LHS);
3672 
3673  unsigned RHSReg = getRegForValue(RHS);
3674  if (!RHSReg)
3675  return false;
3676  bool RHSIsKill = hasTrivialKill(RHS);
3677 
3678  if (VT == MVT::i32) {
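  // For i32, form the full 64-bit product with SMULL, then compare the high
  // half against the sign bits of the low half; any mismatch sets NE
  // (signed overflow).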
3679  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3680  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3681  /*IsKill=*/false, 32);
3682  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3683  AArch64::sub_32);
3684  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3685  AArch64::sub_32);
3686  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3687  AArch64_AM::ASR, 31, /*WantResult=*/false);
3688  } else {
3689  assert(VT == MVT::i64 && "Unexpected value type.");
3690  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3691  // reused in the next instruction.
3692  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3693  /*IsKill=*/false);
3694  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3695  RHSReg, RHSIsKill);
3696  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3697  AArch64_AM::ASR, 63, /*WantResult=*/false);
3698  }
3699  break;
3700  }
3701  case Intrinsic::umul_with_overflow: {
3702  CC = AArch64CC::NE;
3703  unsigned LHSReg = getRegForValue(LHS);
3704  if (!LHSReg)
3705  return false;
3706  bool LHSIsKill = hasTrivialKill(LHS);
3707 
3708  unsigned RHSReg = getRegForValue(RHS);
3709  if (!RHSReg)
3710  return false;
3711  bool RHSIsKill = hasTrivialKill(RHS);
3712 
3713  if (VT == MVT::i32) {
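  // For i32, form the 64-bit product with UMULL; any bit set in the high
  // half (checked by the SUBS against XZR with LSR #32) means unsigned
  // overflow.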
3714  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3715  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3716  /*IsKill=*/false, AArch64_AM::LSR, 32,
3717  /*WantResult=*/false);
3718  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3719  AArch64::sub_32);
3720  } else {
3721  assert(VT == MVT::i64 && "Unexpected value type.");
3722  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3723  // reused in the next instruction.
3724  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3725  /*IsKill=*/false);
3726  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3727  RHSReg, RHSIsKill);
3728  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3729  /*IsKill=*/false, /*WantResult=*/false);
3730  }
3731  break;
3732  }
3733  }
3734 
3735  if (MulReg) {
3736  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3737  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3738  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3739  }
3740 
3741  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3742  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3743  /*IsKill=*/true, getInvertedCondCode(CC));
3744  (void)ResultReg2;
3745  assert((ResultReg1 + 1) == ResultReg2 &&
3746  "Nonconsecutive result registers.");
3747  updateValueMap(II, ResultReg1, 2);
3748  return true;
3749  }
3750  }
3751  return false;
3752 }
3753 
3754 bool AArch64FastISel::selectRet(const Instruction *I) {
3755  const ReturnInst *Ret = cast<ReturnInst>(I);
3756  const Function &F = *I->getParent()->getParent();
3757 
3758  if (!FuncInfo.CanLowerReturn)
3759  return false;
3760 
3761  if (F.isVarArg())
3762  return false;
3763 
3764  if (TLI.supportSwiftError() &&
3765  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3766  return false;
3767 
3768  if (TLI.supportSplitCSR(FuncInfo.MF))
3769  return false;
3770 
3771  // Build a list of return value registers.
3772  SmallVector<unsigned, 4> RetRegs;
3773 
3774  if (Ret->getNumOperands() > 0) {
3775  CallingConv::ID CC = F.getCallingConv();
3776  SmallVector<ISD::OutputArg, 4> Outs;
3777  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3778 
3779  // Analyze operands of the call, assigning locations to each operand.
3780  SmallVector<CCValAssign, 16> ValLocs;
3781  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3782  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3783  : RetCC_AArch64_AAPCS;
3784  CCInfo.AnalyzeReturn(Outs, RetCC);
3785 
3786  // Only handle a single return value for now.
3787  if (ValLocs.size() != 1)
3788  return false;
3789 
3790  CCValAssign &VA = ValLocs[0];
3791  const Value *RV = Ret->getOperand(0);
3792 
3793  // Don't bother handling odd stuff for now.
3794  if ((VA.getLocInfo() != CCValAssign::Full) &&
3795  (VA.getLocInfo() != CCValAssign::BCvt))
3796  return false;
3797 
3798  // Only handle register returns for now.
3799  if (!VA.isRegLoc())
3800  return false;
3801 
3802  unsigned Reg = getRegForValue(RV);
3803  if (Reg == 0)
3804  return false;
3805 
3806  unsigned SrcReg = Reg + VA.getValNo();
3807  unsigned DestReg = VA.getLocReg();
3808  // Avoid a cross-class copy. This is very unlikely.
3809  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3810  return false;
3811 
3812  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3813  if (!RVEVT.isSimple())
3814  return false;
3815 
3816  // Vectors (of > 1 lane) in big endian need tricky handling.
3817  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3818  !Subtarget->isLittleEndian())
3819  return false;
3820 
3821  MVT RVVT = RVEVT.getSimpleVT();
3822  if (RVVT == MVT::f128)
3823  return false;
3824 
3825  MVT DestVT = VA.getValVT();
3826  // Special handling for extended integers.
3827  if (RVVT != DestVT) {
3828  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3829  return false;
3830 
3831  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3832  return false;
3833 
3834  bool IsZExt = Outs[0].Flags.isZExt();
3835  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3836  if (SrcReg == 0)
3837  return false;
3838  }
3839 
3840  // Make the copy.
3841  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3842  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3843 
3844  // Add register to return instruction.
3845  RetRegs.push_back(VA.getLocReg());
3846  }
3847 
3848  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3849  TII.get(AArch64::RET_ReallyLR));
3850  for (unsigned RetReg : RetRegs)
3851  MIB.addReg(RetReg, RegState::Implicit);
3852  return true;
3853 }
3854 
3855 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3856  Type *DestTy = I->getType();
3857  Value *Op = I->getOperand(0);
3858  Type *SrcTy = Op->getType();
3859 
3860  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3861  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3862  if (!SrcEVT.isSimple())
3863  return false;
3864  if (!DestEVT.isSimple())
3865  return false;
3866 
3867  MVT SrcVT = SrcEVT.getSimpleVT();
3868  MVT DestVT = DestEVT.getSimpleVT();
3869 
3870  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3871  SrcVT != MVT::i8)
3872  return false;
3873  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3874  DestVT != MVT::i1)
3875  return false;
3876 
3877  unsigned SrcReg = getRegForValue(Op);
3878  if (!SrcReg)
3879  return false;
3880  bool SrcIsKill = hasTrivialKill(Op);
3881 
3882  // If we're truncating from i64 to a smaller non-legal type then generate an
3883  // AND. Otherwise, we know the high bits are undefined and a truncate only
3884 // generates a COPY. We cannot also mark the source register as the result
3885  // register, because this can incorrectly transfer the kill flag onto the
3886  // source register.
3887  unsigned ResultReg;
3888  if (SrcVT == MVT::i64) {
3889  uint64_t Mask = 0;
3890  switch (DestVT.SimpleTy) {
3891  default:
3892  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3893  return false;
3894  case MVT::i1:
3895  Mask = 0x1;
3896  break;
3897  case MVT::i8:
3898  Mask = 0xff;
3899  break;
3900  case MVT::i16:
3901  Mask = 0xffff;
3902  break;
3903  }
3904  // Issue an extract_subreg to get the lower 32-bits.
3905  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3906  AArch64::sub_32);
3907  // Create the AND instruction which performs the actual truncation.
3908  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3909  assert(ResultReg && "Unexpected AND instruction emission failure.");
3910  } else {
3911  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3912  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3913  TII.get(TargetOpcode::COPY), ResultReg)
3914  .addReg(SrcReg, getKillRegState(SrcIsKill));
3915  }
3916 
3917  updateValueMap(I, ResultReg);
3918  return true;
3919 }
3920 
3921 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3922  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3923  DestVT == MVT::i64) &&
3924  "Unexpected value type.");
3925  // Handle i8 and i16 as i32.
3926  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3927  DestVT = MVT::i32;
3928 
3929  if (IsZExt) {
3930  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3931  assert(ResultReg && "Unexpected AND instruction emission failure.");
3932  if (DestVT == MVT::i64) {
3933  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3934  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3935  unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3936  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3937  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3938  .addImm(0)
3939  .addReg(ResultReg)
3940  .addImm(AArch64::sub_32);
3941  ResultReg = Reg64;
3942  }
3943  return ResultReg;
3944  } else {
3945  if (DestVT == MVT::i64) {
3946  // FIXME: We're SExt i1 to i64.
3947  return 0;
3948  }
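  // SBFM Wd, Wn, #0, #0 copies bit 0 into every higher bit, i.e. it
  // sign-extends the i1 value within the 32-bit register.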
3949  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3950  /*TODO:IsKill=*/false, 0, 0);
3951  }
3952 }
3953 
3954 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3955  unsigned Op1, bool Op1IsKill) {
3956  unsigned Opc, ZReg;
3957  switch (RetVT.SimpleTy) {
3958  default: return 0;
3959  case MVT::i8:
3960  case MVT::i16:
3961  case MVT::i32:
3962  RetVT = MVT::i32;
3963  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3964  case MVT::i64:
3965  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3966  }
3967 
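  // There is no standalone MUL at this level; emit MADD with the zero
  // register as the addend.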
3968  const TargetRegisterClass *RC =
3969  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3970  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3971  ZReg, /*IsKill=*/true);
3972 }
3973 
3974 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3975  unsigned Op1, bool Op1IsKill) {
3976  if (RetVT != MVT::i64)
3977  return 0;
3978 
3979  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3980  Op0, Op0IsKill, Op1, Op1IsKill,
3981  AArch64::XZR, /*IsKill=*/true);
3982 }
3983 
3984 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3985  unsigned Op1, bool Op1IsKill) {
3986  if (RetVT != MVT::i64)
3987  return 0;
3988 
3989  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3990  Op0, Op0IsKill, Op1, Op1IsKill,
3991  AArch64::XZR, /*IsKill=*/true);
3992 }
3993 
3994 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3995  unsigned Op1Reg, bool Op1IsKill) {
3996  unsigned Opc = 0;
3997  bool NeedTrunc = false;
3998  uint64_t Mask = 0;
3999  switch (RetVT.SimpleTy) {
4000  default: return 0;
4001  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4002  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4003  case MVT::i32: Opc = AArch64::LSLVWr; break;
4004  case MVT::i64: Opc = AArch64::LSLVXr; break;
4005  }
4006 
4007  const TargetRegisterClass *RC =
4008  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4009  if (NeedTrunc) {
4010  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4011  Op1IsKill = true;
4012  }
4013  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4014  Op1IsKill);
4015  if (NeedTrunc)
4016  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4017  return ResultReg;
4018 }
4019 
4020 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4021  bool Op0IsKill, uint64_t Shift,
4022  bool IsZExt) {
4023  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4024  "Unexpected source/return type pair.");
4025  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4026  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4027  "Unexpected source value type.");
4028  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4029  RetVT == MVT::i64) && "Unexpected return value type.");
4030 
4031  bool Is64Bit = (RetVT == MVT::i64);
4032  unsigned RegSize = Is64Bit ? 64 : 32;
4033  unsigned DstBits = RetVT.getSizeInBits();
4034  unsigned SrcBits = SrcVT.getSizeInBits();
4035  const TargetRegisterClass *RC =
4036  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4037 
4038  // Just emit a copy for "zero" shifts.
4039  if (Shift == 0) {
4040  if (RetVT == SrcVT) {
4041  unsigned ResultReg = createResultReg(RC);
4042  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4043  TII.get(TargetOpcode::COPY), ResultReg)
4044  .addReg(Op0, getKillRegState(Op0IsKill));
4045  return ResultReg;
4046  } else
4047  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4048  }
4049 
4050  // Don't deal with undefined shifts.
4051  if (Shift >= DstBits)
4052  return 0;
4053 
4054  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4055  // {S|U}BFM Wd, Wn, #r, #s
4056  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4057 
4058  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4059  // %2 = shl i16 %1, 4
4060  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4061  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4062  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4063  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4064 
4065  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4066  // %2 = shl i16 %1, 8
4067  // Wd<32+7-24,32-24> = Wn<7:0>
4068  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4069  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4070  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4071 
4072  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4073  // %2 = shl i16 %1, 12
4074  // Wd<32+3-20,32-20> = Wn<3:0>
4075  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4076  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4077  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4078 
4079  unsigned ImmR = RegSize - Shift;
4080  // Limit the width to the length of the source type.
4081  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4082  static const unsigned OpcTable[2][2] = {
4083  {AArch64::SBFMWri, AArch64::SBFMXri},
4084  {AArch64::UBFMWri, AArch64::UBFMXri}
4085  };
4086  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4087  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4088  unsigned TmpReg = MRI.createVirtualRegister(RC);
4089  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4090  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4091  .addImm(0)
4092  .addReg(Op0, getKillRegState(Op0IsKill))
4093  .addImm(AArch64::sub_32);
4094  Op0 = TmpReg;
4095  Op0IsKill = true;
4096  }
4097  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4098 }
4099 
4100 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4101  unsigned Op1Reg, bool Op1IsKill) {
4102  unsigned Opc = 0;
4103  bool NeedTrunc = false;
4104  uint64_t Mask = 0;
4105  switch (RetVT.SimpleTy) {
4106  default: return 0;
4107  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4108  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4109  case MVT::i32: Opc = AArch64::LSRVWr; break;
4110  case MVT::i64: Opc = AArch64::LSRVXr; break;
4111  }
4112 
4113  const TargetRegisterClass *RC =
4114  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4115  if (NeedTrunc) {
4116  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4117  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4118  Op0IsKill = Op1IsKill = true;
4119  }
4120  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4121  Op1IsKill);
4122  if (NeedTrunc)
4123  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4124  return ResultReg;
4125 }
4126 
4127 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4128  bool Op0IsKill, uint64_t Shift,
4129  bool IsZExt) {
4130  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4131  "Unexpected source/return type pair.");
4132  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4133  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4134  "Unexpected source value type.");
4135  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4136  RetVT == MVT::i64) && "Unexpected return value type.");
4137 
4138  bool Is64Bit = (RetVT == MVT::i64);
4139  unsigned RegSize = Is64Bit ? 64 : 32;
4140  unsigned DstBits = RetVT.getSizeInBits();
4141  unsigned SrcBits = SrcVT.getSizeInBits();
4142  const TargetRegisterClass *RC =
4143  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4144 
4145  // Just emit a copy for "zero" shifts.
4146  if (Shift == 0) {
4147  if (RetVT == SrcVT) {
4148  unsigned ResultReg = createResultReg(RC);
4149  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4150  TII.get(TargetOpcode::COPY), ResultReg)
4151  .addReg(Op0, getKillRegState(Op0IsKill));
4152  return ResultReg;
4153  } else
4154  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4155  }
4156 
4157  // Don't deal with undefined shifts.
4158  if (Shift >= DstBits)
4159  return 0;
4160 
4161  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4162  // {S|U}BFM Wd, Wn, #r, #s
4163  // Wd<s-r:0> = Wn<s:r> when r <= s
4164 
4165  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4166  // %2 = lshr i16 %1, 4
4167  // Wd<7-4:0> = Wn<7:4>
4168  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4169  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4170  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4171 
4172  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4173  // %2 = lshr i16 %1, 8
4174  // Wd<7-7,0> = Wn<7:7>
4175  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4176  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4177  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4178 
4179  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4180  // %2 = lshr i16 %1, 12
4181  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4182  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4183  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4184  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4185 
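  // If every (zero-extended) source bit is shifted out, the result is zero.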
4186  if (Shift >= SrcBits && IsZExt)
4187  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4188 
4189  // It is not possible to fold a sign-extend into the LShr instruction. In this
4190  // case emit a sign-extend.
4191  if (!IsZExt) {
4192  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4193  if (!Op0)
4194  return 0;
4195  Op0IsKill = true;
4196  SrcVT = RetVT;
4197  SrcBits = SrcVT.getSizeInBits();
4198  IsZExt = true;
4199  }
4200 
4201  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4202  unsigned ImmS = SrcBits - 1;
4203  static const unsigned OpcTable[2][2] = {
4204  {AArch64::SBFMWri, AArch64::SBFMXri},
4205  {AArch64::UBFMWri, AArch64::UBFMXri}
4206  };
4207  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4208  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4209  unsigned TmpReg = MRI.createVirtualRegister(RC);
4210  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4211  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4212  .addImm(0)
4213  .addReg(Op0, getKillRegState(Op0IsKill))
4214  .addImm(AArch64::sub_32);
4215  Op0 = TmpReg;
4216  Op0IsKill = true;
4217  }
4218  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4219 }
4220 
4221 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4222  unsigned Op1Reg, bool Op1IsKill) {
4223  unsigned Opc = 0;
4224  bool NeedTrunc = false;
4225  uint64_t Mask = 0;
4226  switch (RetVT.SimpleTy) {
4227  default: return 0;
4228  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4229  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4230  case MVT::i32: Opc = AArch64::ASRVWr; break;
4231  case MVT::i64: Opc = AArch64::ASRVXr; break;
4232  }
4233 
4234  const TargetRegisterClass *RC =
4235  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4236  if (NeedTrunc) {
4237  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4238  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4239  Op0IsKill = Op1IsKill = true;
4240  }
4241  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4242  Op1IsKill);
4243  if (NeedTrunc)
4244  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4245  return ResultReg;
4246 }
4247 
4248 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4249  bool Op0IsKill, uint64_t Shift,
4250  bool IsZExt) {
4251  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4252  "Unexpected source/return type pair.");
4253  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4254  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4255  "Unexpected source value type.");
4256  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4257  RetVT == MVT::i64) && "Unexpected return value type.");
4258 
4259  bool Is64Bit = (RetVT == MVT::i64);
4260  unsigned RegSize = Is64Bit ? 64 : 32;
4261  unsigned DstBits = RetVT.getSizeInBits();
4262  unsigned SrcBits = SrcVT.getSizeInBits();
4263  const TargetRegisterClass *RC =
4264  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4265 
4266  // Just emit a copy for "zero" shifts.
4267  if (Shift == 0) {
4268  if (RetVT == SrcVT) {
4269  unsigned ResultReg = createResultReg(RC);
4270  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4271  TII.get(TargetOpcode::COPY), ResultReg)
4272  .addReg(Op0, getKillRegState(Op0IsKill));
4273  return ResultReg;
4274  } else
4275  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4276  }
4277 
4278  // Don't deal with undefined shifts.
4279  if (Shift >= DstBits)
4280  return 0;
4281 
4282  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4283  // {S|U}BFM Wd, Wn, #r, #s
4284  // Wd<s-r:0> = Wn<s:r> when r <= s
4285 
4286  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4287  // %2 = ashr i16 %1, 4
4288  // Wd<7-4:0> = Wn<7:4>
4289  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4290  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4291  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4292 
4293  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4294  // %2 = ashr i16 %1, 8
4295  // Wd<7-7,0> = Wn<7:7>
4296  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4297  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4298  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4299 
4300  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4301  // %2 = ashr i16 %1, 12
4302  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4303  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4304  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4305  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4306 
4307  if (Shift >= SrcBits && IsZExt)
4308  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4309 
4310  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4311  unsigned ImmS = SrcBits - 1;
4312  static const unsigned OpcTable[2][2] = {
4313  {AArch64::SBFMWri, AArch64::SBFMXri},
4314  {AArch64::UBFMWri, AArch64::UBFMXri}
4315  };
4316  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4317  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4318  unsigned TmpReg = MRI.createVirtualRegister(RC);
4319  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4320  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4321  .addImm(0)
4322  .addReg(Op0, getKillRegState(Op0IsKill))
4323  .addImm(AArch64::sub_32);
4324  Op0 = TmpReg;
4325  Op0IsKill = true;
4326  }
4327  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4328 }
4329 
4330 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4331  bool IsZExt) {
4332  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4333 
4334  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4335  // DestVT are odd things, so test to make sure that they are both types we can
4336  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4337  // bail out to SelectionDAG.
4338  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4339  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4340  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4341  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4342  return 0;
4343 
4344  unsigned Opc;
4345  unsigned Imm = 0;
4346 
4347  switch (SrcVT.SimpleTy) {
4348  default:
4349  return 0;
4350  case MVT::i1:
4351  return emiti1Ext(SrcReg, DestVT, IsZExt);
4352  case MVT::i8:
4353  if (DestVT == MVT::i64)
4354  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4355  else
4356  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4357  Imm = 7;
4358  break;
4359  case MVT::i16:
4360  if (DestVT == MVT::i64)
4361  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4362  else
4363  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4364  Imm = 15;
4365  break;
4366  case MVT::i32:
4367  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4368  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4369  Imm = 31;
4370  break;
4371  }
4372 
4373  // Handle i8 and i16 as i32.
4374  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4375  DestVT = MVT::i32;
4376  else if (DestVT == MVT::i64) {
4377  unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4378  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4379  TII.get(AArch64::SUBREG_TO_REG), Src64)
4380  .addImm(0)
4381  .addReg(SrcReg)
4382  .addImm(AArch64::sub_32);
4383  SrcReg = Src64;
4384  }
4385 
4386  const TargetRegisterClass *RC =
4387  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4388  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4389 }
4390 
4391 static bool isZExtLoad(const MachineInstr *LI) {
4392  switch (LI->getOpcode()) {
4393  default:
4394  return false;
4395  case AArch64::LDURBBi:
4396  case AArch64::LDURHHi:
4397  case AArch64::LDURWi:
4398  case AArch64::LDRBBui:
4399  case AArch64::LDRHHui:
4400  case AArch64::LDRWui:
4401  case AArch64::LDRBBroX:
4402  case AArch64::LDRHHroX:
4403  case AArch64::LDRWroX:
4404  case AArch64::LDRBBroW:
4405  case AArch64::LDRHHroW:
4406  case AArch64::LDRWroW:
4407  return true;
4408  }
4409 }
4410 
4411 static bool isSExtLoad(const MachineInstr *LI) {
4412  switch (LI->getOpcode()) {
4413  default:
4414  return false;
4415  case AArch64::LDURSBWi:
4416  case AArch64::LDURSHWi:
4417  case AArch64::LDURSBXi:
4418  case AArch64::LDURSHXi:
4419  case AArch64::LDURSWi:
4420  case AArch64::LDRSBWui:
4421  case AArch64::LDRSHWui:
4422  case AArch64::LDRSBXui:
4423  case AArch64::LDRSHXui:
4424  case AArch64::LDRSWui:
4425  case AArch64::LDRSBWroX:
4426  case AArch64::LDRSHWroX:
4427  case AArch64::LDRSBXroX:
4428  case AArch64::LDRSHXroX:
4429  case AArch64::LDRSWroX:
4430  case AArch64::LDRSBWroW:
4431  case AArch64::LDRSHWroW:
4432  case AArch64::LDRSBXroW:
4433  case AArch64::LDRSHXroW:
4434  case AArch64::LDRSWroW:
4435  return true;
4436  }
4437 }
4438 
4439 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4440  MVT SrcVT) {
4441  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4442  if (!LI || !LI->hasOneUse())
4443  return false;
4444 
4445  // Check if the load instruction has already been selected.
4446  unsigned Reg = lookUpRegForValue(LI);
4447  if (!Reg)
4448  return false;
4449 
4450  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4451  if (!MI)
4452  return false;
4453 
4454  // Check if the correct load instruction has been emitted - SelectionDAG might
4455  // have emitted a zero-extending load, but we need a sign-extending load.
4456  bool IsZExt = isa<ZExtInst>(I);
4457  const auto *LoadMI = MI;
4458  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4459  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4460  unsigned LoadReg = MI->getOperand(1).getReg();
4461  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4462  assert(LoadMI && "Expected valid instruction");
4463  }
4464  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4465  return false;
4466 
4467  // Nothing to be done.
4468  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4469  updateValueMap(I, Reg);
4470  return true;
4471  }
4472 
4473  if (IsZExt) {
4474  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4476  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4477  .addImm(0)
4478  .addReg(Reg, getKillRegState(true))
4479  .addImm(AArch64::sub_32);
4480  Reg = Reg64;
4481  } else {
4482  assert((MI->getOpcode() == TargetOpcode::COPY &&
4483  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4484  "Expected copy instruction");
4485  Reg = MI->getOperand(1).getReg();
4486  MI->eraseFromParent();
4487  }
4488  updateValueMap(I, Reg);
4489  return true;
4490 }
4491 
4492 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4493  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4494  "Unexpected integer extend instruction.");
4495  MVT RetVT;
4496  MVT SrcVT;
4497  if (!isTypeSupported(I->getType(), RetVT))
4498  return false;
4499 
4500  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4501  return false;
4502 
4503  // Try to optimize already sign-/zero-extended values from load instructions.
4504  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4505  return true;
4506 
4507  unsigned SrcReg = getRegForValue(I->getOperand(0));
4508  if (!SrcReg)
4509  return false;
4510  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4511 
4512  // Try to optimize already sign-/zero-extended values from function arguments.
4513  bool IsZExt = isa<ZExtInst>(I);
4514  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4515  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4516  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4517  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4518  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4519  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4520  .addImm(0)
4521  .addReg(SrcReg, getKillRegState(SrcIsKill))
4522  .addImm(AArch64::sub_32);
4523  SrcReg = ResultReg;
4524  }
4525  // Conservatively clear all kill flags from all uses, because we are
4526  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4527  // level. The result of the instruction at IR level might have been
4528 // trivially dead, which is now no longer true.
4529  unsigned UseReg = lookUpRegForValue(I);
4530  if (UseReg)
4531  MRI.clearKillFlags(UseReg);
4532 
4533  updateValueMap(I, SrcReg);
4534  return true;
4535  }
4536  }
4537 
4538  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4539  if (!ResultReg)
4540  return false;
4541 
4542  updateValueMap(I, ResultReg);
4543  return true;
4544 }
4545 
4546 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4547  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4548  if (!DestEVT.isSimple())
4549  return false;
4550 
4551  MVT DestVT = DestEVT.getSimpleVT();
4552  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4553  return false;
4554 
4555  unsigned DivOpc;
4556  bool Is64bit = (DestVT == MVT::i64);
4557  switch (ISDOpcode) {
4558  default:
4559  return false;
4560  case ISD::SREM:
4561  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4562  break;
4563  case ISD::UREM:
4564  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4565  break;
4566  }
4567  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4568  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4569  if (!Src0Reg)
4570  return false;
4571  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4572 
4573  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4574  if (!Src1Reg)
4575  return false;
4576  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4577 
4578  const TargetRegisterClass *RC =
4579  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4580  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4581  Src1Reg, /*IsKill=*/false);
4582  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4583  // The remainder is computed as numerator - (quotient * denominator) using the
4584  // MSUB instruction.
4585  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4586  Src1Reg, Src1IsKill, Src0Reg,
4587  Src0IsKill);
4588  updateValueMap(I, ResultReg);
4589  return true;
4590 }
4591 
4592 bool AArch64FastISel::selectMul(const Instruction *I) {
4593  MVT VT;
4594  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4595  return false;
4596 
4597  if (VT.isVector())
4598  return selectBinaryOp(I, ISD::MUL);
4599 
4600  const Value *Src0 = I->getOperand(0);
4601  const Value *Src1 = I->getOperand(1);
4602  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4603  if (C->getValue().isPowerOf2())
4604  std::swap(Src0, Src1);
4605 
4606  // Try to simplify to a shift instruction.
4607  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4608  if (C->getValue().isPowerOf2()) {
4609  uint64_t ShiftVal = C->getValue().logBase2();
4610  MVT SrcVT = VT;
4611  bool IsZExt = true;
4612  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4613  if (!isIntExtFree(ZExt)) {
4614  MVT VT;
4615  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4616  SrcVT = VT;
4617  IsZExt = true;
4618  Src0 = ZExt->getOperand(0);
4619  }
4620  }
4621  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4622  if (!isIntExtFree(SExt)) {
4623  MVT VT;
4624  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4625  SrcVT = VT;
4626  IsZExt = false;
4627  Src0 = SExt->getOperand(0);
4628  }
4629  }
4630  }
4631 
4632  unsigned Src0Reg = getRegForValue(Src0);
4633  if (!Src0Reg)
4634  return false;
4635  bool Src0IsKill = hasTrivialKill(Src0);
4636 
4637  unsigned ResultReg =
4638  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4639 
4640  if (ResultReg) {
4641  updateValueMap(I, ResultReg);
4642  return true;
4643  }
4644  }
4645 
4646  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4647  if (!Src0Reg)
4648  return false;
4649  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4650 
4651  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4652  if (!Src1Reg)
4653  return false;
4654  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4655 
4656  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4657 
4658  if (!ResultReg)
4659  return false;
4660 
4661  updateValueMap(I, ResultReg);
4662  return true;
4663 }
4664 
4665 bool AArch64FastISel::selectShift(const Instruction *I) {
4666  MVT RetVT;
4667  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4668  return false;
4669 
4670  if (RetVT.isVector())
4671  return selectOperator(I, I->getOpcode());
4672 
4673  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4674  unsigned ResultReg = 0;
4675  uint64_t ShiftVal = C->getZExtValue();
4676  MVT SrcVT = RetVT;
4677  bool IsZExt = I->getOpcode() != Instruction::AShr;
4678  const Value *Op0 = I->getOperand(0);
4679  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4680  if (!isIntExtFree(ZExt)) {
4681  MVT TmpVT;
4682  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4683  SrcVT = TmpVT;
4684  IsZExt = true;
4685  Op0 = ZExt->getOperand(0);
4686  }
4687  }
4688  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4689  if (!isIntExtFree(SExt)) {
4690  MVT TmpVT;
4691  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4692  SrcVT = TmpVT;
4693  IsZExt = false;
4694  Op0 = SExt->getOperand(0);
4695  }
4696  }
4697  }
4698 
4699  unsigned Op0Reg = getRegForValue(Op0);
4700  if (!Op0Reg)
4701  return false;
4702  bool Op0IsKill = hasTrivialKill(Op0);
4703 
4704  switch (I->getOpcode()) {
4705  default: llvm_unreachable("Unexpected instruction.");
4706  case Instruction::Shl:
4707  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4708  break;
4709  case Instruction::AShr:
4710  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4711  break;
4712  case Instruction::LShr:
4713  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4714  break;
4715  }
4716  if (!ResultReg)
4717  return false;
4718 
4719  updateValueMap(I, ResultReg);
4720  return true;
4721  }
4722 
4723  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4724  if (!Op0Reg)
4725  return false;
4726  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4727 
4728  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4729  if (!Op1Reg)
4730  return false;
4731  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4732 
4733  unsigned ResultReg = 0;
4734  switch (I->getOpcode()) {
4735  default: llvm_unreachable("Unexpected instruction.");
4736  case Instruction::Shl:
4737  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4738  break;
4739  case Instruction::AShr:
4740  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4741  break;
4742  case Instruction::LShr:
4743  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4744  break;
4745  }
4746 
4747  if (!ResultReg)
4748  return false;
4749 
4750  updateValueMap(I, ResultReg);
4751  return true;
4752 }
4753 
4754 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4755  MVT RetVT, SrcVT;
4756 
4757  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4758  return false;
4759  if (!isTypeLegal(I->getType(), RetVT))
4760  return false;
4761 
4762  unsigned Opc;
4763  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4764  Opc = AArch64::FMOVWSr;
4765  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4766  Opc = AArch64::FMOVXDr;
4767  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4768  Opc = AArch64::FMOVSWr;
4769  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4770  Opc = AArch64::FMOVDXr;
4771  else
4772  return false;
4773 
4774  const TargetRegisterClass *RC = nullptr;
4775  switch (RetVT.SimpleTy) {
4776  default: llvm_unreachable("Unexpected value type.");
4777  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4778  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4779  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4780  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4781  }
4782  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4783  if (!Op0Reg)
4784  return false;
4785  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4786  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4787 
4788  if (!ResultReg)
4789  return false;
4790 
4791  updateValueMap(I, ResultReg);
4792  return true;
4793 }
4794 
4795 bool AArch64FastISel::selectFRem(const Instruction *I) {
4796  MVT RetVT;
4797  if (!isTypeLegal(I->getType(), RetVT))
4798  return false;
4799 
4800  RTLIB::Libcall LC;
4801  switch (RetVT.SimpleTy) {
4802  default:
4803  return false;
4804  case MVT::f32:
4805  LC = RTLIB::REM_F32;
4806  break;
4807  case MVT::f64:
4808  LC = RTLIB::REM_F64;
4809  break;
4810  }
4811 
4812  ArgListTy Args;
4813  Args.reserve(I->getNumOperands());
4814 
4815  // Populate the argument list.
4816  for (auto &Arg : I->operands()) {
4817  ArgListEntry Entry;
4818  Entry.Val = Arg;
4819  Entry.Ty = Arg->getType();
4820  Args.push_back(Entry);
4821  }
4822 
4823  CallLoweringInfo CLI;
4824  MCContext &Ctx = MF->getContext();
4825  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4826  TLI.getLibcallName(LC), std::move(Args));
4827  if (!lowerCallTo(CLI))
4828  return false;
4829  updateValueMap(I, CLI.ResultReg);
4830  return true;
4831 }
4832 
4833 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4834  MVT VT;
4835  if (!isTypeLegal(I->getType(), VT))
4836  return false;
4837 
4838  if (!isa<ConstantInt>(I->getOperand(1)))
4839  return selectBinaryOp(I, ISD::SDIV);
4840 
4841  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4842  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4843  !(C.isPowerOf2() || (-C).isPowerOf2()))
4844  return selectBinaryOp(I, ISD::SDIV);
4845 
4846  unsigned Lg2 = C.countTrailingZeros();
4847  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4848  if (!Src0Reg)
4849  return false;
4850  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4851 
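  // 'exact' guarantees there is no remainder, so the rounding bias added
  // below is unnecessary and a plain arithmetic shift suffices.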
4852  if (cast<BinaryOperator>(I)->isExact()) {
4853  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4854  if (!ResultReg)
4855  return false;
4856  updateValueMap(I, ResultReg);
4857  return true;
4858  }
4859 
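  // sdiv rounds toward zero while ashr rounds toward -infinity; bias
  // negative dividends by 2^k - 1 (selected by the compare below) before
  // shifting.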
4860  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4861  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4862  if (!AddReg)
4863  return false;
4864 
4865  // (Src0 < 0) ? Pow2 - 1 : 0;
4866  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4867  return false;
4868 
4869  unsigned SelectOpc;
4870  const TargetRegisterClass *RC;
4871  if (VT == MVT::i64) {
4872  SelectOpc = AArch64::CSELXr;
4873  RC = &AArch64::GPR64RegClass;
4874  } else {
4875  SelectOpc = AArch64::CSELWr;
4876  RC = &AArch64::GPR32RegClass;
4877  }
4878  unsigned SelectReg =
4879  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4880  Src0IsKill, AArch64CC::LT);
4881  if (!SelectReg)
4882  return false;
4883 
4884  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4885  // negate the result.
4886  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4887  unsigned ResultReg;
4888  if (C.isNegative())
4889  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4890  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4891  else
4892  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4893 
4894  if (!ResultReg)
4895  return false;
4896 
4897  updateValueMap(I, ResultReg);
4898  return true;
4899 }
4900 
4901 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4902 /// have to duplicate it for AArch64, because otherwise we would fail during the
4903 /// sign-extend emission.
4904 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4905  unsigned IdxN = getRegForValue(Idx);
4906  if (IdxN == 0)
4907  // Unhandled operand. Halt "fast" selection and bail.
4908  return std::pair<unsigned, bool>(0, false);
4909 
4910  bool IdxNIsKill = hasTrivialKill(Idx);
4911 
4912  // If the index is smaller or larger than intptr_t, truncate or extend it.
4913  MVT PtrVT = TLI.getPointerTy(DL);
4914  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4915  if (IdxVT.bitsLT(PtrVT)) {
4916  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4917  IdxNIsKill = true;
4918  } else if (IdxVT.bitsGT(PtrVT))
4919  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4920  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4921 }
4922 
4923 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4924 /// duplicate it for AArch64, because otherwise we would bail out even for
4925 /// simple cases. This is because the standard fastEmit functions don't cover
4926 /// MUL at all and ADD is lowered very inefficiently.
4927 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4928  unsigned N = getRegForValue(I->getOperand(0));
4929  if (!N)
4930  return false;
4931  bool NIsKill = hasTrivialKill(I->getOperand(0));
4932 
4933  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4934  // into a single N = N + TotalOffset.
4935  uint64_t TotalOffs = 0;
4936  MVT VT = TLI.getPointerTy(DL);
4937  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4938  GTI != E; ++GTI) {
4939  const Value *Idx = GTI.getOperand();
4940  if (auto *StTy = GTI.getStructTypeOrNull()) {
4941  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4942  // N = N + Offset
4943  if (Field)
4944  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4945  } else {
4946  Type *Ty = GTI.getIndexedType();
4947 
4948  // If this is a constant subscript, handle it quickly.
4949  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4950  if (CI->isZero())
4951  continue;
4952  // N = N + Offset
4953  TotalOffs +=
4954  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4955  continue;
4956  }
4957  if (TotalOffs) {
4958  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4959  if (!N)
4960  return false;
4961  NIsKill = true;
4962  TotalOffs = 0;
4963  }
4964 
4965  // N = N + Idx * ElementSize;
4966  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4967  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4968  unsigned IdxN = Pair.first;
4969  bool IdxNIsKill = Pair.second;
4970  if (!IdxN)
4971  return false;
4972 
4973  if (ElementSize != 1) {
4974  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4975  if (!C)
4976  return false;
4977  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4978  if (!IdxN)
4979  return false;
4980  IdxNIsKill = true;
4981  }
4982  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4983  if (!N)
4984  return false;
4985  }
4986  }
4987  if (TotalOffs) {
4988  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4989  if (!N)
4990  return false;
4991  }
4992  updateValueMap(I, N);
4993  return true;
4994 }
4995 
4996 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4997  assert(TM.getOptLevel() == CodeGenOpt::None &&
4998  "cmpxchg survived AtomicExpand at optlevel > -O0");
4999 
5000  auto *RetPairTy = cast<StructType>(I->getType());
5001  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5002  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5003  "cmpxchg has a non-i1 status result");
5004 
5005  MVT VT;
5006  if (!isTypeLegal(RetTy, VT))
5007  return false;
5008 
5009  const TargetRegisterClass *ResRC;
5010  unsigned Opc, CmpOpc;
5011  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5012  // extractvalue selection doesn't support that.
5013  if (VT == MVT::i32) {
5014  Opc = AArch64::CMP_SWAP_32;
5015  CmpOpc = AArch64::SUBSWrs;
5016  ResRC = &AArch64::GPR32RegClass;
5017  } else if (VT == MVT::i64) {
5018  Opc = AArch64::CMP_SWAP_64;
5019  CmpOpc = AArch64::SUBSXrs;
5020  ResRC = &AArch64::GPR64RegClass;
5021  } else {
5022  return false;
5023  }
5024 
5025  const MCInstrDesc &II = TII.get(Opc);
5026 
5027  const unsigned AddrReg = constrainOperandRegClass(
5028  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5029  const unsigned DesiredReg = constrainOperandRegClass(
5030  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5031  const unsigned NewReg = constrainOperandRegClass(
5032  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5033 
5034  const unsigned ResultReg1 = createResultReg(ResRC);
5035  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5036  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5037 
5038  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5039  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5040  .addDef(ResultReg1)
5041  .addDef(ScratchReg)
5042  .addUse(AddrReg)
5043  .addUse(DesiredReg)
5044  .addUse(NewReg);
5045 
5046  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5047  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5048  .addUse(ResultReg1)
5049  .addUse(DesiredReg)
5050  .addImm(0);
5051 
5052  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5053  .addDef(ResultReg2)
5054  .addUse(AArch64::WZR)
5055  .addUse(AArch64::WZR)
 5056  .addImm(AArch64CC::NE);
 5057 
5058  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5059  updateValueMap(I, ResultReg1, 2);
5060  return true;
5061 }
5062 
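A minimal C++ sketch (illustrative only; cmpxchg32 is an invented name) of what the three instructions emitted above compute together: the CMP_SWAP pseudo yields the old memory value, SUBS compares it with the expected value, and CSINC turns that comparison into a 0/1 success flag, matching the {old value, i1 success} pair that cmpxchg returns.

  #include <atomic>
  #include <cstdint>
  #include <utility>

  std::pair<uint32_t, bool> cmpxchg32(std::atomic<uint32_t> &Mem,
                                      uint32_t Desired, uint32_t NewVal) {
    uint32_t Old = Desired;                                   // expected value in
    bool Success = Mem.compare_exchange_strong(Old, NewVal);  // Old now holds the prior memory value
    return {Old, Success};                                    // ResultReg1, ResultReg2
  }
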
5063 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5064  switch (I->getOpcode()) {
5065  default:
5066  break;
5067  case Instruction::Add:
5068  case Instruction::Sub:
5069  return selectAddSub(I);
5070  case Instruction::Mul:
5071  return selectMul(I);
5072  case Instruction::SDiv:
5073  return selectSDiv(I);
5074  case Instruction::SRem:
5075  if (!selectBinaryOp(I, ISD::SREM))
5076  return selectRem(I, ISD::SREM);
5077  return true;
5078  case Instruction::URem:
5079  if (!selectBinaryOp(I, ISD::UREM))
5080  return selectRem(I, ISD::UREM);
5081  return true;
5082  case Instruction::Shl:
5083  case Instruction::LShr:
5084  case Instruction::AShr:
5085  return selectShift(I);
5086  case Instruction::And:
5087  case Instruction::Or:
5088  case Instruction::Xor:
5089  return selectLogicalOp(I);
5090  case Instruction::Br:
5091  return selectBranch(I);
5092  case Instruction::IndirectBr:
5093  return selectIndirectBr(I);
5094  case Instruction::BitCast:
5095  if (!FastISel::selectBitCast(I))
5096  return selectBitCast(I);
5097  return true;
5098  case Instruction::FPToSI:
5099  if (!selectCast(I, ISD::FP_TO_SINT))
5100  return selectFPToInt(I, /*Signed=*/true);
5101  return true;
5102  case Instruction::FPToUI:
5103  return selectFPToInt(I, /*Signed=*/false);
5104  case Instruction::ZExt:
5105  case Instruction::SExt:
5106  return selectIntExt(I);
5107  case Instruction::Trunc:
5108  if (!selectCast(I, ISD::TRUNCATE))
5109  return selectTrunc(I);
5110  return true;
5111  case Instruction::FPExt:
5112  return selectFPExt(I);
5113  case Instruction::FPTrunc:
5114  return selectFPTrunc(I);
5115  case Instruction::SIToFP:
5116  if (!selectCast(I, ISD::SINT_TO_FP))
5117  return selectIntToFP(I, /*Signed=*/true);
5118  return true;
5119  case Instruction::UIToFP:
5120  return selectIntToFP(I, /*Signed=*/false);
5121  case Instruction::Load:
5122  return selectLoad(I);
5123  case Instruction::Store:
5124  return selectStore(I);
5125  case Instruction::FCmp:
5126  case Instruction::ICmp:
5127  return selectCmp(I);
5128  case Instruction::Select:
5129  return selectSelect(I);
5130  case Instruction::Ret:
5131  return selectRet(I);
5132  case Instruction::FRem:
5133  return selectFRem(I);
5134  case Instruction::GetElementPtr:
5135  return selectGetElementPtr(I);
5136  case Instruction::AtomicCmpXchg:
5137  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5138  }
5139 
5140  // Silence warnings.
5141  (void)&CC_AArch64_DarwinPCS_VarArg;
5142  (void)&CC_AArch64_Win64_VarArg;
5143 
 5144  // Fall back to target-independent instruction selection.
5145  return selectOperator(I, I->getOpcode());
5146 }
5147 
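The SRem/URem cases above fall back to selectRem when the generic ISD::SREM/UREM path fails; AArch64 has no integer remainder instruction, so the remainder is formed from a divide followed by a multiply-subtract. A minimal C++ sketch of that arithmetic (illustrative only; sremViaDivMsub is an invented name):

  #include <cstdint>

  int64_t sremViaDivMsub(int64_t A, int64_t B) {
    int64_t Q = A / B; // SDIV Q, A, B
    return A - Q * B;  // MSUB R, Q, B, A computes A - Q*B
  }
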
5148 namespace llvm {
5149 
 5150 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
 5151  const TargetLibraryInfo *LibInfo) {
5152  return new AArch64FastISel(FuncInfo, LibInfo);
5153 }
5154 
5155 } // end namespace llvm