LLVM 6.0.0svn
AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/CodeGen/FastISel.h"
41 #include "llvm/IR/Argument.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CallingConv.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DerivedTypes.h"
49 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GlobalValue.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/Operator.h"
58 #include "llvm/IR/Type.h"
59 #include "llvm/IR/User.h"
60 #include "llvm/IR/Value.h"
61 #include "llvm/MC/MCInstrDesc.h"
62 #include "llvm/MC/MCRegisterInfo.h"
63 #include "llvm/MC/MCSymbol.h"
65 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/CodeGen.h"
67 #include "llvm/Support/Compiler.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81  class Address {
82  public:
83  using BaseKind = enum {
84  RegBase,
85  FrameIndexBase
86  };
87 
88  private:
89  BaseKind Kind = RegBase;
90  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91  union {
92  unsigned Reg;
93  int FI;
94  } Base;
95  unsigned OffsetReg = 0;
96  unsigned Shift = 0;
97  int64_t Offset = 0;
98  const GlobalValue *GV = nullptr;
99 
100  public:
101  Address() { Base.Reg = 0; }
102 
103  void setKind(BaseKind K) { Kind = K; }
104  BaseKind getKind() const { return Kind; }
105  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107  bool isRegBase() const { return Kind == RegBase; }
108  bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110  void setReg(unsigned Reg) {
111  assert(isRegBase() && "Invalid base register access!");
112  Base.Reg = Reg;
113  }
114 
115  unsigned getReg() const {
116  assert(isRegBase() && "Invalid base register access!");
117  return Base.Reg;
118  }
119 
120  void setOffsetReg(unsigned Reg) {
121  OffsetReg = Reg;
122  }
123 
124  unsigned getOffsetReg() const {
125  return OffsetReg;
126  }
127 
128  void setFI(unsigned FI) {
129  assert(isFIBase() && "Invalid base frame index access!");
130  Base.FI = FI;
131  }
132 
133  unsigned getFI() const {
134  assert(isFIBase() && "Invalid base frame index access!");
135  return Base.FI;
136  }
137 
138  void setOffset(int64_t O) { Offset = O; }
139  int64_t getOffset() { return Offset; }
140  void setShift(unsigned S) { Shift = S; }
141  unsigned getShift() { return Shift; }
142 
143  void setGlobalValue(const GlobalValue *G) { GV = G; }
144  const GlobalValue *getGlobalValue() { return GV; }
145  };
146 
147  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148  /// make the right decision when generating code for different targets.
149  const AArch64Subtarget *Subtarget;
150  LLVMContext *Context;
151 
152  bool fastLowerArguments() override;
153  bool fastLowerCall(CallLoweringInfo &CLI) override;
154  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157  // Selection routines.
158  bool selectAddSub(const Instruction *I);
159  bool selectLogicalOp(const Instruction *I);
160  bool selectLoad(const Instruction *I);
161  bool selectStore(const Instruction *I);
162  bool selectBranch(const Instruction *I);
163  bool selectIndirectBr(const Instruction *I);
164  bool selectCmp(const Instruction *I);
165  bool selectSelect(const Instruction *I);
166  bool selectFPExt(const Instruction *I);
167  bool selectFPTrunc(const Instruction *I);
168  bool selectFPToInt(const Instruction *I, bool Signed);
169  bool selectIntToFP(const Instruction *I, bool Signed);
170  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171  bool selectRet(const Instruction *I);
172  bool selectTrunc(const Instruction *I);
173  bool selectIntExt(const Instruction *I);
174  bool selectMul(const Instruction *I);
175  bool selectShift(const Instruction *I);
176  bool selectBitCast(const Instruction *I);
177  bool selectFRem(const Instruction *I);
178  bool selectSDiv(const Instruction *I);
179  bool selectGetElementPtr(const Instruction *I);
180  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182  // Utility helper routines.
183  bool isTypeLegal(Type *Ty, MVT &VT);
184  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185  bool isValueAvailable(const Value *V) const;
186  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187  bool computeCallAddress(const Value *V, Address &Addr);
188  bool simplifyAddress(Address &Addr, MVT VT);
189  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  MachineMemOperand::Flags Flags,
191  unsigned ScaleFactor, MachineMemOperand *MMO);
192  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194  unsigned Alignment);
195  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196  const Value *Cond);
197  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198  bool optimizeSelect(const SelectInst *SI);
199  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200 
201  // Emit helper routines.
202  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203  const Value *RHS, bool SetFlags = false,
204  bool WantResult = true, bool IsZExt = false);
205  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207  bool SetFlags = false, bool WantResult = true);
208  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210  bool WantResult = true);
211  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213  AArch64_AM::ShiftExtendType ShiftType,
214  uint64_t ShiftImm, bool SetFlags = false,
215  bool WantResult = true);
216  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218  AArch64_AM::ShiftExtendType ExtType,
219  uint64_t ShiftImm, bool SetFlags = false,
220  bool WantResult = true);
221 
222  // Emit functions.
223  bool emitCompareAndBranch(const BranchInst *BI);
224  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229  MachineMemOperand *MMO = nullptr);
230  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231  MachineMemOperand *MMO = nullptr);
232  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233  MachineMemOperand *MMO = nullptr);
234  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237  bool SetFlags = false, bool WantResult = true,
238  bool IsZExt = false);
239  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241  bool SetFlags = false, bool WantResult = true,
242  bool IsZExt = false);
243  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246  unsigned RHSReg, bool RHSIsKill,
247  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248  bool WantResult = true);
249  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250  const Value *RHS);
251  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252  bool LHSIsKill, uint64_t Imm);
253  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255  uint64_t ShiftImm);
256  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258  unsigned Op1, bool Op1IsKill);
259  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260  unsigned Op1, bool Op1IsKill);
261  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262  unsigned Op1, bool Op1IsKill);
263  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264  unsigned Op1Reg, bool Op1IsKill);
265  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266  uint64_t Imm, bool IsZExt = true);
267  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268  unsigned Op1Reg, bool Op1IsKill);
269  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270  uint64_t Imm, bool IsZExt = true);
271  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272  unsigned Op1Reg, bool Op1IsKill);
273  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274  uint64_t Imm, bool IsZExt = false);
275 
276  unsigned materializeInt(const ConstantInt *CI, MVT VT);
277  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278  unsigned materializeGV(const GlobalValue *GV);
279 
280  // Call handling routines.
281 private:
282  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284  unsigned &NumBytes);
285  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286 
287 public:
288  // Backend specific FastISel code.
289  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290  unsigned fastMaterializeConstant(const Constant *C) override;
291  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292 
293  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294  const TargetLibraryInfo *LibInfo)
295  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296  Subtarget =
297  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298  Context = &FuncInfo.Fn->getContext();
299  }
300 
301  bool fastSelectInstruction(const Instruction *I) override;
302 
303 #include "AArch64GenFastISel.inc"
304 };
305 
306 } // end anonymous namespace
307 
308 #include "AArch64GenCallingConv.inc"
309 
310 /// \brief Check if the sign-/zero-extend will be a noop.
311 static bool isIntExtFree(const Instruction *I) {
312  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313  "Unexpected integer extend instruction.");
314  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315  "Unexpected value type.");
316  bool IsZExt = isa<ZExtInst>(I);
317 
318  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319  if (LI->hasOneUse())
320  return true;
321 
322  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324  return true;
325 
326  return false;
327 }
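// Rationale: a single-use extended load can be selected as an extending load
// (e.g. LDRB/LDRSB or LDRH/LDRSH), and arguments carrying a matching
// zeroext/signext attribute already arrive extended, so in both cases the
// extend needs no extra instruction.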
328 
329 /// \brief Determine the implicit scale factor that is applied by a memory
330 /// operation for a given value type.
331 static unsigned getImplicitScaleFactor(MVT VT) {
332  switch (VT.SimpleTy) {
333  default:
334  return 0; // invalid
335  case MVT::i1: // fall-through
336  case MVT::i8:
337  return 1;
338  case MVT::i16:
339  return 2;
340  case MVT::i32: // fall-through
341  case MVT::f32:
342  return 4;
343  case MVT::i64: // fall-through
344  case MVT::f64:
345  return 8;
346  }
347 }
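// The scale factor is simply the access size in bytes. It is used to turn a
// byte offset into the scaled unsigned 12-bit immediate of LDR/STR (see
// addLoadStoreOperands, which emits Addr.getOffset() / ScaleFactor); e.g. an
// i32 load at byte offset 16 uses an immediate of 4.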
348 
349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350  if (CC == CallingConv::WebKit_JS)
351  return CC_AArch64_WebKit_JS;
352  if (CC == CallingConv::GHC)
353  return CC_AArch64_GHC;
354  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355 }
356 
357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359  "Alloca should always return a pointer.");
360 
361  // Don't handle dynamic allocas.
362  if (!FuncInfo.StaticAllocaMap.count(AI))
363  return 0;
364 
365  DenseMap<const AllocaInst *, int>::iterator SI =
366  FuncInfo.StaticAllocaMap.find(AI);
367 
368  if (SI != FuncInfo.StaticAllocaMap.end()) {
369  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371  ResultReg)
372  .addFrameIndex(SI->second)
373  .addImm(0)
374  .addImm(0);
375  return ResultReg;
376  }
377 
378  return 0;
379 }
380 
381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382  if (VT > MVT::i64)
383  return 0;
384 
385  if (!CI->isZero())
386  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387 
388  // Create a copy from the zero register to materialize a "0" value.
389  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390  : &AArch64::GPR32RegClass;
391  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392  unsigned ResultReg = createResultReg(RC);
393  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394  ResultReg).addReg(ZeroReg, getKillRegState(true));
395  return ResultReg;
396 }
397 
398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399  // Positive zero (+0.0) has to be materialized with a fmov from the zero
400  // register, because the immediate version of fmov cannot encode zero.
401  if (CFP->isNullValue())
402  return fastMaterializeFloatZero(CFP);
403 
404  if (VT != MVT::f32 && VT != MVT::f64)
405  return 0;
406 
407  const APFloat Val = CFP->getValueAPF();
408  bool Is64Bit = (VT == MVT::f64);
409  // This checks whether we can use an FMOV instruction to materialize the
410  // constant; otherwise we have to materialize it via the constant pool.
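 // FMOV (immediate) can only encode values of the form +/-(1 + m/16) * 2^e
 // with m in [0, 15] and e in [-3, 4], e.g. 0.5, 1.0, or 31.0, but not 0.0
 // or 0.1.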
411  if (TLI.isFPImmLegal(Val, VT)) {
412  int Imm =
413  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414  assert((Imm != -1) && "Cannot encode floating-point constant.");
415  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417  }
418 
419  // For the MachO large code model materialize the FP constant in code.
420  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422  const TargetRegisterClass *RC = Is64Bit ?
423  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424 
425  unsigned TmpReg = createResultReg(RC);
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428 
429  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431  TII.get(TargetOpcode::COPY), ResultReg)
432  .addReg(TmpReg, getKillRegState(true));
433 
434  return ResultReg;
435  }
436 
437  // Materialize via constant pool. MachineConstantPool wants an explicit
438  // alignment.
439  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440  if (Align == 0)
441  Align = DL.getTypeAllocSize(CFP->getType());
442 
443  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447 
448  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451  .addReg(ADRPReg)
452  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453  return ResultReg;
454 }
455 
456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457  // We can't handle thread-local variables quickly yet.
458  if (GV->isThreadLocal())
459  return 0;
460 
461  // MachO still uses GOT for large code-model accesses, but ELF requires
462  // movz/movk sequences, which FastISel doesn't handle yet.
463  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464  return 0;
465 
466  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467 
468  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469  if (!DestEVT.isSimple())
470  return 0;
471 
472  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473  unsigned ResultReg;
474 
475  if (OpFlags & AArch64II::MO_GOT) {
476  // ADRP + LDRX
477  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478  ADRPReg)
479  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
480 
481  ResultReg = createResultReg(&AArch64::GPR64RegClass);
482  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483  ResultReg)
484  .addReg(ADRPReg)
485  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
486  AArch64II::MO_NC);
487  } else {
488  // ADRP + ADDX
489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490  ADRPReg)
491  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
492 
493  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495  ResultReg)
496  .addReg(ADRPReg)
497  .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
498  .addImm(0);
499  }
500  return ResultReg;
501 }
502 
503 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
504  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
505 
506  // Only handle simple types.
507  if (!CEVT.isSimple())
508  return 0;
509  MVT VT = CEVT.getSimpleVT();
510 
511  if (const auto *CI = dyn_cast<ConstantInt>(C))
512  return materializeInt(CI, VT);
513  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
514  return materializeFP(CFP, VT);
515  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
516  return materializeGV(GV);
517 
518  return 0;
519 }
520 
521 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
522  assert(CFP->isNullValue() &&
523  "Floating-point constant is not a positive zero.");
524  MVT VT;
525  if (!isTypeLegal(CFP->getType(), VT))
526  return 0;
527 
528  if (VT != MVT::f32 && VT != MVT::f64)
529  return 0;
530 
531  bool Is64Bit = (VT == MVT::f64);
532  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
533  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
534  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
535 }
536 
537 /// \brief Check if the multiply is by a power-of-2 constant.
538 static bool isMulPowOf2(const Value *I) {
539  if (const auto *MI = dyn_cast<MulOperator>(I)) {
540  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
541  if (C->getValue().isPowerOf2())
542  return true;
543  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
544  if (C->getValue().isPowerOf2())
545  return true;
546  }
547  return false;
548 }
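// A multiply by a power of two (e.g. "mul i64 %x, 8") is equivalent to a left
// shift ("shl i64 %x, 3") and can therefore be folded into a shifted-register
// operand or a scaled register addressing mode below.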
549 
550 // Computes the address to get to an object.
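// The resulting Address is either a frame index plus offset, a base register
// plus a constant offset, or a base register plus an (optionally sign-/zero-
// extended and shifted) offset register; simplifyAddress later rewrites
// anything the load/store instructions cannot encode directly.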
551 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
552 {
553  const User *U = nullptr;
554  unsigned Opcode = Instruction::UserOp1;
555  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
556  // Don't walk into other basic blocks unless the object is an alloca from
557  // another block, otherwise it may not have a virtual register assigned.
558  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
559  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
560  Opcode = I->getOpcode();
561  U = I;
562  }
563  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
564  Opcode = C->getOpcode();
565  U = C;
566  }
567 
568  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
569  if (Ty->getAddressSpace() > 255)
570  // Fast instruction selection doesn't support the special
571  // address spaces.
572  return false;
573 
574  switch (Opcode) {
575  default:
576  break;
577  case Instruction::BitCast:
578  // Look through bitcasts.
579  return computeAddress(U->getOperand(0), Addr, Ty);
580 
581  case Instruction::IntToPtr:
582  // Look past no-op inttoptrs.
583  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
584  TLI.getPointerTy(DL))
585  return computeAddress(U->getOperand(0), Addr, Ty);
586  break;
587 
588  case Instruction::PtrToInt:
589  // Look past no-op ptrtoints.
590  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
591  return computeAddress(U->getOperand(0), Addr, Ty);
592  break;
593 
594  case Instruction::GetElementPtr: {
595  Address SavedAddr = Addr;
596  uint64_t TmpOffset = Addr.getOffset();
597 
598  // Iterate through the GEP, folding the constants into offsets where
599  // we can.
600  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
601  GTI != E; ++GTI) {
602  const Value *Op = GTI.getOperand();
603  if (StructType *STy = GTI.getStructTypeOrNull()) {
604  const StructLayout *SL = DL.getStructLayout(STy);
605  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
606  TmpOffset += SL->getElementOffset(Idx);
607  } else {
608  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
609  while (true) {
610  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
611  // Constant-offset addressing.
612  TmpOffset += CI->getSExtValue() * S;
613  break;
614  }
615  if (canFoldAddIntoGEP(U, Op)) {
616  // A compatible add with a constant operand. Fold the constant.
617  ConstantInt *CI =
618  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
619  TmpOffset += CI->getSExtValue() * S;
620  // Iterate on the other operand.
621  Op = cast<AddOperator>(Op)->getOperand(0);
622  continue;
623  }
624  // Unsupported
625  goto unsupported_gep;
626  }
627  }
628  }
629 
630  // Try to grab the base operand now.
631  Addr.setOffset(TmpOffset);
632  if (computeAddress(U->getOperand(0), Addr, Ty))
633  return true;
634 
635  // We failed, restore everything and try the other options.
636  Addr = SavedAddr;
637 
638  unsupported_gep:
639  break;
640  }
641  case Instruction::Alloca: {
642  const AllocaInst *AI = cast<AllocaInst>(Obj);
643  DenseMap<const AllocaInst *, int>::iterator SI =
644  FuncInfo.StaticAllocaMap.find(AI);
645  if (SI != FuncInfo.StaticAllocaMap.end()) {
646  Addr.setKind(Address::FrameIndexBase);
647  Addr.setFI(SI->second);
648  return true;
649  }
650  break;
651  }
652  case Instruction::Add: {
653  // Adds of constants are common and easy enough.
654  const Value *LHS = U->getOperand(0);
655  const Value *RHS = U->getOperand(1);
656 
657  if (isa<ConstantInt>(LHS))
658  std::swap(LHS, RHS);
659 
660  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
661  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
662  return computeAddress(LHS, Addr, Ty);
663  }
664 
665  Address Backup = Addr;
666  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
667  return true;
668  Addr = Backup;
669 
670  break;
671  }
672  case Instruction::Sub: {
673  // Subs of constants are common and easy enough.
674  const Value *LHS = U->getOperand(0);
675  const Value *RHS = U->getOperand(1);
676 
677  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
678  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
679  return computeAddress(LHS, Addr, Ty);
680  }
681  break;
682  }
683  case Instruction::Shl: {
684  if (Addr.getOffsetReg())
685  break;
686 
687  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
688  if (!CI)
689  break;
690 
691  unsigned Val = CI->getZExtValue();
692  if (Val < 1 || Val > 3)
693  break;
694 
695  uint64_t NumBytes = 0;
696  if (Ty && Ty->isSized()) {
697  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
698  NumBytes = NumBits / 8;
699  if (!isPowerOf2_64(NumBits))
700  NumBytes = 0;
701  }
702 
703  if (NumBytes != (1ULL << Val))
704  break;
705 
706  Addr.setShift(Val);
707  Addr.setExtendType(AArch64_AM::LSL);
708 
709  const Value *Src = U->getOperand(0);
710  if (const auto *I = dyn_cast<Instruction>(Src)) {
711  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
712  // Fold the zext or sext when it won't become a noop.
713  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
714  if (!isIntExtFree(ZE) &&
715  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
716  Addr.setExtendType(AArch64_AM::UXTW);
717  Src = ZE->getOperand(0);
718  }
719  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
720  if (!isIntExtFree(SE) &&
721  SE->getOperand(0)->getType()->isIntegerTy(32)) {
722  Addr.setExtendType(AArch64_AM::SXTW);
723  Src = SE->getOperand(0);
724  }
725  }
726  }
727  }
728 
729  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
730  if (AI->getOpcode() == Instruction::And) {
731  const Value *LHS = AI->getOperand(0);
732  const Value *RHS = AI->getOperand(1);
733 
734  if (const auto *C = dyn_cast<ConstantInt>(LHS))
735  if (C->getValue() == 0xffffffff)
736  std::swap(LHS, RHS);
737 
738  if (const auto *C = dyn_cast<ConstantInt>(RHS))
739  if (C->getValue() == 0xffffffff) {
740  Addr.setExtendType(AArch64_AM::UXTW);
741  unsigned Reg = getRegForValue(LHS);
742  if (!Reg)
743  return false;
744  bool RegIsKill = hasTrivialKill(LHS);
745  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
746  AArch64::sub_32);
747  Addr.setOffsetReg(Reg);
748  return true;
749  }
750  }
751 
752  unsigned Reg = getRegForValue(Src);
753  if (!Reg)
754  return false;
755  Addr.setOffsetReg(Reg);
756  return true;
757  }
758  case Instruction::Mul: {
759  if (Addr.getOffsetReg())
760  break;
761 
762  if (!isMulPowOf2(U))
763  break;
764 
765  const Value *LHS = U->getOperand(0);
766  const Value *RHS = U->getOperand(1);
767 
768  // Canonicalize power-of-2 value to the RHS.
769  if (const auto *C = dyn_cast<ConstantInt>(LHS))
770  if (C->getValue().isPowerOf2())
771  std::swap(LHS, RHS);
772 
773  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
774  const auto *C = cast<ConstantInt>(RHS);
775  unsigned Val = C->getValue().logBase2();
776  if (Val < 1 || Val > 3)
777  break;
778 
779  uint64_t NumBytes = 0;
780  if (Ty && Ty->isSized()) {
781  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
782  NumBytes = NumBits / 8;
783  if (!isPowerOf2_64(NumBits))
784  NumBytes = 0;
785  }
786 
787  if (NumBytes != (1ULL << Val))
788  break;
789 
790  Addr.setShift(Val);
791  Addr.setExtendType(AArch64_AM::LSL);
792 
793  const Value *Src = LHS;
794  if (const auto *I = dyn_cast<Instruction>(Src)) {
795  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
796  // Fold the zext or sext when it won't become a noop.
797  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
798  if (!isIntExtFree(ZE) &&
799  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
800  Addr.setExtendType(AArch64_AM::UXTW);
801  Src = ZE->getOperand(0);
802  }
803  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
804  if (!isIntExtFree(SE) &&
805  SE->getOperand(0)->getType()->isIntegerTy(32)) {
806  Addr.setExtendType(AArch64_AM::SXTW);
807  Src = SE->getOperand(0);
808  }
809  }
810  }
811  }
812 
813  unsigned Reg = getRegForValue(Src);
814  if (!Reg)
815  return false;
816  Addr.setOffsetReg(Reg);
817  return true;
818  }
819  case Instruction::And: {
820  if (Addr.getOffsetReg())
821  break;
822 
823  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
824  break;
825 
826  const Value *LHS = U->getOperand(0);
827  const Value *RHS = U->getOperand(1);
828 
829  if (const auto *C = dyn_cast<ConstantInt>(LHS))
830  if (C->getValue() == 0xffffffff)
831  std::swap(LHS, RHS);
832 
833  if (const auto *C = dyn_cast<ConstantInt>(RHS))
834  if (C->getValue() == 0xffffffff) {
835  Addr.setShift(0);
836  Addr.setExtendType(AArch64_AM::LSL);
837  Addr.setExtendType(AArch64_AM::UXTW);
838 
839  unsigned Reg = getRegForValue(LHS);
840  if (!Reg)
841  return false;
842  bool RegIsKill = hasTrivialKill(LHS);
843  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
844  AArch64::sub_32);
845  Addr.setOffsetReg(Reg);
846  return true;
847  }
848  break;
849  }
850  case Instruction::SExt:
851  case Instruction::ZExt: {
852  if (!Addr.getReg() || Addr.getOffsetReg())
853  break;
854 
855  const Value *Src = nullptr;
856  // Fold the zext or sext when it won't become a noop.
857  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
858  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
859  Addr.setExtendType(AArch64_AM::UXTW);
860  Src = ZE->getOperand(0);
861  }
862  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
863  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
864  Addr.setExtendType(AArch64_AM::SXTW);
865  Src = SE->getOperand(0);
866  }
867  }
868 
869  if (!Src)
870  break;
871 
872  Addr.setShift(0);
873  unsigned Reg = getRegForValue(Src);
874  if (!Reg)
875  return false;
876  Addr.setOffsetReg(Reg);
877  return true;
878  }
879  } // end switch
880 
881  if (Addr.isRegBase() && !Addr.getReg()) {
882  unsigned Reg = getRegForValue(Obj);
883  if (!Reg)
884  return false;
885  Addr.setReg(Reg);
886  return true;
887  }
888 
889  if (!Addr.getOffsetReg()) {
890  unsigned Reg = getRegForValue(Obj);
891  if (!Reg)
892  return false;
893  Addr.setOffsetReg(Reg);
894  return true;
895  }
896 
897  return false;
898 }
899 
900 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
901  const User *U = nullptr;
902  unsigned Opcode = Instruction::UserOp1;
903  bool InMBB = true;
904 
905  if (const auto *I = dyn_cast<Instruction>(V)) {
906  Opcode = I->getOpcode();
907  U = I;
908  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
909  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
910  Opcode = C->getOpcode();
911  U = C;
912  }
913 
914  switch (Opcode) {
915  default: break;
916  case Instruction::BitCast:
917  // Look past bitcasts if its operand is in the same BB.
918  if (InMBB)
919  return computeCallAddress(U->getOperand(0), Addr);
920  break;
921  case Instruction::IntToPtr:
922  // Look past no-op inttoptrs if its operand is in the same BB.
923  if (InMBB &&
924  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
925  TLI.getPointerTy(DL))
926  return computeCallAddress(U->getOperand(0), Addr);
927  break;
928  case Instruction::PtrToInt:
929  // Look past no-op ptrtoints if its operand is in the same BB.
930  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
931  return computeCallAddress(U->getOperand(0), Addr);
932  break;
933  }
934 
935  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
936  Addr.setGlobalValue(GV);
937  return true;
938  }
939 
940  // If all else fails, try to materialize the value in a register.
941  if (!Addr.getGlobalValue()) {
942  Addr.setReg(getRegForValue(V));
943  return Addr.getReg() != 0;
944  }
945 
946  return false;
947 }
948 
949 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
950  EVT evt = TLI.getValueType(DL, Ty, true);
951 
952  // Only handle simple types.
953  if (evt == MVT::Other || !evt.isSimple())
954  return false;
955  VT = evt.getSimpleVT();
956 
957  // This is a legal type, but it's not something we handle in fast-isel.
958  if (VT == MVT::f128)
959  return false;
960 
961  // Handle all other legal types, i.e. a register that will directly hold this
962  // value.
963  return TLI.isTypeLegal(VT);
964 }
965 
966 /// \brief Determine if the value type is supported by FastISel.
967 ///
968 /// FastISel for AArch64 can handle more value types than are legal. This adds
969 /// simple value types such as i1, i8, and i16.
970 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
971  if (Ty->isVectorTy() && !IsVectorAllowed)
972  return false;
973 
974  if (isTypeLegal(Ty, VT))
975  return true;
976 
977  // If this is a type that can be sign- or zero-extended to a basic operation,
978  // go ahead and accept it now.
979  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
980  return true;
981 
982  return false;
983 }
984 
985 bool AArch64FastISel::isValueAvailable(const Value *V) const {
986  if (!isa<Instruction>(V))
987  return true;
988 
989  const auto *I = cast<Instruction>(V);
990  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
991 }
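// Only values defined in the current basic block (or non-instruction values
// such as constants and arguments) are considered available for folding;
// cross-block instruction results already live in assigned virtual registers.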
992 
993 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
994  unsigned ScaleFactor = getImplicitScaleFactor(VT);
995  if (!ScaleFactor)
996  return false;
997 
998  bool ImmediateOffsetNeedsLowering = false;
999  bool RegisterOffsetNeedsLowering = false;
1000  int64_t Offset = Addr.getOffset();
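 // AArch64 loads/stores accept either a signed 9-bit unscaled offset
 // (LDUR/STUR, -256..255) or an unsigned 12-bit offset scaled by the access
 // size; e.g. for an i64 access, offset 32760 (4095 * 8) is encodable but
 // 32768 is not and must be folded into the base register below.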
1001  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1002  ImmediateOffsetNeedsLowering = true;
1003  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1004  !isUInt<12>(Offset / ScaleFactor))
1005  ImmediateOffsetNeedsLowering = true;
1006 
1007  // Cannot encode an offset register and an immediate offset in the same
1008  // instruction. Fold the immediate offset into the load/store instruction and
1009  // emit an additional add to take care of the offset register.
1010  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1011  RegisterOffsetNeedsLowering = true;
1012 
1013  // Cannot encode zero register as base.
1014  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1015  RegisterOffsetNeedsLowering = true;
1016 
1017  // If this is a stack pointer and the offset needs to be simplified then put
1018  // the alloca address into a register, set the base type back to register and
1019  // continue. This should almost never happen.
1020  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1021  {
1022  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1023  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1024  ResultReg)
1025  .addFrameIndex(Addr.getFI())
1026  .addImm(0)
1027  .addImm(0);
1028  Addr.setKind(Address::RegBase);
1029  Addr.setReg(ResultReg);
1030  }
1031 
1032  if (RegisterOffsetNeedsLowering) {
1033  unsigned ResultReg = 0;
1034  if (Addr.getReg()) {
1035  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1036  Addr.getExtendType() == AArch64_AM::UXTW )
1037  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1038  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1039  /*TODO:IsKill=*/false, Addr.getExtendType(),
1040  Addr.getShift());
1041  else
1042  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1043  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1044  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1045  Addr.getShift());
1046  } else {
1047  if (Addr.getExtendType() == AArch64_AM::UXTW)
1048  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1049  /*Op0IsKill=*/false, Addr.getShift(),
1050  /*IsZExt=*/true);
1051  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1052  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1053  /*Op0IsKill=*/false, Addr.getShift(),
1054  /*IsZExt=*/false);
1055  else
1056  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1057  /*Op0IsKill=*/false, Addr.getShift());
1058  }
1059  if (!ResultReg)
1060  return false;
1061 
1062  Addr.setReg(ResultReg);
1063  Addr.setOffsetReg(0);
1064  Addr.setShift(0);
1065  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1066  }
1067 
1068  // Since the offset is too large for the load/store instruction, get the
1069  // reg+offset into a register.
1070  if (ImmediateOffsetNeedsLowering) {
1071  unsigned ResultReg;
1072  if (Addr.getReg())
1073  // Try to fold the immediate into the add instruction.
1074  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1075  else
1076  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1077 
1078  if (!ResultReg)
1079  return false;
1080  Addr.setReg(ResultReg);
1081  Addr.setOffset(0);
1082  }
1083  return true;
1084 }
1085 
1086 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1087  const MachineInstrBuilder &MIB,
1088  MachineMemOperand::Flags Flags,
1089  unsigned ScaleFactor,
1090  MachineMemOperand *MMO) {
1091  int64_t Offset = Addr.getOffset() / ScaleFactor;
1092  // Frame base works a bit differently. Handle it separately.
1093  if (Addr.isFIBase()) {
1094  int FI = Addr.getFI();
1095  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1096  // and alignment should be based on the VT.
1097  MMO = FuncInfo.MF->getMachineMemOperand(
1098  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1099  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1100  // Now add the rest of the operands.
1101  MIB.addFrameIndex(FI).addImm(Offset);
1102  } else {
1103  assert(Addr.isRegBase() && "Unexpected address kind.");
1104  const MCInstrDesc &II = MIB->getDesc();
1105  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1106  Addr.setReg(
1107  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1108  Addr.setOffsetReg(
1109  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1110  if (Addr.getOffsetReg()) {
1111  assert(Addr.getOffset() == 0 && "Unexpected offset");
1112  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1113  Addr.getExtendType() == AArch64_AM::SXTX;
1114  MIB.addReg(Addr.getReg());
1115  MIB.addReg(Addr.getOffsetReg());
1116  MIB.addImm(IsSigned);
1117  MIB.addImm(Addr.getShift() != 0);
1118  } else
1119  MIB.addReg(Addr.getReg()).addImm(Offset);
1120  }
1121 
1122  if (MMO)
1123  MIB.addMemOperand(MMO);
1124 }
1125 
1126 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1127  const Value *RHS, bool SetFlags,
1128  bool WantResult, bool IsZExt) {
1129  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1130  bool NeedExtend = false;
1131  switch (RetVT.SimpleTy) {
1132  default:
1133  return 0;
1134  case MVT::i1:
1135  NeedExtend = true;
1136  break;
1137  case MVT::i8:
1138  NeedExtend = true;
1139  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1140  break;
1141  case MVT::i16:
1142  NeedExtend = true;
1143  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1144  break;
1145  case MVT::i32: // fall-through
1146  case MVT::i64:
1147  break;
1148  }
1149  MVT SrcVT = RetVT;
1150  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1151 
1152  // Canonicalize immediates to the RHS first.
1153  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1154  std::swap(LHS, RHS);
1155 
1156  // Canonicalize mul by power of 2 to the RHS.
1157  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1158  if (isMulPowOf2(LHS))
1159  std::swap(LHS, RHS);
1160 
1161  // Canonicalize shift immediate to the RHS.
1162  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1163  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1164  if (isa<ConstantInt>(SI->getOperand(1)))
1165  if (SI->getOpcode() == Instruction::Shl ||
1166  SI->getOpcode() == Instruction::LShr ||
1167  SI->getOpcode() == Instruction::AShr )
1168  std::swap(LHS, RHS);
1169 
1170  unsigned LHSReg = getRegForValue(LHS);
1171  if (!LHSReg)
1172  return 0;
1173  bool LHSIsKill = hasTrivialKill(LHS);
1174 
1175  if (NeedExtend)
1176  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1177 
1178  unsigned ResultReg = 0;
1179  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1180  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1181  if (C->isNegative())
1182  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1183  SetFlags, WantResult);
1184  else
1185  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1186  WantResult);
1187  } else if (const auto *C = dyn_cast<Constant>(RHS))
1188  if (C->isNullValue())
1189  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1190  WantResult);
1191 
1192  if (ResultReg)
1193  return ResultReg;
1194 
1195  // Only extend the RHS within the instruction if there is a valid extend type.
1196  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1197  isValueAvailable(RHS)) {
1198  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1199  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1200  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1201  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1202  if (!RHSReg)
1203  return 0;
1204  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1205  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1206  RHSIsKill, ExtendType, C->getZExtValue(),
1207  SetFlags, WantResult);
1208  }
1209  unsigned RHSReg = getRegForValue(RHS);
1210  if (!RHSReg)
1211  return 0;
1212  bool RHSIsKill = hasTrivialKill(RHS);
1213  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1214  ExtendType, 0, SetFlags, WantResult);
1215  }
1216 
1217  // Check if the mul can be folded into the instruction.
1218  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1219  if (isMulPowOf2(RHS)) {
1220  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1221  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1222 
1223  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1224  if (C->getValue().isPowerOf2())
1225  std::swap(MulLHS, MulRHS);
1226 
1227  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1228  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1229  unsigned RHSReg = getRegForValue(MulLHS);
1230  if (!RHSReg)
1231  return 0;
1232  bool RHSIsKill = hasTrivialKill(MulLHS);
1233  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1234  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1235  WantResult);
1236  if (ResultReg)
1237  return ResultReg;
1238  }
1239  }
1240 
1241  // Check if the shift can be folded into the instruction.
1242  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1243  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1244  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1245  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1246  switch (SI->getOpcode()) {
1247  default: break;
1248  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1249  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1250  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1251  }
1252  uint64_t ShiftVal = C->getZExtValue();
1253  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1254  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1255  if (!RHSReg)
1256  return 0;
1257  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1258  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1259  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1260  WantResult);
1261  if (ResultReg)
1262  return ResultReg;
1263  }
1264  }
1265  }
1266  }
1267 
1268  unsigned RHSReg = getRegForValue(RHS);
1269  if (!RHSReg)
1270  return 0;
1271  bool RHSIsKill = hasTrivialKill(RHS);
1272 
1273  if (NeedExtend)
1274  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1275 
1276  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1277  SetFlags, WantResult);
1278 }
1279 
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281  bool LHSIsKill, unsigned RHSReg,
1282  bool RHSIsKill, bool SetFlags,
1283  bool WantResult) {
1284  assert(LHSReg && RHSReg && "Invalid register number.");
1285 
1286  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1287  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1288  return 0;
1289 
1290  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1291  return 0;
1292 
1293  static const unsigned OpcTable[2][2][2] = {
1294  { { AArch64::SUBWrr, AArch64::SUBXrr },
1295  { AArch64::ADDWrr, AArch64::ADDXrr } },
1296  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1297  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1298  };
1299  bool Is64Bit = RetVT == MVT::i64;
1300  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1301  const TargetRegisterClass *RC =
1302  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1303  unsigned ResultReg;
1304  if (WantResult)
1305  ResultReg = createResultReg(RC);
1306  else
1307  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1308 
1309  const MCInstrDesc &II = TII.get(Opc);
1310  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1311  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1312  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1313  .addReg(LHSReg, getKillRegState(LHSIsKill))
1314  .addReg(RHSReg, getKillRegState(RHSIsKill));
1315  return ResultReg;
1316 }
1317 
1318 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1319  bool LHSIsKill, uint64_t Imm,
1320  bool SetFlags, bool WantResult) {
1321  assert(LHSReg && "Invalid register number.");
1322 
1323  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1324  return 0;
1325 
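 // ADD/SUB (immediate) encode a 12-bit unsigned value, optionally shifted
 // left by 12; e.g. 0x1000 becomes "#1, lsl #12", while 0x1001 is not
 // encodable and is rejected here (emitAdd_ri_ then falls back to
 // materializing the constant in a register).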
1326  unsigned ShiftImm;
1327  if (isUInt<12>(Imm))
1328  ShiftImm = 0;
1329  else if ((Imm & 0xfff000) == Imm) {
1330  ShiftImm = 12;
1331  Imm >>= 12;
1332  } else
1333  return 0;
1334 
1335  static const unsigned OpcTable[2][2][2] = {
1336  { { AArch64::SUBWri, AArch64::SUBXri },
1337  { AArch64::ADDWri, AArch64::ADDXri } },
1338  { { AArch64::SUBSWri, AArch64::SUBSXri },
1339  { AArch64::ADDSWri, AArch64::ADDSXri } }
1340  };
1341  bool Is64Bit = RetVT == MVT::i64;
1342  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1343  const TargetRegisterClass *RC;
1344  if (SetFlags)
1345  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1346  else
1347  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1348  unsigned ResultReg;
1349  if (WantResult)
1350  ResultReg = createResultReg(RC);
1351  else
1352  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1353 
1354  const MCInstrDesc &II = TII.get(Opc);
1355  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1356  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1357  .addReg(LHSReg, getKillRegState(LHSIsKill))
1358  .addImm(Imm)
1359  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1360  return ResultReg;
1361 }
1362 
1363 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1364  bool LHSIsKill, unsigned RHSReg,
1365  bool RHSIsKill,
1366  AArch64_AM::ShiftExtendType ShiftType,
1367  uint64_t ShiftImm, bool SetFlags,
1368  bool WantResult) {
1369  assert(LHSReg && RHSReg && "Invalid register number.");
1370  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1371  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1372 
1373  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1374  return 0;
1375 
1376  // Don't deal with undefined shifts.
1377  if (ShiftImm >= RetVT.getSizeInBits())
1378  return 0;
1379 
1380  static const unsigned OpcTable[2][2][2] = {
1381  { { AArch64::SUBWrs, AArch64::SUBXrs },
1382  { AArch64::ADDWrs, AArch64::ADDXrs } },
1383  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1384  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1385  };
1386  bool Is64Bit = RetVT == MVT::i64;
1387  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1388  const TargetRegisterClass *RC =
1389  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1390  unsigned ResultReg;
1391  if (WantResult)
1392  ResultReg = createResultReg(RC);
1393  else
1394  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1395 
1396  const MCInstrDesc &II = TII.get(Opc);
1397  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1398  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1399  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1400  .addReg(LHSReg, getKillRegState(LHSIsKill))
1401  .addReg(RHSReg, getKillRegState(RHSIsKill))
1402  .addImm(getShifterImm(ShiftType, ShiftImm));
1403  return ResultReg;
1404 }
1405 
1406 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1407  bool LHSIsKill, unsigned RHSReg,
1408  bool RHSIsKill,
1409  AArch64_AM::ShiftExtendType ExtType,
1410  uint64_t ShiftImm, bool SetFlags,
1411  bool WantResult) {
1412  assert(LHSReg && RHSReg && "Invalid register number.");
1413  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1414  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1415 
1416  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1417  return 0;
1418 
1419  if (ShiftImm >= 4)
1420  return 0;
1421 
1422  static const unsigned OpcTable[2][2][2] = {
1423  { { AArch64::SUBWrx, AArch64::SUBXrx },
1424  { AArch64::ADDWrx, AArch64::ADDXrx } },
1425  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1426  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1427  };
1428  bool Is64Bit = RetVT == MVT::i64;
1429  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1430  const TargetRegisterClass *RC = nullptr;
1431  if (SetFlags)
1432  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1433  else
1434  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1435  unsigned ResultReg;
1436  if (WantResult)
1437  ResultReg = createResultReg(RC);
1438  else
1439  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1440 
1441  const MCInstrDesc &II = TII.get(Opc);
1442  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1443  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1444  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1445  .addReg(LHSReg, getKillRegState(LHSIsKill))
1446  .addReg(RHSReg, getKillRegState(RHSIsKill))
1447  .addImm(getArithExtendImm(ExtType, ShiftImm));
1448  return ResultReg;
1449 }
1450 
1451 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1452  Type *Ty = LHS->getType();
1453  EVT EVT = TLI.getValueType(DL, Ty, true);
1454  if (!EVT.isSimple())
1455  return false;
1456  MVT VT = EVT.getSimpleVT();
1457 
1458  switch (VT.SimpleTy) {
1459  default:
1460  return false;
1461  case MVT::i1:
1462  case MVT::i8:
1463  case MVT::i16:
1464  case MVT::i32:
1465  case MVT::i64:
1466  return emitICmp(VT, LHS, RHS, IsZExt);
1467  case MVT::f32:
1468  case MVT::f64:
1469  return emitFCmp(VT, LHS, RHS);
1470  }
1471 }
1472 
1473 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1474  bool IsZExt) {
1475  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1476  IsZExt) != 0;
1477 }
1478 
1479 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1480  uint64_t Imm) {
1481  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1482  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1483 }
1484 
1485 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1486  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1487  return false;
1488 
1489  // Check to see if the 2nd operand is a constant that we can encode directly
1490  // in the compare.
1491  bool UseImm = false;
1492  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1493  if (CFP->isZero() && !CFP->isNegative())
1494  UseImm = true;
1495 
1496  unsigned LHSReg = getRegForValue(LHS);
1497  if (!LHSReg)
1498  return false;
1499  bool LHSIsKill = hasTrivialKill(LHS);
1500 
1501  if (UseImm) {
1502  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1503  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1504  .addReg(LHSReg, getKillRegState(LHSIsKill));
1505  return true;
1506  }
1507 
1508  unsigned RHSReg = getRegForValue(RHS);
1509  if (!RHSReg)
1510  return false;
1511  bool RHSIsKill = hasTrivialKill(RHS);
1512 
1513  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1514  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1515  .addReg(LHSReg, getKillRegState(LHSIsKill))
1516  .addReg(RHSReg, getKillRegState(RHSIsKill));
1517  return true;
1518 }
1519 
1520 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1521  bool SetFlags, bool WantResult, bool IsZExt) {
1522  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1523  IsZExt);
1524 }
1525 
1526 /// \brief This method is a wrapper to simplify add emission.
1527 ///
1528 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1529 /// that fails, then try to materialize the immediate into a register and use
1530 /// emitAddSub_rr instead.
1531 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1532  int64_t Imm) {
1533  unsigned ResultReg;
1534  if (Imm < 0)
1535  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1536  else
1537  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1538 
1539  if (ResultReg)
1540  return ResultReg;
1541 
1542  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1543  if (!CReg)
1544  return 0;
1545 
1546  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1547  return ResultReg;
1548 }
1549 
1550 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1551  bool SetFlags, bool WantResult, bool IsZExt) {
1552  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1553  IsZExt);
1554 }
1555 
1556 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1557  bool LHSIsKill, unsigned RHSReg,
1558  bool RHSIsKill, bool WantResult) {
1559  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1560  RHSIsKill, /*SetFlags=*/true, WantResult);
1561 }
1562 
1563 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1564  bool LHSIsKill, unsigned RHSReg,
1565  bool RHSIsKill,
1566  AArch64_AM::ShiftExtendType ShiftType,
1567  uint64_t ShiftImm, bool WantResult) {
1568  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1569  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1570  WantResult);
1571 }
1572 
1573 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1574  const Value *LHS, const Value *RHS) {
1575  // Canonicalize immediates to the RHS first.
1576  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1577  std::swap(LHS, RHS);
1578 
1579  // Canonicalize mul by power-of-2 to the RHS.
1580  if (LHS->hasOneUse() && isValueAvailable(LHS))
1581  if (isMulPowOf2(LHS))
1582  std::swap(LHS, RHS);
1583 
1584  // Canonicalize shift immediate to the RHS.
1585  if (LHS->hasOneUse() && isValueAvailable(LHS))
1586  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1587  if (isa<ConstantInt>(SI->getOperand(1)))
1588  std::swap(LHS, RHS);
1589 
1590  unsigned LHSReg = getRegForValue(LHS);
1591  if (!LHSReg)
1592  return 0;
1593  bool LHSIsKill = hasTrivialKill(LHS);
1594 
1595  unsigned ResultReg = 0;
1596  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1597  uint64_t Imm = C->getZExtValue();
1598  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1599  }
1600  if (ResultReg)
1601  return ResultReg;
1602 
1603  // Check if the mul can be folded into the instruction.
1604  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1605  if (isMulPowOf2(RHS)) {
1606  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1607  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1608 
1609  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1610  if (C->getValue().isPowerOf2())
1611  std::swap(MulLHS, MulRHS);
1612 
1613  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1614  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1615 
1616  unsigned RHSReg = getRegForValue(MulLHS);
1617  if (!RHSReg)
1618  return 0;
1619  bool RHSIsKill = hasTrivialKill(MulLHS);
1620  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1621  RHSIsKill, ShiftVal);
1622  if (ResultReg)
1623  return ResultReg;
1624  }
1625  }
1626 
1627  // Check if the shift can be folded into the instruction.
1628  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1629  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1630  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1631  uint64_t ShiftVal = C->getZExtValue();
1632  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1633  if (!RHSReg)
1634  return 0;
1635  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1636  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1637  RHSIsKill, ShiftVal);
1638  if (ResultReg)
1639  return ResultReg;
1640  }
1641  }
1642 
1643  unsigned RHSReg = getRegForValue(RHS);
1644  if (!RHSReg)
1645  return 0;
1646  bool RHSIsKill = hasTrivialKill(RHS);
1647 
1648  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1649  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1650  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1651  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1652  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1653  }
1654  return ResultReg;
1655 }
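// For illustration: emitLogicalOp folds a single-use shift or power-of-two
// multiply into the shifted-register form of the logical instruction, so IR
// such as "and i32 %a, (shl i32 %b, 2)" (or a mul by 4) becomes one
// "and wD, wA, wB, lsl #2" rather than a separate shift plus an and; results
// narrower than i32 are then re-masked to 8 or 16 bits.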
1656 
1657 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1658  unsigned LHSReg, bool LHSIsKill,
1659  uint64_t Imm) {
1660  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1661  "ISD nodes are not consecutive!");
1662  static const unsigned OpcTable[3][2] = {
1663  { AArch64::ANDWri, AArch64::ANDXri },
1664  { AArch64::ORRWri, AArch64::ORRXri },
1665  { AArch64::EORWri, AArch64::EORXri }
1666  };
1667  const TargetRegisterClass *RC;
1668  unsigned Opc;
1669  unsigned RegSize;
1670  switch (RetVT.SimpleTy) {
1671  default:
1672  return 0;
1673  case MVT::i1:
1674  case MVT::i8:
1675  case MVT::i16:
1676  case MVT::i32: {
1677  unsigned Idx = ISDOpc - ISD::AND;
1678  Opc = OpcTable[Idx][0];
1679  RC = &AArch64::GPR32spRegClass;
1680  RegSize = 32;
1681  break;
1682  }
1683  case MVT::i64:
1684  Opc = OpcTable[ISDOpc - ISD::AND][1];
1685  RC = &AArch64::GPR64spRegClass;
1686  RegSize = 64;
1687  break;
1688  }
1689 
1690  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1691  return 0;
1692 
1693  unsigned ResultReg =
1694  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1695  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1696  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1697  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1698  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1699  }
1700  return ResultReg;
1701 }
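// For illustration: only A64 "bitmask immediates" (repeating patterns of
// contiguous set bits such as 0xff, 0x7fffffff or 0x00ff00ff) pass
// isLogicalImmediate; a constant like 0x12345678 is rejected here and falls
// back to the register-register path in emitLogicalOp.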
1702 
1703 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1704  unsigned LHSReg, bool LHSIsKill,
1705  unsigned RHSReg, bool RHSIsKill,
1706  uint64_t ShiftImm) {
1707  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1708  "ISD nodes are not consecutive!");
1709  static const unsigned OpcTable[3][2] = {
1710  { AArch64::ANDWrs, AArch64::ANDXrs },
1711  { AArch64::ORRWrs, AArch64::ORRXrs },
1712  { AArch64::EORWrs, AArch64::EORXrs }
1713  };
1714 
1715  // Don't deal with undefined shifts.
1716  if (ShiftImm >= RetVT.getSizeInBits())
1717  return 0;
1718 
1719  const TargetRegisterClass *RC;
1720  unsigned Opc;
1721  switch (RetVT.SimpleTy) {
1722  default:
1723  return 0;
1724  case MVT::i1:
1725  case MVT::i8:
1726  case MVT::i16:
1727  case MVT::i32:
1728  Opc = OpcTable[ISDOpc - ISD::AND][0];
1729  RC = &AArch64::GPR32RegClass;
1730  break;
1731  case MVT::i64:
1732  Opc = OpcTable[ISDOpc - ISD::AND][1];
1733  RC = &AArch64::GPR64RegClass;
1734  break;
1735  }
1736  unsigned ResultReg =
1737  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1738  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1739  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1740  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1741  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1742  }
1743  return ResultReg;
1744 }
1745 
1746 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1747  uint64_t Imm) {
1748  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1749 }
1750 
1751 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1752  bool WantZExt, MachineMemOperand *MMO) {
1753  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1754  return 0;
1755 
1756  // Simplify this down to something we can handle.
1757  if (!simplifyAddress(Addr, VT))
1758  return 0;
1759 
1760  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1761  if (!ScaleFactor)
1762  llvm_unreachable("Unexpected value type.");
1763 
1764  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1765  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1766  bool UseScaled = true;
1767  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1768  UseScaled = false;
1769  ScaleFactor = 1;
1770  }
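  // For illustration: with VT == MVT::i32, an offset of 16 stays scaled and
  // selects "ldr w0, [x1, #16]" (imm12 encoded as 16/4 = 4), while an offset
  // of -8, or one that is not a multiple of 4, forces the unscaled "ldur"
  // form with its raw 9-bit signed immediate (register names illustrative).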
1771 
1772  static const unsigned GPOpcTable[2][8][4] = {
1773  // Sign-extend.
1774  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1775  AArch64::LDURXi },
1776  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1777  AArch64::LDURXi },
1778  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1779  AArch64::LDRXui },
1780  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1781  AArch64::LDRXui },
1782  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1783  AArch64::LDRXroX },
1784  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1785  AArch64::LDRXroX },
1786  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1787  AArch64::LDRXroW },
1788  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1789  AArch64::LDRXroW }
1790  },
1791  // Zero-extend.
1792  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1793  AArch64::LDURXi },
1794  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1795  AArch64::LDURXi },
1796  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1797  AArch64::LDRXui },
1798  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1799  AArch64::LDRXui },
1800  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1801  AArch64::LDRXroX },
1802  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1803  AArch64::LDRXroX },
1804  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1805  AArch64::LDRXroW },
1806  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1807  AArch64::LDRXroW }
1808  }
1809  };
1810 
1811  static const unsigned FPOpcTable[4][2] = {
1812  { AArch64::LDURSi, AArch64::LDURDi },
1813  { AArch64::LDRSui, AArch64::LDRDui },
1814  { AArch64::LDRSroX, AArch64::LDRDroX },
1815  { AArch64::LDRSroW, AArch64::LDRDroW }
1816  };
1817 
1818  unsigned Opc;
1819  const TargetRegisterClass *RC;
1820  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1821  Addr.getOffsetReg();
1822  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1823  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1824  Addr.getExtendType() == AArch64_AM::SXTW)
1825  Idx++;
1826 
1827  bool IsRet64Bit = RetVT == MVT::i64;
1828  switch (VT.SimpleTy) {
1829  default:
1830  llvm_unreachable("Unexpected value type.");
1831  case MVT::i1: // Intentional fall-through.
1832  case MVT::i8:
1833  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1834  RC = (IsRet64Bit && !WantZExt) ?
1835  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1836  break;
1837  case MVT::i16:
1838  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1839  RC = (IsRet64Bit && !WantZExt) ?
1840  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1841  break;
1842  case MVT::i32:
1843  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1844  RC = (IsRet64Bit && !WantZExt) ?
1845  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1846  break;
1847  case MVT::i64:
1848  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1849  RC = &AArch64::GPR64RegClass;
1850  break;
1851  case MVT::f32:
1852  Opc = FPOpcTable[Idx][0];
1853  RC = &AArch64::FPR32RegClass;
1854  break;
1855  case MVT::f64:
1856  Opc = FPOpcTable[Idx][1];
1857  RC = &AArch64::FPR64RegClass;
1858  break;
1859  }
1860 
1861  // Create the base instruction, then add the operands.
1862  unsigned ResultReg = createResultReg(RC);
1863  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1864  TII.get(Opc), ResultReg);
1865  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1866 
1867  // Loading an i1 requires special handling.
1868  if (VT == MVT::i1) {
1869  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1870  assert(ANDReg && "Unexpected AND instruction emission failure.");
1871  ResultReg = ANDReg;
1872  }
1873 
1874  // For zero-extending loads to 64 bits we emit a 32-bit load and then convert
1875  // the 32-bit reg to a 64-bit reg.
1876  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1877  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1878  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1879  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1880  .addImm(0)
1881  .addReg(ResultReg, getKillRegState(true))
1882  .addImm(AArch64::sub_32);
1883  ResultReg = Reg64;
1884  }
1885  return ResultReg;
1886 }
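// For illustration: the SUBREG_TO_REG above relies on the AArch64 rule that a
// write to a W register (including a 32-bit or narrower load) zeroes bits
// [63:32] of the full X register, so zero-extending a loaded value to i64
// needs no extra instruction, only a retagging of the result as the sub_32
// subregister.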
1887 
1888 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1889  MVT VT;
1890  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1891  return false;
1892 
1893  if (VT.isVector())
1894  return selectOperator(I, I->getOpcode());
1895 
1896  unsigned ResultReg;
1897  switch (I->getOpcode()) {
1898  default:
1899  llvm_unreachable("Unexpected instruction.");
1900  case Instruction::Add:
1901  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1902  break;
1903  case Instruction::Sub:
1904  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1905  break;
1906  }
1907  if (!ResultReg)
1908  return false;
1909 
1910  updateValueMap(I, ResultReg);
1911  return true;
1912 }
1913 
1914 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1915  MVT VT;
1916  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1917  return false;
1918 
1919  if (VT.isVector())
1920  return selectOperator(I, I->getOpcode());
1921 
1922  unsigned ResultReg;
1923  switch (I->getOpcode()) {
1924  default:
1925  llvm_unreachable("Unexpected instruction.");
1926  case Instruction::And:
1927  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1928  break;
1929  case Instruction::Or:
1930  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1931  break;
1932  case Instruction::Xor:
1933  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1934  break;
1935  }
1936  if (!ResultReg)
1937  return false;
1938 
1939  updateValueMap(I, ResultReg);
1940  return true;
1941 }
1942 
1943 bool AArch64FastISel::selectLoad(const Instruction *I) {
1944  MVT VT;
1945  // Verify we have a legal type before going any further. Currently, we handle
1946  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1947  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1948  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1949  cast<LoadInst>(I)->isAtomic())
1950  return false;
1951 
1952  const Value *SV = I->getOperand(0);
1953  if (TLI.supportSwiftError()) {
1954  // Swifterror values can come from either a function parameter with
1955  // swifterror attribute or an alloca with swifterror attribute.
1956  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1957  if (Arg->hasSwiftErrorAttr())
1958  return false;
1959  }
1960 
1961  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1962  if (Alloca->isSwiftError())
1963  return false;
1964  }
1965  }
1966 
1967  // See if we can handle this address.
1968  Address Addr;
1969  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1970  return false;
1971 
1972  // Fold the following sign-/zero-extend into the load instruction.
1973  bool WantZExt = true;
1974  MVT RetVT = VT;
1975  const Value *IntExtVal = nullptr;
1976  if (I->hasOneUse()) {
1977  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1978  if (isTypeSupported(ZE->getType(), RetVT))
1979  IntExtVal = ZE;
1980  else
1981  RetVT = VT;
1982  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1983  if (isTypeSupported(SE->getType(), RetVT))
1984  IntExtVal = SE;
1985  else
1986  RetVT = VT;
1987  WantZExt = false;
1988  }
1989  }
1990 
1991  unsigned ResultReg =
1992  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1993  if (!ResultReg)
1994  return false;
1995 
1996  // There are a few different cases we have to handle, because the load or the
1997  // sign-/zero-extend might not be selected by FastISel if we fall back to
1998  // SelectionDAG. There is also an ordering issue when both instructions are in
1999  // different basic blocks.
2000  // 1.) The load instruction is selected by FastISel, but the integer extend
2001  // not. This usually happens when the integer extend is in a different
2002  // basic block and SelectionDAG took over for that basic block.
2003  // 2.) The load instruction is selected before the integer extend. This only
2004  // happens when the integer extend is in a different basic block.
2005  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2006  // by FastISel. This happens if there are instructions between the load
2007  // and the integer extend that couldn't be selected by FastISel.
2008  if (IntExtVal) {
2009  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2010  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2011  // it when it selects the integer extend.
2012  unsigned Reg = lookUpRegForValue(IntExtVal);
2013  auto *MI = MRI.getUniqueVRegDef(Reg);
2014  if (!MI) {
2015  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2016  if (WantZExt) {
2017  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2018  std::prev(FuncInfo.InsertPt)->eraseFromParent();
2019  ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
2020  } else
2021  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2022  /*IsKill=*/true,
2023  AArch64::sub_32);
2024  }
2025  updateValueMap(I, ResultReg);
2026  return true;
2027  }
2028 
2029  // The integer extend has already been emitted - delete all the instructions
2030  // that have been emitted by the integer extend lowering code and use the
2031  // result from the load instruction directly.
2032  while (MI) {
2033  Reg = 0;
2034  for (auto &Opnd : MI->uses()) {
2035  if (Opnd.isReg()) {
2036  Reg = Opnd.getReg();
2037  break;
2038  }
2039  }
2040  MI->eraseFromParent();
2041  MI = nullptr;
2042  if (Reg)
2043  MI = MRI.getUniqueVRegDef(Reg);
2044  }
2045  updateValueMap(IntExtVal, ResultReg);
2046  return true;
2047  }
2048 
2049  updateValueMap(I, ResultReg);
2050  return true;
2051 }
2052 
2053 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2054  unsigned AddrReg,
2055  MachineMemOperand *MMO) {
2056  unsigned Opc;
2057  switch (VT.SimpleTy) {
2058  default: return false;
2059  case MVT::i8: Opc = AArch64::STLRB; break;
2060  case MVT::i16: Opc = AArch64::STLRH; break;
2061  case MVT::i32: Opc = AArch64::STLRW; break;
2062  case MVT::i64: Opc = AArch64::STLRX; break;
2063  }
2064 
2065  const MCInstrDesc &II = TII.get(Opc);
2066  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2067  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2068  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2069  .addReg(SrcReg)
2070  .addReg(AddrReg)
2071  .addMemOperand(MMO);
2072  return true;
2073 }
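// For illustration: STLRB/STLRH/STLRW/STLRX provide store-release semantics,
// which covers both "release" and "seq_cst" atomic stores on AArch64. STLR has
// no immediate-offset addressing mode, which is why selectStore below passes
// only a plain base register for this path.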
2074 
2075 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2076  MachineMemOperand *MMO) {
2077  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2078  return false;
2079 
2080  // Simplify this down to something we can handle.
2081  if (!simplifyAddress(Addr, VT))
2082  return false;
2083 
2084  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2085  if (!ScaleFactor)
2086  llvm_unreachable("Unexpected value type.");
2087 
2088  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2089  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2090  bool UseScaled = true;
2091  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2092  UseScaled = false;
2093  ScaleFactor = 1;
2094  }
2095 
2096  static const unsigned OpcTable[4][6] = {
2097  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2098  AArch64::STURSi, AArch64::STURDi },
2099  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2100  AArch64::STRSui, AArch64::STRDui },
2101  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2102  AArch64::STRSroX, AArch64::STRDroX },
2103  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2104  AArch64::STRSroW, AArch64::STRDroW }
2105  };
2106 
2107  unsigned Opc;
2108  bool VTIsi1 = false;
2109  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2110  Addr.getOffsetReg();
2111  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2112  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2113  Addr.getExtendType() == AArch64_AM::SXTW)
2114  Idx++;
2115 
2116  switch (VT.SimpleTy) {
2117  default: llvm_unreachable("Unexpected value type.");
2118  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2119  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2120  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2121  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2122  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2123  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2124  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2125  }
2126 
2127  // Storing an i1 requires special handling.
2128  if (VTIsi1 && SrcReg != AArch64::WZR) {
2129  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2130  assert(ANDReg && "Unexpected AND instruction emission failure.");
2131  SrcReg = ANDReg;
2132  }
2133  // Create the base instruction, then add the operands.
2134  const MCInstrDesc &II = TII.get(Opc);
2135  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2136  MachineInstrBuilder MIB =
2137  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2138  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2139 
2140  return true;
2141 }
2142 
2143 bool AArch64FastISel::selectStore(const Instruction *I) {
2144  MVT VT;
2145  const Value *Op0 = I->getOperand(0);
2146  // Verify we have a legal type before going any further. Currently, we handle
2147  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2148  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2149  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2150  return false;
2151 
2152  const Value *PtrV = I->getOperand(1);
2153  if (TLI.supportSwiftError()) {
2154  // Swifterror values can come from either a function parameter with
2155  // swifterror attribute or an alloca with swifterror attribute.
2156  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2157  if (Arg->hasSwiftErrorAttr())
2158  return false;
2159  }
2160 
2161  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2162  if (Alloca->isSwiftError())
2163  return false;
2164  }
2165  }
2166 
2167  // Get the value to be stored into a register. Use the zero register directly
2168  // when possible to avoid an unnecessary copy and a wasted register.
2169  unsigned SrcReg = 0;
2170  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2171  if (CI->isZero())
2172  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2173  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2174  if (CF->isZero() && !CF->isNegative()) {
2175  VT = MVT::getIntegerVT(VT.getSizeInBits());
2176  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2177  }
2178  }
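  // For illustration: reinterpreting the type as a same-width integer lets a
  // "store float 0.0" reuse WZR, e.g. "str wzr, [x0]", instead of first
  // materializing +0.0 in an FP register (register names illustrative).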
2179 
2180  if (!SrcReg)
2181  SrcReg = getRegForValue(Op0);
2182 
2183  if (!SrcReg)
2184  return false;
2185 
2186  auto *SI = cast<StoreInst>(I);
2187 
2188  // Try to emit a STLR for seq_cst/release.
2189  if (SI->isAtomic()) {
2190  AtomicOrdering Ord = SI->getOrdering();
2191  // The non-atomic instructions are sufficient for relaxed stores.
2192  if (isReleaseOrStronger(Ord)) {
2193  // The STLR addressing mode only supports a base reg; pass that directly.
2194  unsigned AddrReg = getRegForValue(PtrV);
2195  return emitStoreRelease(VT, SrcReg, AddrReg,
2196  createMachineMemOperandFor(I));
2197  }
2198  }
2199 
2200  // See if we can handle this address.
2201  Address Addr;
2202  if (!computeAddress(PtrV, Addr, Op0->getType()))
2203  return false;
2204 
2205  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2206  return false;
2207  return true;
2208 }
2209 
2210 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2211  switch (Pred) {
2212  case CmpInst::FCMP_ONE:
2213  case CmpInst::FCMP_UEQ:
2214  default:
2215  // AL is our "false" for now. The other two need more compares.
2216  return AArch64CC::AL;
2217  case CmpInst::ICMP_EQ:
2218  case CmpInst::FCMP_OEQ:
2219  return AArch64CC::EQ;
2220  case CmpInst::ICMP_SGT:
2221  case CmpInst::FCMP_OGT:
2222  return AArch64CC::GT;
2223  case CmpInst::ICMP_SGE:
2224  case CmpInst::FCMP_OGE:
2225  return AArch64CC::GE;
2226  case CmpInst::ICMP_UGT:
2227  case CmpInst::FCMP_UGT:
2228  return AArch64CC::HI;
2229  case CmpInst::FCMP_OLT:
2230  return AArch64CC::MI;
2231  case CmpInst::ICMP_ULE:
2232  case CmpInst::FCMP_OLE:
2233  return AArch64CC::LS;
2234  case CmpInst::FCMP_ORD:
2235  return AArch64CC::VC;
2236  case CmpInst::FCMP_UNO:
2237  return AArch64CC::VS;
2238  case CmpInst::FCMP_UGE:
2239  return AArch64CC::PL;
2240  case CmpInst::ICMP_SLT:
2241  case CmpInst::FCMP_ULT:
2242  return AArch64CC::LT;
2243  case CmpInst::ICMP_SLE:
2244  case CmpInst::FCMP_ULE:
2245  return AArch64CC::LE;
2246  case CmpInst::FCMP_UNE:
2247  case CmpInst::ICMP_NE:
2248  return AArch64CC::NE;
2249  case CmpInst::ICMP_UGE:
2250  return AArch64CC::HS;
2251  case CmpInst::ICMP_ULT:
2252  return AArch64CC::LO;
2253  }
2254 }
2255 
2256 /// \brief Try to emit a combined compare-and-branch instruction.
2257 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2258  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2259  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2260  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2261 
2262  const Value *LHS = CI->getOperand(0);
2263  const Value *RHS = CI->getOperand(1);
2264 
2265  MVT VT;
2266  if (!isTypeSupported(LHS->getType(), VT))
2267  return false;
2268 
2269  unsigned BW = VT.getSizeInBits();
2270  if (BW > 64)
2271  return false;
2272 
2273  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2274  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2275 
2276  // Try to take advantage of fallthrough opportunities.
2277  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2278  std::swap(TBB, FBB);
2279  Predicate = CmpInst::getInversePredicate(Predicate);
2280  }
2281 
2282  int TestBit = -1;
2283  bool IsCmpNE;
2284  switch (Predicate) {
2285  default:
2286  return false;
2287  case CmpInst::ICMP_EQ:
2288  case CmpInst::ICMP_NE:
2289  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2290  std::swap(LHS, RHS);
2291 
2292  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2293  return false;
2294 
2295  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2296  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2297  const Value *AndLHS = AI->getOperand(0);
2298  const Value *AndRHS = AI->getOperand(1);
2299 
2300  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2301  if (C->getValue().isPowerOf2())
2302  std::swap(AndLHS, AndRHS);
2303 
2304  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2305  if (C->getValue().isPowerOf2()) {
2306  TestBit = C->getValue().logBase2();
2307  LHS = AndLHS;
2308  }
2309  }
2310 
2311  if (VT == MVT::i1)
2312  TestBit = 0;
2313 
2314  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2315  break;
2316  case CmpInst::ICMP_SLT:
2317  case CmpInst::ICMP_SGE:
2318  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2319  return false;
2320 
2321  TestBit = BW - 1;
2322  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2323  break;
2324  case CmpInst::ICMP_SGT:
2325  case CmpInst::ICMP_SLE:
2326  if (!isa<ConstantInt>(RHS))
2327  return false;
2328 
2329  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2330  return false;
2331 
2332  TestBit = BW - 1;
2333  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2334  break;
2335  } // end switch
2336 
2337  static const unsigned OpcTable[2][2][2] = {
2338  { {AArch64::CBZW, AArch64::CBZX },
2339  {AArch64::CBNZW, AArch64::CBNZX} },
2340  { {AArch64::TBZW, AArch64::TBZX },
2341  {AArch64::TBNZW, AArch64::TBNZX} }
2342  };
2343 
2344  bool IsBitTest = TestBit != -1;
2345  bool Is64Bit = BW == 64;
2346  if (TestBit < 32 && TestBit >= 0)
2347  Is64Bit = false;
2348 
2349  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2350  const MCInstrDesc &II = TII.get(Opc);
2351 
2352  unsigned SrcReg = getRegForValue(LHS);
2353  if (!SrcReg)
2354  return false;
2355  bool SrcIsKill = hasTrivialKill(LHS);
2356 
2357  if (BW == 64 && !Is64Bit)
2358  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2359  AArch64::sub_32);
2360 
2361  if ((BW < 32) && !IsBitTest)
2362  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2363 
2364  // Emit the combined compare and branch instruction.
2365  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2366  MachineInstrBuilder MIB =
2367  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2368  .addReg(SrcReg, getKillRegState(SrcIsKill));
2369  if (IsBitTest)
2370  MIB.addImm(TestBit);
2371  MIB.addMBB(TBB);
2372 
2373  finishCondBranch(BI->getParent(), TBB, FBB);
2374  return true;
2375 }
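// For illustration, this turns patterns such as
//   br (icmp ne (and i32 %x, 8), 0)   into "tbnz w0, #3, <bb>"
//   br (icmp slt i64 %x, 0)           into "tbnz x0, #63, <bb>"  (sign-bit test)
//   br (icmp eq i32 %x, 0)            into "cbz w0, <bb>"
// so no separate subs/cmp is needed before the conditional branch
// (register names illustrative).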
2376 
2377 bool AArch64FastISel::selectBranch(const Instruction *I) {
2378  const BranchInst *BI = cast<BranchInst>(I);
2379  if (BI->isUnconditional()) {
2380  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2381  fastEmitBranch(MSucc, BI->getDebugLoc());
2382  return true;
2383  }
2384 
2385  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2386  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2387 
2388  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2389  if (CI->hasOneUse() && isValueAvailable(CI)) {
2390  // Try to optimize or fold the cmp.
2391  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2392  switch (Predicate) {
2393  default:
2394  break;
2395  case CmpInst::FCMP_FALSE:
2396  fastEmitBranch(FBB, DbgLoc);
2397  return true;
2398  case CmpInst::FCMP_TRUE:
2399  fastEmitBranch(TBB, DbgLoc);
2400  return true;
2401  }
2402 
2403  // Try to emit a combined compare-and-branch first.
2404  if (emitCompareAndBranch(BI))
2405  return true;
2406 
2407  // Try to take advantage of fallthrough opportunities.
2408  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2409  std::swap(TBB, FBB);
2410  Predicate = CmpInst::getInversePredicate(Predicate);
2411  }
2412 
2413  // Emit the cmp.
2414  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2415  return false;
2416 
2417  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2418  // instruction.
2419  AArch64CC::CondCode CC = getCompareCC(Predicate);
2420  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2421  switch (Predicate) {
2422  default:
2423  break;
2424  case CmpInst::FCMP_UEQ:
2425  ExtraCC = AArch64CC::EQ;
2426  CC = AArch64CC::VS;
2427  break;
2428  case CmpInst::FCMP_ONE:
2429  ExtraCC = AArch64CC::MI;
2430  CC = AArch64CC::GT;
2431  break;
2432  }
2433  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2434 
2435  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2436  if (ExtraCC != AArch64CC::AL) {
2437  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2438  .addImm(ExtraCC)
2439  .addMBB(TBB);
2440  }
2441 
2442  // Emit the branch.
2443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444  .addImm(CC)
2445  .addMBB(TBB);
2446 
2447  finishCondBranch(BI->getParent(), TBB, FBB);
2448  return true;
2449  }
2450  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2451  uint64_t Imm = CI->getZExtValue();
2452  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2453  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2454  .addMBB(Target);
2455 
2456  // Obtain the branch probability and add the target to the successor list.
2457  if (FuncInfo.BPI) {
2458  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2459  BI->getParent(), Target->getBasicBlock());
2460  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2461  } else
2462  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2463  return true;
2464  } else {
2465  AArch64CC::CondCode CC = AArch64CC::AL;
2466  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2467  // Fake-request the condition; otherwise the intrinsic might be completely
2468  // optimized away.
2469  unsigned CondReg = getRegForValue(BI->getCondition());
2470  if (!CondReg)
2471  return false;
2472 
2473  // Emit the branch.
2474  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2475  .addImm(CC)
2476  .addMBB(TBB);
2477 
2478  finishCondBranch(BI->getParent(), TBB, FBB);
2479  return true;
2480  }
2481  }
2482 
2483  unsigned CondReg = getRegForValue(BI->getCondition());
2484  if (CondReg == 0)
2485  return false;
2486  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2487 
2488  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2489  unsigned Opcode = AArch64::TBNZW;
2490  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2491  std::swap(TBB, FBB);
2492  Opcode = AArch64::TBZW;
2493  }
2494 
2495  const MCInstrDesc &II = TII.get(Opcode);
2496  unsigned ConstrainedCondReg
2497  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2498  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2499  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2500  .addImm(0)
2501  .addMBB(TBB);
2502 
2503  finishCondBranch(BI->getParent(), TBB, FBB);
2504  return true;
2505 }
2506 
2507 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2508  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2509  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2510  if (AddrReg == 0)
2511  return false;
2512 
2513  // Emit the indirect branch.
2514  const MCInstrDesc &II = TII.get(AArch64::BR);
2515  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2516  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2517 
2518  // Make sure the CFG is up-to-date.
2519  for (auto *Succ : BI->successors())
2520  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2521 
2522  return true;
2523 }
2524 
2525 bool AArch64FastISel::selectCmp(const Instruction *I) {
2526  const CmpInst *CI = cast<CmpInst>(I);
2527 
2528  // Vectors of i1 are weird: bail out.
2529  if (CI->getType()->isVectorTy())
2530  return false;
2531 
2532  // Try to optimize or fold the cmp.
2533  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2534  unsigned ResultReg = 0;
2535  switch (Predicate) {
2536  default:
2537  break;
2538  case CmpInst::FCMP_FALSE:
2539  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2540  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2541  TII.get(TargetOpcode::COPY), ResultReg)
2542  .addReg(AArch64::WZR, getKillRegState(true));
2543  break;
2544  case CmpInst::FCMP_TRUE:
2545  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2546  break;
2547  }
2548 
2549  if (ResultReg) {
2550  updateValueMap(I, ResultReg);
2551  return true;
2552  }
2553 
2554  // Emit the cmp.
2555  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2556  return false;
2557 
2558  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2559 
2560  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2561  // condition codes are inverted, because they are used by CSINC.
2562  static unsigned CondCodeTable[2][2] = {
2563  { AArch64CC::NE, AArch64CC::VC },
2564  { AArch64CC::PL, AArch64CC::LE }
2565  };
2566  unsigned *CondCodes = nullptr;
2567  switch (Predicate) {
2568  default:
2569  break;
2570  case CmpInst::FCMP_UEQ:
2571  CondCodes = &CondCodeTable[0][0];
2572  break;
2573  case CmpInst::FCMP_ONE:
2574  CondCodes = &CondCodeTable[1][0];
2575  break;
2576  }
2577 
2578  if (CondCodes) {
2579  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2580  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2581  TmpReg1)
2582  .addReg(AArch64::WZR, getKillRegState(true))
2583  .addReg(AArch64::WZR, getKillRegState(true))
2584  .addImm(CondCodes[0]);
2585  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2586  ResultReg)
2587  .addReg(TmpReg1, getKillRegState(true))
2588  .addReg(AArch64::WZR, getKillRegState(true))
2589  .addImm(CondCodes[1]);
2590 
2591  updateValueMap(I, ResultReg);
2592  return true;
2593  }
2594 
2595  // Now set a register based on the comparison.
2596  AArch64CC::CondCode CC = getCompareCC(Predicate);
2597  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2598  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2599  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2600  ResultReg)
2601  .addReg(AArch64::WZR, getKillRegState(true))
2602  .addReg(AArch64::WZR, getKillRegState(true))
2603  .addImm(invertedCC);
2604 
2605  updateValueMap(I, ResultReg);
2606  return true;
2607 }
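// For illustration: "csinc wD, wzr, wzr, <inverted cc>" yields 0 when the
// inverted condition holds and WZR + 1 == 1 otherwise, i.e. exactly 1 when the
// original predicate is true, which is why the inverted code is passed to
// CSINC above. FCMP_UEQ and FCMP_ONE need the two chained CSINCs because each
// of them covers two condition codes.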
2608 
2609 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2610 /// value.
2611 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2612  if (!SI->getType()->isIntegerTy(1))
2613  return false;
2614 
2615  const Value *Src1Val, *Src2Val;
2616  unsigned Opc = 0;
2617  bool NeedExtraOp = false;
2618  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2619  if (CI->isOne()) {
2620  Src1Val = SI->getCondition();
2621  Src2Val = SI->getFalseValue();
2622  Opc = AArch64::ORRWrr;
2623  } else {
2624  assert(CI->isZero());
2625  Src1Val = SI->getFalseValue();
2626  Src2Val = SI->getCondition();
2627  Opc = AArch64::BICWrr;
2628  }
2629  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2630  if (CI->isOne()) {
2631  Src1Val = SI->getCondition();
2632  Src2Val = SI->getTrueValue();
2633  Opc = AArch64::ORRWrr;
2634  NeedExtraOp = true;
2635  } else {
2636  assert(CI->isZero());
2637  Src1Val = SI->getCondition();
2638  Src2Val = SI->getTrueValue();
2639  Opc = AArch64::ANDWrr;
2640  }
2641  }
2642 
2643  if (!Opc)
2644  return false;
2645 
2646  unsigned Src1Reg = getRegForValue(Src1Val);
2647  if (!Src1Reg)
2648  return false;
2649  bool Src1IsKill = hasTrivialKill(Src1Val);
2650 
2651  unsigned Src2Reg = getRegForValue(Src2Val);
2652  if (!Src2Reg)
2653  return false;
2654  bool Src2IsKill = hasTrivialKill(Src2Val);
2655 
2656  if (NeedExtraOp) {
2657  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2658  Src1IsKill = true;
2659  }
2660  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2661  Src1IsKill, Src2Reg, Src2IsKill);
2662  updateValueMap(SI, ResultReg);
2663  return true;
2664 }
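// For illustration: "select i1 %c, i1 true, i1 %b" is emitted as an ORR of the
// condition with %b, "select i1 %c, i1 %a, i1 false" as an AND, and a zero
// true-value uses BIC (AND with the complemented condition), so these i1
// selects need neither a compare nor a CSEL.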
2665 
2666 bool AArch64FastISel::selectSelect(const Instruction *I) {
2667  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2668  MVT VT;
2669  if (!isTypeSupported(I->getType(), VT))
2670  return false;
2671 
2672  unsigned Opc;
2673  const TargetRegisterClass *RC;
2674  switch (VT.SimpleTy) {
2675  default:
2676  return false;
2677  case MVT::i1:
2678  case MVT::i8:
2679  case MVT::i16:
2680  case MVT::i32:
2681  Opc = AArch64::CSELWr;
2682  RC = &AArch64::GPR32RegClass;
2683  break;
2684  case MVT::i64:
2685  Opc = AArch64::CSELXr;
2686  RC = &AArch64::GPR64RegClass;
2687  break;
2688  case MVT::f32:
2689  Opc = AArch64::FCSELSrrr;
2690  RC = &AArch64::FPR32RegClass;
2691  break;
2692  case MVT::f64:
2693  Opc = AArch64::FCSELDrrr;
2694  RC = &AArch64::FPR64RegClass;
2695  break;
2696  }
2697 
2698  const SelectInst *SI = cast<SelectInst>(I);
2699  const Value *Cond = SI->getCondition();
2700  AArch64CC::CondCode CC = AArch64CC::AL;
2701  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2702 
2703  if (optimizeSelect(SI))
2704  return true;
2705 
2706  // Try to pick up the flags, so we don't have to emit another compare.
2707  if (foldXALUIntrinsic(CC, I, Cond)) {
2708  // Fake-request the condition to force emission of the XALU intrinsic.
2709  unsigned CondReg = getRegForValue(Cond);
2710  if (!CondReg)
2711  return false;
2712  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2713  isValueAvailable(Cond)) {
2714  const auto *Cmp = cast<CmpInst>(Cond);
2715  // Try to optimize or fold the cmp.
2716  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2717  const Value *FoldSelect = nullptr;
2718  switch (Predicate) {
2719  default:
2720  break;
2721  case CmpInst::FCMP_FALSE:
2722  FoldSelect = SI->getFalseValue();
2723  break;
2724  case CmpInst::FCMP_TRUE:
2725  FoldSelect = SI->getTrueValue();
2726  break;
2727  }
2728 
2729  if (FoldSelect) {
2730  unsigned SrcReg = getRegForValue(FoldSelect);
2731  if (!SrcReg)
2732  return false;
2733  unsigned UseReg = lookUpRegForValue(SI);
2734  if (UseReg)
2735  MRI.clearKillFlags(UseReg);
2736 
2737  updateValueMap(I, SrcReg);
2738  return true;
2739  }
2740 
2741  // Emit the cmp.
2742  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2743  return false;
2744 
2745  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2746  CC = getCompareCC(Predicate);
2747  switch (Predicate) {
2748  default:
2749  break;
2750  case CmpInst::FCMP_UEQ:
2751  ExtraCC = AArch64CC::EQ;
2752  CC = AArch64CC::VS;
2753  break;
2754  case CmpInst::FCMP_ONE:
2755  ExtraCC = AArch64CC::MI;
2756  CC = AArch64CC::GT;
2757  break;
2758  }
2759  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2760  } else {
2761  unsigned CondReg = getRegForValue(Cond);
2762  if (!CondReg)
2763  return false;
2764  bool CondIsKill = hasTrivialKill(Cond);
2765 
2766  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2767  CondReg = constrainOperandRegClass(II, CondReg, 1);
2768 
2769  // Emit a TST instruction (ANDS wzr, reg, #imm).
2770  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2771  AArch64::WZR)
2772  .addReg(CondReg, getKillRegState(CondIsKill))
2773  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2774  }
2775 
2776  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2777  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2778 
2779  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2780  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2781 
2782  if (!Src1Reg || !Src2Reg)
2783  return false;
2784 
2785  if (ExtraCC != AArch64CC::AL) {
2786  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2787  Src2IsKill, ExtraCC);
2788  Src2IsKill = true;
2789  }
2790  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2791  Src2IsKill, CC);
2792  updateValueMap(I, ResultReg);
2793  return true;
2794 }
2795 
2796 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2797  Value *V = I->getOperand(0);
2798  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2799  return false;
2800 
2801  unsigned Op = getRegForValue(V);
2802  if (Op == 0)
2803  return false;
2804 
2805  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2806  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2807  ResultReg).addReg(Op);
2808  updateValueMap(I, ResultReg);
2809  return true;
2810 }
2811 
2812 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2813  Value *V = I->getOperand(0);
2814  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2815  return false;
2816 
2817  unsigned Op = getRegForValue(V);
2818  if (Op == 0)
2819  return false;
2820 
2821  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2822  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2823  ResultReg).addReg(Op);
2824  updateValueMap(I, ResultReg);
2825  return true;
2826 }
2827 
2828 // FPToUI and FPToSI
2829 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2830  MVT DestVT;
2831  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2832  return false;
2833 
2834  unsigned SrcReg = getRegForValue(I->getOperand(0));
2835  if (SrcReg == 0)
2836  return false;
2837 
2838  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2839  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2840  return false;
2841 
2842  unsigned Opc;
2843  if (SrcVT == MVT::f64) {
2844  if (Signed)
2845  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2846  else
2847  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2848  } else {
2849  if (Signed)
2850  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2851  else
2852  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2853  }
2854  unsigned ResultReg = createResultReg(
2855  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2856  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2857  .addReg(SrcReg);
2858  updateValueMap(I, ResultReg);
2859  return true;
2860 }
2861 
2862 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2863  MVT DestVT;
2864  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2865  return false;
2866  // Let regular ISEL handle FP16
2867  if (DestVT == MVT::f16)
2868  return false;
2869 
2870  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2871  "Unexpected value type.");
2872 
2873  unsigned SrcReg = getRegForValue(I->getOperand(0));
2874  if (!SrcReg)
2875  return false;
2876  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2877 
2878  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2879 
2880  // Handle sign-extension.
2881  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2882  SrcReg =
2883  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2884  if (!SrcReg)
2885  return false;
2886  SrcIsKill = true;
2887  }
2888 
2889  unsigned Opc;
2890  if (SrcVT == MVT::i64) {
2891  if (Signed)
2892  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2893  else
2894  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2895  } else {
2896  if (Signed)
2897  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2898  else
2899  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2900  }
2901 
2902  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2903  SrcIsKill);
2904  updateValueMap(I, ResultReg);
2905  return true;
2906 }
2907 
2908 bool AArch64FastISel::fastLowerArguments() {
2909  if (!FuncInfo.CanLowerReturn)
2910  return false;
2911 
2912  const Function *F = FuncInfo.Fn;
2913  if (F->isVarArg())
2914  return false;
2915 
2916  CallingConv::ID CC = F->getCallingConv();
2917  if (CC != CallingConv::C && CC != CallingConv::Swift)
2918  return false;
2919 
2920  // Only handle simple cases of up to 8 GPR and FPR each.
2921  unsigned GPRCnt = 0;
2922  unsigned FPRCnt = 0;
2923  for (auto const &Arg : F->args()) {
2924  if (Arg.hasAttribute(Attribute::ByVal) ||
2925  Arg.hasAttribute(Attribute::InReg) ||
2926  Arg.hasAttribute(Attribute::StructRet) ||
2927  Arg.hasAttribute(Attribute::SwiftSelf) ||
2928  Arg.hasAttribute(Attribute::SwiftError) ||
2929  Arg.hasAttribute(Attribute::Nest))
2930  return false;
2931 
2932  Type *ArgTy = Arg.getType();
2933  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2934  return false;
2935 
2936  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2937  if (!ArgVT.isSimple())
2938  return false;
2939 
2940  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2941  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2942  return false;
2943 
2944  if (VT.isVector() &&
2945  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2946  return false;
2947 
2948  if (VT >= MVT::i1 && VT <= MVT::i64)
2949  ++GPRCnt;
2950  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2951  VT.is128BitVector())
2952  ++FPRCnt;
2953  else
2954  return false;
2955 
2956  if (GPRCnt > 8 || FPRCnt > 8)
2957  return false;
2958  }
2959 
2960  static const MCPhysReg Registers[6][8] = {
2961  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2962  AArch64::W5, AArch64::W6, AArch64::W7 },
2963  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2964  AArch64::X5, AArch64::X6, AArch64::X7 },
2965  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2966  AArch64::H5, AArch64::H6, AArch64::H7 },
2967  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2968  AArch64::S5, AArch64::S6, AArch64::S7 },
2969  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2970  AArch64::D5, AArch64::D6, AArch64::D7 },
2971  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2972  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2973  };
2974 
2975  unsigned GPRIdx = 0;
2976  unsigned FPRIdx = 0;
2977  for (auto const &Arg : F->args()) {
2978  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2979  unsigned SrcReg;
2980  const TargetRegisterClass *RC;
2981  if (VT >= MVT::i1 && VT <= MVT::i32) {
2982  SrcReg = Registers[0][GPRIdx++];
2983  RC = &AArch64::GPR32RegClass;
2984  VT = MVT::i32;
2985  } else if (VT == MVT::i64) {
2986  SrcReg = Registers[1][GPRIdx++];
2987  RC = &AArch64::GPR64RegClass;
2988  } else if (VT == MVT::f16) {
2989  SrcReg = Registers[2][FPRIdx++];
2990  RC = &AArch64::FPR16RegClass;
2991  } else if (VT == MVT::f32) {
2992  SrcReg = Registers[3][FPRIdx++];
2993  RC = &AArch64::FPR32RegClass;
2994  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2995  SrcReg = Registers[4][FPRIdx++];
2996  RC = &AArch64::FPR64RegClass;
2997  } else if (VT.is128BitVector()) {
2998  SrcReg = Registers[5][FPRIdx++];
2999  RC = &AArch64::FPR128RegClass;
3000  } else
3001  llvm_unreachable("Unexpected value type.");
3002 
3003  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3004  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3005  // Without this, EmitLiveInCopies may eliminate the livein if its only
3006  // use is a bitcast (which isn't turned into an instruction).
3007  unsigned ResultReg = createResultReg(RC);
3008  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3009  TII.get(TargetOpcode::COPY), ResultReg)
3010  .addReg(DstReg, getKillRegState(true));
3011  updateValueMap(&Arg, ResultReg);
3012  }
3013  return true;
3014 }
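// For illustration: this mirrors the AAPCS64 assignment for simple signatures,
// e.g. "i32 f(i32 %a, i64 %b, double %c)" receives %a in W0, %b in X1 and %c
// in D0; signatures with byval/sret/swift* attributes, aggregates, or more
// than eight GPR or FPR arguments are left to SelectionDAG.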
3015 
3016 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3017  SmallVectorImpl<MVT> &OutVTs,
3018  unsigned &NumBytes) {
3019  CallingConv::ID CC = CLI.CallConv;
3020  SmallVector<CCValAssign, 16> ArgLocs;
3021  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3022  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3023 
3024  // Get a count of how many bytes are to be pushed on the stack.
3025  NumBytes = CCInfo.getNextStackOffset();
3026 
3027  // Issue CALLSEQ_START
3028  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3029  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3030  .addImm(NumBytes).addImm(0);
3031 
3032  // Process the args.
3033  for (CCValAssign &VA : ArgLocs) {
3034  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3035  MVT ArgVT = OutVTs[VA.getValNo()];
3036 
3037  unsigned ArgReg = getRegForValue(ArgVal);
3038  if (!ArgReg)
3039  return false;
3040 
3041  // Handle arg promotion: SExt, ZExt, AExt.
3042  switch (VA.getLocInfo()) {
3043  case CCValAssign::Full:
3044  break;
3045  case CCValAssign::SExt: {
3046  MVT DestVT = VA.getLocVT();
3047  MVT SrcVT = ArgVT;
3048  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3049  if (!ArgReg)
3050  return false;
3051  break;
3052  }
3053  case CCValAssign::AExt:
3054  // Intentional fall-through.
3055  case CCValAssign::ZExt: {
3056  MVT DestVT = VA.getLocVT();
3057  MVT SrcVT = ArgVT;
3058  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3059  if (!ArgReg)
3060  return false;
3061  break;
3062  }
3063  default:
3064  llvm_unreachable("Unknown arg promotion!");
3065  }
3066 
3067  // Now copy/store arg to correct locations.
3068  if (VA.isRegLoc() && !VA.needsCustom()) {
3069  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3070  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3071  CLI.OutRegs.push_back(VA.getLocReg());
3072  } else if (VA.needsCustom()) {
3073  // FIXME: Handle custom args.
3074  return false;
3075  } else {
3076  assert(VA.isMemLoc() && "Assuming store on stack.");
3077 
3078  // Don't emit stores for undef values.
3079  if (isa<UndefValue>(ArgVal))
3080  continue;
3081 
3082  // Need to store on the stack.
3083  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3084 
3085  unsigned BEAlign = 0;
3086  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3087  BEAlign = 8 - ArgSize;
3088 
3089  Address Addr;
3090  Addr.setKind(Address::RegBase);
3091  Addr.setReg(AArch64::SP);
3092  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3093 
3094  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3095  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3096  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3097  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3098 
3099  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3100  return false;
3101  }
3102  }
3103  return true;
3104 }
3105 
3106 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3107  unsigned NumBytes) {
3108  CallingConv::ID CC = CLI.CallConv;
3109 
3110  // Issue CALLSEQ_END
3111  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3112  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3113  .addImm(NumBytes).addImm(0);
3114 
3115  // Now the return value.
3116  if (RetVT != MVT::isVoid) {
3117  SmallVector<CCValAssign, 16> RVLocs;
3118  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3119  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3120 
3121  // Only handle a single return value.
3122  if (RVLocs.size() != 1)
3123  return false;
3124 
3125  // Copy all of the result registers out of their specified physreg.
3126  MVT CopyVT = RVLocs[0].getValVT();
3127 
3128  // TODO: Handle big-endian results
3129  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3130  return false;
3131 
3132  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3133  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3134  TII.get(TargetOpcode::COPY), ResultReg)
3135  .addReg(RVLocs[0].getLocReg());
3136  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3137 
3138  CLI.ResultReg = ResultReg;
3139  CLI.NumResultRegs = 1;
3140  }
3141 
3142  return true;
3143 }
3144 
3145 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3146  CallingConv::ID CC = CLI.CallConv;
3147  bool IsTailCall = CLI.IsTailCall;
3148  bool IsVarArg = CLI.IsVarArg;
3149  const Value *Callee = CLI.Callee;
3150  MCSymbol *Symbol = CLI.Symbol;
3151 
3152  if (!Callee && !Symbol)
3153  return false;
3154 
3155  // Allow SelectionDAG isel to handle tail calls.
3156  if (IsTailCall)
3157  return false;
3158 
3159  CodeModel::Model CM = TM.getCodeModel();
3160  // Only support the small-addressing and large code models.
3161  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3162  return false;
3163 
3164  // FIXME: Add large code model support for ELF.
3165  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3166  return false;
3167 
3168  // Let SDISel handle vararg functions.
3169  if (IsVarArg)
3170  return false;
3171 
3172  // FIXME: Only handle *simple* calls for now.
3173  MVT RetVT;
3174  if (CLI.RetTy->isVoidTy())
3175  RetVT = MVT::isVoid;
3176  else if (!isTypeLegal(CLI.RetTy, RetVT))
3177  return false;
3178 
3179  for (auto Flag : CLI.OutFlags)
3180  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3181  Flag.isSwiftSelf() || Flag.isSwiftError())
3182  return false;
3183 
3184  // Set up the argument vectors.
3185  SmallVector<MVT, 16> OutVTs;
3186  OutVTs.reserve(CLI.OutVals.size());
3187 
3188  for (auto *Val : CLI.OutVals) {
3189  MVT VT;
3190  if (!isTypeLegal(Val->getType(), VT) &&
3191  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3192  return false;
3193 
3194  // We don't handle vector parameters yet.
3195  if (VT.isVector() || VT.getSizeInBits() > 64)
3196  return false;
3197 
3198  OutVTs.push_back(VT);
3199  }
3200 
3201  Address Addr;
3202  if (Callee && !computeCallAddress(Callee, Addr))
3203  return false;
3204 
3205  // Handle the arguments now that we've gotten them.
3206  unsigned NumBytes;
3207  if (!processCallArgs(CLI, OutVTs, NumBytes))
3208  return false;
3209 
3210  // Issue the call.
3211  MachineInstrBuilder MIB;
3212  if (Subtarget->useSmallAddressing()) {
3213  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3214  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3215  if (Symbol)
3216  MIB.addSym(Symbol, 0);
3217  else if (Addr.getGlobalValue())
3218  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3219  else if (Addr.getReg()) {
3220  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3221  MIB.addReg(Reg);
3222  } else
3223  return false;
3224  } else {
3225  unsigned CallReg = 0;
3226  if (Symbol) {
3227  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3228  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3229  ADRPReg)
3230  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3231 
3232  CallReg = createResultReg(&AArch64::GPR64RegClass);
3233  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3234  TII.get(AArch64::LDRXui), CallReg)
3235  .addReg(ADRPReg)
3236  .addSym(Symbol,
3237  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3238  } else if (Addr.getGlobalValue())
3239  CallReg = materializeGV(Addr.getGlobalValue());
3240  else if (Addr.getReg())
3241  CallReg = Addr.getReg();
3242 
3243  if (!CallReg)
3244  return false;
3245 
3246  const MCInstrDesc &II = TII.get(AArch64::BLR);
3247  CallReg = constrainOperandRegClass(II, CallReg, 0);
3248  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3249  }
3250 
3251  // Add implicit physical register uses to the call.
3252  for (auto Reg : CLI.OutRegs)
3253  MIB.addReg(Reg, RegState::Implicit);
3254 
3255  // Add a register mask with the call-preserved registers.
3256  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3257  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3258 
3259  CLI.Call = MIB;
3260 
3261  // Finish off the call including any return values.
3262  return finishCall(CLI, RetVT, NumBytes);
3263 }
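// For illustration: with small addressing the call is a direct BL (or a BLR
// through a register for indirect calls), whereas the MachO large-code-model
// path above first materializes the callee address via ADRP + LDR from the
// GOT and then issues a BLR.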
3264 
3265 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3266  if (Alignment)
3267  return Len / Alignment <= 4;
3268  else
3269  return Len < 32;
3270 }
3271 
3272 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3273  uint64_t Len, unsigned Alignment) {
3274  // Make sure we don't bloat code by inlining very large memcpy's.
3275  if (!isMemCpySmall(Len, Alignment))
3276  return false;
3277 
3278  int64_t UnscaledOffset = 0;
3279  Address OrigDest = Dest;
3280  Address OrigSrc = Src;
3281 
3282  while (Len) {
3283  MVT VT;
3284  if (!Alignment || Alignment >= 8) {
3285  if (Len >= 8)
3286  VT = MVT::i64;
3287  else if (Len >= 4)
3288  VT = MVT::i32;
3289  else if (Len >= 2)
3290  VT = MVT::i16;
3291  else {
3292  VT = MVT::i8;
3293  }
3294  } else {
3295  // Bound based on alignment.
3296  if (Len >= 4 && Alignment == 4)
3297  VT = MVT::i32;
3298  else if (Len >= 2 && Alignment == 2)
3299  VT = MVT::i16;
3300  else {
3301  VT = MVT::i8;
3302  }
3303  }
3304 
3305  unsigned ResultReg = emitLoad(VT, VT, Src);
3306  if (!ResultReg)
3307  return false;
3308 
3309  if (!emitStore(VT, ResultReg, Dest))
3310  return false;
3311 
3312  int64_t Size = VT.getSizeInBits() / 8;
3313  Len -= Size;
3314  UnscaledOffset += Size;
3315 
3316  // We need to recompute the unscaled offset for each iteration.
3317  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3318  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3319  }
3320 
3321  return true;
3322 }
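// For illustration: an 8-byte-aligned, 24-byte memcpy passes isMemCpySmall
// (24 / 8 <= 4) and is expanded here into three i64 load/store pairs at
// offsets 0, 8 and 16; larger or poorly aligned copies are left to the normal
// memcpy lowering.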
3323 
3324 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3325 /// into the user. The condition code will only be updated on success.
3326 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3327  const Instruction *I,
3328  const Value *Cond) {
3329  if (!isa<ExtractValueInst>(Cond))
3330  return false;
3331 
3332  const auto *EV = cast<ExtractValueInst>(Cond);
3333  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3334  return false;
3335 
3336  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3337  MVT RetVT;
3338  const Function *Callee = II->getCalledFunction();
3339  Type *RetTy =
3340  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3341  if (!isTypeLegal(RetTy, RetVT))
3342  return false;
3343 
3344  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3345  return false;
3346 
3347  const Value *LHS = II->getArgOperand(0);
3348  const Value *RHS = II->getArgOperand(1);
3349 
3350  // Canonicalize immediate to the RHS.
3351  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3352  isCommutativeIntrinsic(II))
3353  std::swap(LHS, RHS);
3354 
3355  // Simplify multiplies.
3356  Intrinsic::ID IID = II->getIntrinsicID();
3357  switch (IID) {
3358  default:
3359  break;
3360  case Intrinsic::smul_with_overflow:
3361  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3362  if (C->getValue() == 2)
3363  IID = Intrinsic::sadd_with_overflow;
3364  break;
3365  case Intrinsic::umul_with_overflow:
3366  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3367  if (C->getValue() == 2)
3368  IID = Intrinsic::uadd_with_overflow;
3369  break;
3370  }
3371 
3372  AArch64CC::CondCode TmpCC;
3373  switch (IID) {
3374  default:
3375  return false;
3376  case Intrinsic::sadd_with_overflow:
3377  case Intrinsic::ssub_with_overflow:
3378  TmpCC = AArch64CC::VS;
3379  break;
3380  case Intrinsic::uadd_with_overflow:
3381  TmpCC = AArch64CC::HS;
3382  break;
3383  case Intrinsic::usub_with_overflow:
3384  TmpCC = AArch64CC::LO;
3385  break;
3386  case Intrinsic::smul_with_overflow:
3387  case Intrinsic::umul_with_overflow:
3388  TmpCC = AArch64CC::NE;
3389  break;
3390  }
3391 
3392  // Check if both instructions are in the same basic block.
3393  if (!isValueAvailable(II))
3394  return false;
3395 
3396  // Make sure nothing is in the way.
3397  BasicBlock::const_iterator Start(I);
3398  BasicBlock::const_iterator End(II);
3399  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3400  // We only expect extractvalue instructions between the intrinsic and the
3401  // instruction to be selected.
3402  if (!isa<ExtractValueInst>(Itr))
3403  return false;
3404 
3405  // Check that the extractvalue operand comes from the intrinsic.
3406  const auto *EVI = cast<ExtractValueInst>(Itr);
3407  if (EVI->getAggregateOperand() != II)
3408  return false;
3409  }
3410 
3411  CC = TmpCC;
3412  return true;
3413 }
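// Illustrative IR pattern (hypothetical, not from the original source) that
// this routine accepts: only extractvalues sit between the intrinsic and its
// user, so the consumer can branch on the flags set by the add/sub/mul
// instead of materializing the i1 overflow bit:
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %val = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont   ; lowered as b.vs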
3414 
3415 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3416  // FIXME: Handle more intrinsics.
3417  switch (II->getIntrinsicID()) {
3418  default: return false;
3419  case Intrinsic::frameaddress: {
3420  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3421  MFI.setFrameAddressIsTaken(true);
3422 
3423  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3424  unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3425  unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3427  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3428  // Recursively load frame address
3429  // ldr x0, [fp]
3430  // ldr x0, [x0]
3431  // ldr x0, [x0]
3432  // ...
3433  unsigned DestReg;
3434  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3435  while (Depth--) {
3436  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3437  SrcReg, /*IsKill=*/true, 0);
3438  assert(DestReg && "Unexpected LDR instruction emission failure.");
3439  SrcReg = DestReg;
3440  }
3441 
3442  updateValueMap(II, SrcReg);
3443  return true;
3444  }
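// Illustrative sketch (not part of the original source), with hypothetical
// register numbers: llvm.frameaddress(i32 2) would produce roughly
//   mov x8, x29        ; copy of the frame pointer
//   ldr x8, [x8]       ; walk one frame up
//   ldr x8, [x8]       ; walk a second frame up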
3445  case Intrinsic::memcpy:
3446  case Intrinsic::memmove: {
3447  const auto *MTI = cast<MemTransferInst>(II);
3448  // Don't handle volatile.
3449  if (MTI->isVolatile())
3450  return false;
3451 
3452  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3453  // we would emit dead code because we don't currently handle memmoves.
3454  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3455  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3456  // Small memcpy's are common enough that we want to do them without a call
3457  // if possible.
3458  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3459  unsigned Alignment = MTI->getAlignment();
3460  if (isMemCpySmall(Len, Alignment)) {
3461  Address Dest, Src;
3462  if (!computeAddress(MTI->getRawDest(), Dest) ||
3463  !computeAddress(MTI->getRawSource(), Src))
3464  return false;
3465  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3466  return true;
3467  }
3468  }
3469 
3470  if (!MTI->getLength()->getType()->isIntegerTy(64))
3471  return false;
3472 
3473  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3474  // Fast instruction selection doesn't support the special
3475  // address spaces.
3476  return false;
3477 
3478  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3479  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3480  }
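// Illustrative note (not part of the original source): a constant-length
// memcpy such as
//   call void @llvm.memcpy...(i8* %d, i8* %s, i64 16, ...)   ; signature abbreviated
// is inlined above via tryEmitSmallMemCpy when it passes isMemCpySmall;
// otherwise (and for all memmoves) it falls back to lowerCallTo, i.e. a
// plain call to memcpy/memmove.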
3481  case Intrinsic::memset: {
3482  const MemSetInst *MSI = cast<MemSetInst>(II);
3483  // Don't handle volatile.
3484  if (MSI->isVolatile())
3485  return false;
3486 
3487  if (!MSI->getLength()->getType()->isIntegerTy(64))
3488  return false;
3489 
3490  if (MSI->getDestAddressSpace() > 255)
3491  // Fast instruction selection doesn't support the special
3492  // address spaces.
3493  return false;
3494 
3495  return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3496  }
3497  case Intrinsic::sin:
3498  case Intrinsic::cos:
3499  case Intrinsic::pow: {
3500  MVT RetVT;
3501  if (!isTypeLegal(II->getType(), RetVT))
3502  return false;
3503 
3504  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3505  return false;
3506 
3507  static const RTLIB::Libcall LibCallTable[3][2] = {
3508  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3509  { RTLIB::COS_F32, RTLIB::COS_F64 },
3510  { RTLIB::POW_F32, RTLIB::POW_F64 }
3511  };
3512  RTLIB::Libcall LC;
3513  bool Is64Bit = RetVT == MVT::f64;
3514  switch (II->getIntrinsicID()) {
3515  default:
3516  llvm_unreachable("Unexpected intrinsic.");
3517  case Intrinsic::sin:
3518  LC = LibCallTable[0][Is64Bit];
3519  break;
3520  case Intrinsic::cos:
3521  LC = LibCallTable[1][Is64Bit];
3522  break;
3523  case Intrinsic::pow:
3524  LC = LibCallTable[2][Is64Bit];
3525  break;
3526  }
3527 
3528  ArgListTy Args;
3529  Args.reserve(II->getNumArgOperands());
3530 
3531  // Populate the argument list.
3532  for (auto &Arg : II->arg_operands()) {
3533  ArgListEntry Entry;
3534  Entry.Val = Arg;
3535  Entry.Ty = Arg->getType();
3536  Args.push_back(Entry);
3537  }
3538 
3539  CallLoweringInfo CLI;
3540  MCContext &Ctx = MF->getContext();
3541  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3542  TLI.getLibcallName(LC), std::move(Args));
3543  if (!lowerCallTo(CLI))
3544  return false;
3545  updateValueMap(II, CLI.ResultReg);
3546  return true;
3547  }
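// Illustrative note (not part of the original source): a call to
// llvm.sin.f64 is lowered above as a libcall to sin(), llvm.sin.f32 to
// sinf(), and likewise cos/cosf and pow/powf, using the default libcall
// names from TargetLowering.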
3548  case Intrinsic::fabs: {
3549  MVT VT;
3550  if (!isTypeLegal(II->getType(), VT))
3551  return false;
3552 
3553  unsigned Opc;
3554  switch (VT.SimpleTy) {
3555  default:
3556  return false;
3557  case MVT::f32:
3558  Opc = AArch64::FABSSr;
3559  break;
3560  case MVT::f64:
3561  Opc = AArch64::FABSDr;
3562  break;
3563  }
3564  unsigned SrcReg = getRegForValue(II->getOperand(0));
3565  if (!SrcReg)
3566  return false;
3567  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3568  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3569  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3570  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3571  updateValueMap(II, ResultReg);
3572  return true;
3573  }
3574  case Intrinsic::trap:
3575  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3576  .addImm(1);
3577  return true;
3578 
3579  case Intrinsic::sqrt: {
3580  Type *RetTy = II->getCalledFunction()->getReturnType();
3581 
3582  MVT VT;
3583  if (!isTypeLegal(RetTy, VT))
3584  return false;
3585 
3586  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3587  if (!Op0Reg)
3588  return false;
3589  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3590 
3591  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3592  if (!ResultReg)
3593  return false;
3594 
3595  updateValueMap(II, ResultReg);
3596  return true;
3597  }
3598  case Intrinsic::sadd_with_overflow:
3599  case Intrinsic::uadd_with_overflow:
3600  case Intrinsic::ssub_with_overflow:
3601  case Intrinsic::usub_with_overflow:
3602  case Intrinsic::smul_with_overflow:
3603  case Intrinsic::umul_with_overflow: {
3604  // This implements the basic lowering of the xalu with overflow intrinsics.
3605  const Function *Callee = II->getCalledFunction();
3606  auto *Ty = cast<StructType>(Callee->getReturnType());
3607  Type *RetTy = Ty->getTypeAtIndex(0U);
3608 
3609  MVT VT;
3610  if (!isTypeLegal(RetTy, VT))
3611  return false;
3612 
3613  if (VT != MVT::i32 && VT != MVT::i64)
3614  return false;
3615 
3616  const Value *LHS = II->getArgOperand(0);
3617  const Value *RHS = II->getArgOperand(1);
3618  // Canonicalize immediate to the RHS.
3619  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3620  isCommutativeIntrinsic(II))
3621  std::swap(LHS, RHS);
3622 
3623  // Simplify multiplies.
3624  Intrinsic::ID IID = II->getIntrinsicID();
3625  switch (IID) {
3626  default:
3627  break;
3628  case Intrinsic::smul_with_overflow:
3629  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3630  if (C->getValue() == 2) {
3631  IID = Intrinsic::sadd_with_overflow;
3632  RHS = LHS;
3633  }
3634  break;
3635  case Intrinsic::umul_with_overflow:
3636  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3637  if (C->getValue() == 2) {
3638  IID = Intrinsic::uadd_with_overflow;
3639  RHS = LHS;
3640  }
3641  break;
3642  }
3643 
3644  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3645  AArch64CC::CondCode CC = AArch64CC::Invalid;
3646  switch (IID) {
3647  default: llvm_unreachable("Unexpected intrinsic!");
3648  case Intrinsic::sadd_with_overflow:
3649  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3650  CC = AArch64CC::VS;
3651  break;
3652  case Intrinsic::uadd_with_overflow:
3653  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3654  CC = AArch64CC::HS;
3655  break;
3656  case Intrinsic::ssub_with_overflow:
3657  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3658  CC = AArch64CC::VS;
3659  break;
3660  case Intrinsic::usub_with_overflow:
3661  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3662  CC = AArch64CC::LO;
3663  break;
3664  case Intrinsic::smul_with_overflow: {
3665  CC = AArch64CC::NE;
3666  unsigned LHSReg = getRegForValue(LHS);
3667  if (!LHSReg)
3668  return false;
3669  bool LHSIsKill = hasTrivialKill(LHS);
3670 
3671  unsigned RHSReg = getRegForValue(RHS);
3672  if (!RHSReg)
3673  return false;
3674  bool RHSIsKill = hasTrivialKill(RHS);
3675 
3676  if (VT == MVT::i32) {
3677  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3678  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3679  /*IsKill=*/false, 32);
3680  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3681  AArch64::sub_32);
3682  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3683  AArch64::sub_32);
3684  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3685  AArch64_AM::ASR, 31, /*WantResult=*/false);
3686  } else {
3687  assert(VT == MVT::i64 && "Unexpected value type.");
3688  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3689  // reused in the next instruction.
3690  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3691  /*IsKill=*/false);
3692  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3693  RHSReg, RHSIsKill);
3694  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3695  AArch64_AM::ASR, 63, /*WantResult=*/false);
3696  }
3697  break;
3698  }
3699  case Intrinsic::umul_with_overflow: {
3700  CC = AArch64CC::NE;
3701  unsigned LHSReg = getRegForValue(LHS);
3702  if (!LHSReg)
3703  return false;
3704  bool LHSIsKill = hasTrivialKill(LHS);
3705 
3706  unsigned RHSReg = getRegForValue(RHS);
3707  if (!RHSReg)
3708  return false;
3709  bool RHSIsKill = hasTrivialKill(RHS);
3710 
3711  if (VT == MVT::i32) {
3712  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3713  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3714  /*IsKill=*/false, AArch64_AM::LSR, 32,
3715  /*WantResult=*/false);
3716  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3717  AArch64::sub_32);
3718  } else {
3719  assert(VT == MVT::i64 && "Unexpected value type.");
3720  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3721  // reused in the next instruction.
3722  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3723  /*IsKill=*/false);
3724  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3725  RHSReg, RHSIsKill);
3726  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3727  /*IsKill=*/false, /*WantResult=*/false);
3728  }
3729  break;
3730  }
3731  }
3732 
3733  if (MulReg) {
3734  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3735  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3736  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3737  }
3738 
3739  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3740  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3741  /*IsKill=*/true, getInvertedCondCode(CC));
3742  (void)ResultReg2;
3743  assert((ResultReg1 + 1) == ResultReg2 &&
3744  "Nonconsecutive result registers.");
3745  updateValueMap(II, ResultReg1, 2);
3746  return true;
3747  }
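// Illustrative sketch (not part of the original source), with hypothetical
// register numbers: i32 llvm.smul.with.overflow is lowered above as roughly
//   smull x8, w0, w1          ; full 64-bit product
//   lsr   x9, x8, #32         ; high 32 bits
//   cmp   w9, w8, asr #31     ; NE iff the product overflowed i32
//   csinc w10, wzr, wzr, eq   ; i.e. cset w10, ne - the overflow bit
// The low 32 bits of the product are copied into the first result register
// by the COPY emitted above.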
3748  }
3749  return false;
3750 }
3751 
3752 bool AArch64FastISel::selectRet(const Instruction *I) {
3753  const ReturnInst *Ret = cast<ReturnInst>(I);
3754  const Function &F = *I->getParent()->getParent();
3755 
3756  if (!FuncInfo.CanLowerReturn)
3757  return false;
3758 
3759  if (F.isVarArg())
3760  return false;
3761 
3762  if (TLI.supportSwiftError() &&
3763  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3764  return false;
3765 
3766  if (TLI.supportSplitCSR(FuncInfo.MF))
3767  return false;
3768 
3769  // Build a list of return value registers.
3770  SmallVector<unsigned, 4> RetRegs;
3771 
3772  if (Ret->getNumOperands() > 0) {
3773  CallingConv::ID CC = F.getCallingConv();
3774  SmallVector<ISD::OutputArg, 4> Outs;
3775  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3776 
3777  // Analyze operands of the call, assigning locations to each operand.
3778  SmallVector<CCValAssign, 16> ValLocs;
3779  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3780  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3781  : RetCC_AArch64_AAPCS;
3782  CCInfo.AnalyzeReturn(Outs, RetCC);
3783 
3784  // Only handle a single return value for now.
3785  if (ValLocs.size() != 1)
3786  return false;
3787 
3788  CCValAssign &VA = ValLocs[0];
3789  const Value *RV = Ret->getOperand(0);
3790 
3791  // Don't bother handling odd stuff for now.
3792  if ((VA.getLocInfo() != CCValAssign::Full) &&
3793  (VA.getLocInfo() != CCValAssign::BCvt))
3794  return false;
3795 
3796  // Only handle register returns for now.
3797  if (!VA.isRegLoc())
3798  return false;
3799 
3800  unsigned Reg = getRegForValue(RV);
3801  if (Reg == 0)
3802  return false;
3803 
3804  unsigned SrcReg = Reg + VA.getValNo();
3805  unsigned DestReg = VA.getLocReg();
3806  // Avoid a cross-class copy. This is very unlikely.
3807  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3808  return false;
3809 
3810  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3811  if (!RVEVT.isSimple())
3812  return false;
3813 
3814  // Vectors (of > 1 lane) in big endian need tricky handling.
3815  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3816  !Subtarget->isLittleEndian())
3817  return false;
3818 
3819  MVT RVVT = RVEVT.getSimpleVT();
3820  if (RVVT == MVT::f128)
3821  return false;
3822 
3823  MVT DestVT = VA.getValVT();
3824  // Special handling for extended integers.
3825  if (RVVT != DestVT) {
3826  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3827  return false;
3828 
3829  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3830  return false;
3831 
3832  bool IsZExt = Outs[0].Flags.isZExt();
3833  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3834  if (SrcReg == 0)
3835  return false;
3836  }
3837 
3838  // Make the copy.
3839  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3840  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3841 
3842  // Add register to return instruction.
3843  RetRegs.push_back(VA.getLocReg());
3844  }
3845 
3846  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3847  TII.get(AArch64::RET_ReallyLR));
3848  for (unsigned RetReg : RetRegs)
3849  MIB.addReg(RetReg, RegState::Implicit);
3850  return true;
3851 }
3852 
3853 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3854  Type *DestTy = I->getType();
3855  Value *Op = I->getOperand(0);
3856  Type *SrcTy = Op->getType();
3857 
3858  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3859  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3860  if (!SrcEVT.isSimple())
3861  return false;
3862  if (!DestEVT.isSimple())
3863  return false;
3864 
3865  MVT SrcVT = SrcEVT.getSimpleVT();
3866  MVT DestVT = DestEVT.getSimpleVT();
3867 
3868  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3869  SrcVT != MVT::i8)
3870  return false;
3871  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3872  DestVT != MVT::i1)
3873  return false;
3874 
3875  unsigned SrcReg = getRegForValue(Op);
3876  if (!SrcReg)
3877  return false;
3878  bool SrcIsKill = hasTrivialKill(Op);
3879 
3880  // If we're truncating from i64 to a smaller non-legal type then generate an
3881  // AND. Otherwise, we know the high bits are undefined and a truncate only
3882  // generates a COPY. We cannot also mark the source register as the result
3883  // register, because this can incorrectly transfer the kill flag onto the
3884  // source register.
3885  unsigned ResultReg;
3886  if (SrcVT == MVT::i64) {
3887  uint64_t Mask = 0;
3888  switch (DestVT.SimpleTy) {
3889  default:
3890  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3891  return false;
3892  case MVT::i1:
3893  Mask = 0x1;
3894  break;
3895  case MVT::i8:
3896  Mask = 0xff;
3897  break;
3898  case MVT::i16:
3899  Mask = 0xffff;
3900  break;
3901  }
3902  // Issue an extract_subreg to get the lower 32-bits.
3903  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3904  AArch64::sub_32);
3905  // Create the AND instruction which performs the actual truncation.
3906  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3907  assert(ResultReg && "Unexpected AND instruction emission failure.");
3908  } else {
3909  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3911  TII.get(TargetOpcode::COPY), ResultReg)
3912  .addReg(SrcReg, getKillRegState(SrcIsKill));
3913  }
3914 
3915  updateValueMap(I, ResultReg);
3916  return true;
3917 }
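// Illustrative sketch (not part of the original source): "trunc i64 %x to i8"
// becomes an extract of the low 32-bit subregister followed by a mask,
// roughly
//   and w8, w8, #0xff
// where w8 is the sub_32 view of the source (register numbers hypothetical);
// truncations from i32 and narrower need only the COPY in the else branch.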
3918 
3919 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3920  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3921  DestVT == MVT::i64) &&
3922  "Unexpected value type.");
3923  // Handle i8 and i16 as i32.
3924  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3925  DestVT = MVT::i32;
3926 
3927  if (IsZExt) {
3928  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3929  assert(ResultReg && "Unexpected AND instruction emission failure.");
3930  if (DestVT == MVT::i64) {
3931  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3932  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3933  unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3934  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3935  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3936  .addImm(0)
3937  .addReg(ResultReg)
3938  .addImm(AArch64::sub_32);
3939  ResultReg = Reg64;
3940  }
3941  return ResultReg;
3942  } else {
3943  if (DestVT == MVT::i64) {
3944  // FIXME: We're SExt i1 to i64.
3945  return 0;
3946  }
3947  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3948  /*TODO:IsKill=*/false, 0, 0);
3949  }
3950 }
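// Illustrative note (not part of the original source): "zext i1 %b to i32"
// becomes "and w8, w8, #0x1"; for an i64 destination the ANDWri result is
// additionally wrapped in a SUBREG_TO_REG, which costs no instruction.
// Sign extension uses "sbfx w8, w8, #0, #1" (SBFMWri with immr = imms = 0).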
3951 
3952 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3953  unsigned Op1, bool Op1IsKill) {
3954  unsigned Opc, ZReg;
3955  switch (RetVT.SimpleTy) {
3956  default: return 0;
3957  case MVT::i8:
3958  case MVT::i16:
3959  case MVT::i32:
3960  RetVT = MVT::i32;
3961  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3962  case MVT::i64:
3963  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3964  }
3965 
3966  const TargetRegisterClass *RC =
3967  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3968  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3969  ZReg, /*IsKill=*/true);
3970 }
3971 
3972 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3973  unsigned Op1, bool Op1IsKill) {
3974  if (RetVT != MVT::i64)
3975  return 0;
3976 
3977  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3978  Op0, Op0IsKill, Op1, Op1IsKill,
3979  AArch64::XZR, /*IsKill=*/true);
3980 }
3981 
3982 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3983  unsigned Op1, bool Op1IsKill) {
3984  if (RetVT != MVT::i64)
3985  return 0;
3986 
3987  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3988  Op0, Op0IsKill, Op1, Op1IsKill,
3989  AArch64::XZR, /*IsKill=*/true);
3990 }
3991 
3992 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3993  unsigned Op1Reg, bool Op1IsKill) {
3994  unsigned Opc = 0;
3995  bool NeedTrunc = false;
3996  uint64_t Mask = 0;
3997  switch (RetVT.SimpleTy) {
3998  default: return 0;
3999  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4000  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4001  case MVT::i32: Opc = AArch64::LSLVWr; break;
4002  case MVT::i64: Opc = AArch64::LSLVXr; break;
4003  }
4004 
4005  const TargetRegisterClass *RC =
4006  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4007  if (NeedTrunc) {
4008  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4009  Op1IsKill = true;
4010  }
4011  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4012  Op1IsKill);
4013  if (NeedTrunc)
4014  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4015  return ResultReg;
4016 }
4017 
4018 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4019  bool Op0IsKill, uint64_t Shift,
4020  bool IsZExt) {
4021  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4022  "Unexpected source/return type pair.");
4023  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4024  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4025  "Unexpected source value type.");
4026  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4027  RetVT == MVT::i64) && "Unexpected return value type.");
4028 
4029  bool Is64Bit = (RetVT == MVT::i64);
4030  unsigned RegSize = Is64Bit ? 64 : 32;
4031  unsigned DstBits = RetVT.getSizeInBits();
4032  unsigned SrcBits = SrcVT.getSizeInBits();
4033  const TargetRegisterClass *RC =
4034  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4035 
4036  // Just emit a copy for "zero" shifts.
4037  if (Shift == 0) {
4038  if (RetVT == SrcVT) {
4039  unsigned ResultReg = createResultReg(RC);
4040  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4041  TII.get(TargetOpcode::COPY), ResultReg)
4042  .addReg(Op0, getKillRegState(Op0IsKill));
4043  return ResultReg;
4044  } else
4045  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4046  }
4047 
4048  // Don't deal with undefined shifts.
4049  if (Shift >= DstBits)
4050  return 0;
4051 
4052  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4053  // {S|U}BFM Wd, Wn, #r, #s
4054  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4055 
4056  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4057  // %2 = shl i16 %1, 4
4058  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4059  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4060  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4061  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4062 
4063  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4064  // %2 = shl i16 %1, 8
4065  // Wd<32+7-24,32-24> = Wn<7:0>
4066  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4067  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4068  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4069 
4070  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4071  // %2 = shl i16 %1, 12
4072  // Wd<32+3-20,32-20> = Wn<3:0>
4073  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4074  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4075  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4076 
4077  unsigned ImmR = RegSize - Shift;
4078  // Limit the width to the length of the source type.
4079  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4080  static const unsigned OpcTable[2][2] = {
4081  {AArch64::SBFMWri, AArch64::SBFMXri},
4082  {AArch64::UBFMWri, AArch64::UBFMXri}
4083  };
4084  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4085  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4086  unsigned TmpReg = MRI.createVirtualRegister(RC);
4087  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4088  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4089  .addImm(0)
4090  .addReg(Op0, getKillRegState(Op0IsKill))
4091  .addImm(AArch64::sub_32);
4092  Op0 = TmpReg;
4093  Op0IsKill = true;
4094  }
4095  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4096 }
4097 
4098 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4099  unsigned Op1Reg, bool Op1IsKill) {
4100  unsigned Opc = 0;
4101  bool NeedTrunc = false;
4102  uint64_t Mask = 0;
4103  switch (RetVT.SimpleTy) {
4104  default: return 0;
4105  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4106  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4107  case MVT::i32: Opc = AArch64::LSRVWr; break;
4108  case MVT::i64: Opc = AArch64::LSRVXr; break;
4109  }
4110 
4111  const TargetRegisterClass *RC =
4112  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4113  if (NeedTrunc) {
4114  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4115  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4116  Op0IsKill = Op1IsKill = true;
4117  }
4118  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4119  Op1IsKill);
4120  if (NeedTrunc)
4121  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4122  return ResultReg;
4123 }
4124 
4125 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4126  bool Op0IsKill, uint64_t Shift,
4127  bool IsZExt) {
4128  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4129  "Unexpected source/return type pair.");
4130  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4131  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4132  "Unexpected source value type.");
4133  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4134  RetVT == MVT::i64) && "Unexpected return value type.");
4135 
4136  bool Is64Bit = (RetVT == MVT::i64);
4137  unsigned RegSize = Is64Bit ? 64 : 32;
4138  unsigned DstBits = RetVT.getSizeInBits();
4139  unsigned SrcBits = SrcVT.getSizeInBits();
4140  const TargetRegisterClass *RC =
4141  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4142 
4143  // Just emit a copy for "zero" shifts.
4144  if (Shift == 0) {
4145  if (RetVT == SrcVT) {
4146  unsigned ResultReg = createResultReg(RC);
4147  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4148  TII.get(TargetOpcode::COPY), ResultReg)
4149  .addReg(Op0, getKillRegState(Op0IsKill));
4150  return ResultReg;
4151  } else
4152  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4153  }
4154 
4155  // Don't deal with undefined shifts.
4156  if (Shift >= DstBits)
4157  return 0;
4158 
4159  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4160  // {S|U}BFM Wd, Wn, #r, #s
4161  // Wd<s-r:0> = Wn<s:r> when r <= s
4162 
4163  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4164  // %2 = lshr i16 %1, 4
4165  // Wd<7-4:0> = Wn<7:4>
4166  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4167  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4168  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4169 
4170  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4171  // %2 = lshr i16 %1, 8
4172  // Wd<7-7:0> = Wn<7:7>
4173  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4174  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4175  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4176 
4177  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4178  // %2 = lshr i16 %1, 12
4179  // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4180  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4181  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4182  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4183 
4184  if (Shift >= SrcBits && IsZExt)
4185  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4186 
4187  // It is not possible to fold a sign-extend into the LShr instruction. In this
4188  // case emit a sign-extend.
4189  if (!IsZExt) {
4190  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4191  if (!Op0)
4192  return 0;
4193  Op0IsKill = true;
4194  SrcVT = RetVT;
4195  SrcBits = SrcVT.getSizeInBits();
4196  IsZExt = true;
4197  }
4198 
4199  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4200  unsigned ImmS = SrcBits - 1;
4201  static const unsigned OpcTable[2][2] = {
4202  {AArch64::SBFMWri, AArch64::SBFMXri},
4203  {AArch64::UBFMWri, AArch64::UBFMXri}
4204  };
4205  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4206  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4207  unsigned TmpReg = MRI.createVirtualRegister(RC);
4208  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4209  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4210  .addImm(0)
4211  .addReg(Op0, getKillRegState(Op0IsKill))
4212  .addImm(AArch64::sub_32);
4213  Op0 = TmpReg;
4214  Op0IsKill = true;
4215  }
4216  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4217 }
4218 
4219 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4220  unsigned Op1Reg, bool Op1IsKill) {
4221  unsigned Opc = 0;
4222  bool NeedTrunc = false;
4223  uint64_t Mask = 0;
4224  switch (RetVT.SimpleTy) {
4225  default: return 0;
4226  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4227  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4228  case MVT::i32: Opc = AArch64::ASRVWr; break;
4229  case MVT::i64: Opc = AArch64::ASRVXr; break;
4230  }
4231 
4232  const TargetRegisterClass *RC =
4233  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4234  if (NeedTrunc) {
4235  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4236  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4237  Op0IsKill = Op1IsKill = true;
4238  }
4239  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4240  Op1IsKill);
4241  if (NeedTrunc)
4242  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4243  return ResultReg;
4244 }
4245 
4246 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4247  bool Op0IsKill, uint64_t Shift,
4248  bool IsZExt) {
4249  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4250  "Unexpected source/return type pair.");
4251  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4252  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4253  "Unexpected source value type.");
4254  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4255  RetVT == MVT::i64) && "Unexpected return value type.");
4256 
4257  bool Is64Bit = (RetVT == MVT::i64);
4258  unsigned RegSize = Is64Bit ? 64 : 32;
4259  unsigned DstBits = RetVT.getSizeInBits();
4260  unsigned SrcBits = SrcVT.getSizeInBits();
4261  const TargetRegisterClass *RC =
4262  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4263 
4264  // Just emit a copy for "zero" shifts.
4265  if (Shift == 0) {
4266  if (RetVT == SrcVT) {
4267  unsigned ResultReg = createResultReg(RC);
4268  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4269  TII.get(TargetOpcode::COPY), ResultReg)
4270  .addReg(Op0, getKillRegState(Op0IsKill));
4271  return ResultReg;
4272  } else
4273  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4274  }
4275 
4276  // Don't deal with undefined shifts.
4277  if (Shift >= DstBits)
4278  return 0;
4279 
4280  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4281  // {S|U}BFM Wd, Wn, #r, #s
4282  // Wd<s-r:0> = Wn<s:r> when r <= s
4283 
4284  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4285  // %2 = ashr i16 %1, 4
4286  // Wd<7-4:0> = Wn<7:4>
4287  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4288  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4289  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4290 
4291  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4292  // %2 = ashr i16 %1, 8
4293  // Wd<7-7:0> = Wn<7:7>
4294  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4295  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4296  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4297 
4298  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4299  // %2 = ashr i16 %1, 12
4300  // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4301  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4302  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4303  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4304 
4305  if (Shift >= SrcBits && IsZExt)
4306  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4307 
4308  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4309  unsigned ImmS = SrcBits - 1;
4310  static const unsigned OpcTable[2][2] = {
4311  {AArch64::SBFMWri, AArch64::SBFMXri},
4312  {AArch64::UBFMWri, AArch64::UBFMXri}
4313  };
4314  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4315  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4316  unsigned TmpReg = MRI.createVirtualRegister(RC);
4317  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4318  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4319  .addImm(0)
4320  .addReg(Op0, getKillRegState(Op0IsKill))
4321  .addImm(AArch64::sub_32);
4322  Op0 = TmpReg;
4323  Op0IsKill = true;
4324  }
4325  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4326 }
4327 
4328 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4329  bool IsZExt) {
4330  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4331 
4332  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4333  // DestVT are odd things, so test to make sure that they are both types we can
4334  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4335  // bail out to SelectionDAG.
4336  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4337  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4338  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4339  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4340  return 0;
4341 
4342  unsigned Opc;
4343  unsigned Imm = 0;
4344 
4345  switch (SrcVT.SimpleTy) {
4346  default:
4347  return 0;
4348  case MVT::i1:
4349  return emiti1Ext(SrcReg, DestVT, IsZExt);
4350  case MVT::i8:
4351  if (DestVT == MVT::i64)
4352  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4353  else
4354  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4355  Imm = 7;
4356  break;
4357  case MVT::i16:
4358  if (DestVT == MVT::i64)
4359  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4360  else
4361  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4362  Imm = 15;
4363  break;
4364  case MVT::i32:
4365  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4366  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4367  Imm = 31;
4368  break;
4369  }
4370 
4371  // Handle i8 and i16 as i32.
4372  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4373  DestVT = MVT::i32;
4374  else if (DestVT == MVT::i64) {
4375  unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4376  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4377  TII.get(AArch64::SUBREG_TO_REG), Src64)
4378  .addImm(0)
4379  .addReg(SrcReg)
4380  .addImm(AArch64::sub_32);
4381  SrcReg = Src64;
4382  }
4383 
4384  const TargetRegisterClass *RC =
4385  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4386  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4387 }
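// Illustrative note (not part of the original source): the UBFM/SBFM forms
// chosen above are the standard extension aliases, e.g. UBFM Wd, Wn, #0, #7
// is "uxtb" (zext i8), SBFM Wd, Wn, #0, #15 is "sxth" (sext i16), and
// SBFM Xd, Xn, #0, #31 is "sxtw" (sext i32 to i64). The SUBREG_TO_REG used
// for 64-bit destinations only adjusts the register class and typically
// disappears at register allocation.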
4388 
4389 static bool isZExtLoad(const MachineInstr *LI) {
4390  switch (LI->getOpcode()) {
4391  default:
4392  return false;
4393  case AArch64::LDURBBi:
4394  case AArch64::LDURHHi:
4395  case AArch64::LDURWi:
4396  case AArch64::LDRBBui:
4397  case AArch64::LDRHHui:
4398  case AArch64::LDRWui:
4399  case AArch64::LDRBBroX:
4400  case AArch64::LDRHHroX:
4401  case AArch64::LDRWroX:
4402  case AArch64::LDRBBroW:
4403  case AArch64::LDRHHroW:
4404  case AArch64::LDRWroW:
4405  return true;
4406  }
4407 }
4408 
4409 static bool isSExtLoad(const MachineInstr *LI) {
4410  switch (LI->getOpcode()) {
4411  default:
4412  return false;
4413  case AArch64::LDURSBWi:
4414  case AArch64::LDURSHWi:
4415  case AArch64::LDURSBXi:
4416  case AArch64::LDURSHXi:
4417  case AArch64::LDURSWi:
4418  case AArch64::LDRSBWui:
4419  case AArch64::LDRSHWui:
4420  case AArch64::LDRSBXui:
4421  case AArch64::LDRSHXui:
4422  case AArch64::LDRSWui:
4423  case AArch64::LDRSBWroX:
4424  case AArch64::LDRSHWroX:
4425  case AArch64::LDRSBXroX:
4426  case AArch64::LDRSHXroX:
4427  case AArch64::LDRSWroX:
4428  case AArch64::LDRSBWroW:
4429  case AArch64::LDRSHWroW:
4430  case AArch64::LDRSBXroW:
4431  case AArch64::LDRSHXroW:
4432  case AArch64::LDRSWroW:
4433  return true;
4434  }
4435 }
4436 
4437 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4438  MVT SrcVT) {
4439  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4440  if (!LI || !LI->hasOneUse())
4441  return false;
4442 
4443  // Check if the load instruction has already been selected.
4444  unsigned Reg = lookUpRegForValue(LI);
4445  if (!Reg)
4446  return false;
4447 
4448  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4449  if (!MI)
4450  return false;
4451 
4452  // Check if the correct load instruction has been emitted - SelectionDAG might
4453  // have emitted a zero-extending load, but we need a sign-extending load.
4454  bool IsZExt = isa<ZExtInst>(I);
4455  const auto *LoadMI = MI;
4456  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4457  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4458  unsigned LoadReg = MI->getOperand(1).getReg();
4459  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4460  assert(LoadMI && "Expected valid instruction");
4461  }
4462  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4463  return false;
4464 
4465  // Nothing to be done.
4466  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4467  updateValueMap(I, Reg);
4468  return true;
4469  }
4470 
4471  if (IsZExt) {
4472  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4473  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4474  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4475  .addImm(0)
4476  .addReg(Reg, getKillRegState(true))
4477  .addImm(AArch64::sub_32);
4478  Reg = Reg64;
4479  } else {
4480  assert((MI->getOpcode() == TargetOpcode::COPY &&
4481  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4482  "Expected copy instruction");
4483  Reg = MI->getOperand(1).getReg();
4484  MI->eraseFromParent();
4485  }
4486  updateValueMap(I, Reg);
4487  return true;
4488 }
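// Illustrative sketch (not part of the original source): given
//   %v = load i8, i8* %p
//   %e = zext i8 %v to i64
// the load above was already selected as a zero-extending LDRB, so the zext
// only needs a SUBREG_TO_REG to widen the register class; no additional
// instruction is emitted. A matching sext instead reuses the sign-extending
// load's 64-bit result directly and deletes the intermediate COPY.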
4489 
4490 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4491  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4492  "Unexpected integer extend instruction.");
4493  MVT RetVT;
4494  MVT SrcVT;
4495  if (!isTypeSupported(I->getType(), RetVT))
4496  return false;
4497 
4498  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4499  return false;
4500 
4501  // Try to optimize already sign-/zero-extended values from load instructions.
4502  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4503  return true;
4504 
4505  unsigned SrcReg = getRegForValue(I->getOperand(0));
4506  if (!SrcReg)
4507  return false;
4508  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4509 
4510  // Try to optimize already sign-/zero-extended values from function arguments.
4511  bool IsZExt = isa<ZExtInst>(I);
4512  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4513  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4514  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4515  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4516  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4517  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4518  .addImm(0)
4519  .addReg(SrcReg, getKillRegState(SrcIsKill))
4520  .addImm(AArch64::sub_32);
4521  SrcReg = ResultReg;
4522  }
4523  // Conservatively clear all kill flags from all uses, because we are
4524  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4525  // level. The result of the instruction at IR level might have been
4526  // trivially dead, which is no longer true.
4527  unsigned UseReg = lookUpRegForValue(I);
4528  if (UseReg)
4529  MRI.clearKillFlags(UseReg);
4530 
4531  updateValueMap(I, SrcReg);
4532  return true;
4533  }
4534  }
4535 
4536  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4537  if (!ResultReg)
4538  return false;
4539 
4540  updateValueMap(I, ResultReg);
4541  return true;
4542 }
4543 
4544 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4545  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4546  if (!DestEVT.isSimple())
4547  return false;
4548 
4549  MVT DestVT = DestEVT.getSimpleVT();
4550  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4551  return false;
4552 
4553  unsigned DivOpc;
4554  bool Is64bit = (DestVT == MVT::i64);
4555  switch (ISDOpcode) {
4556  default:
4557  return false;
4558  case ISD::SREM:
4559  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4560  break;
4561  case ISD::UREM:
4562  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4563  break;
4564  }
4565  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4566  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4567  if (!Src0Reg)
4568  return false;
4569  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4570 
4571  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4572  if (!Src1Reg)
4573  return false;
4574  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4575 
4576  const TargetRegisterClass *RC =
4577  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4578  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4579  Src1Reg, /*IsKill=*/false);
4580  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4581  // The remainder is computed as numerator - (quotient * denominator) using the
4582  // MSUB instruction.
4583  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4584  Src1Reg, Src1IsKill, Src0Reg,
4585  Src0IsKill);
4586  updateValueMap(I, ResultReg);
4587  return true;
4588 }
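// Illustrative sketch (not part of the original source): srem i32 is lowered
// above as a divide followed by a multiply-subtract, roughly
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0     ; w0 - w8 * w1
// (urem uses udiv instead; register numbers are hypothetical).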
4589 
4590 bool AArch64FastISel::selectMul(const Instruction *I) {
4591  MVT VT;
4592  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4593  return false;
4594 
4595  if (VT.isVector())
4596  return selectBinaryOp(I, ISD::MUL);
4597 
4598  const Value *Src0 = I->getOperand(0);
4599  const Value *Src1 = I->getOperand(1);
4600  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4601  if (C->getValue().isPowerOf2())
4602  std::swap(Src0, Src1);
4603 
4604  // Try to simplify to a shift instruction.
4605  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4606  if (C->getValue().isPowerOf2()) {
4607  uint64_t ShiftVal = C->getValue().logBase2();
4608  MVT SrcVT = VT;
4609  bool IsZExt = true;
4610  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4611  if (!isIntExtFree(ZExt)) {
4612  MVT VT;
4613  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4614  SrcVT = VT;
4615  IsZExt = true;
4616  Src0 = ZExt->getOperand(0);
4617  }
4618  }
4619  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4620  if (!isIntExtFree(SExt)) {
4621  MVT VT;
4622  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4623  SrcVT = VT;
4624  IsZExt = false;
4625  Src0 = SExt->getOperand(0);
4626  }
4627  }
4628  }
4629 
4630  unsigned Src0Reg = getRegForValue(Src0);
4631  if (!Src0Reg)
4632  return false;
4633  bool Src0IsKill = hasTrivialKill(Src0);
4634 
4635  unsigned ResultReg =
4636  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4637 
4638  if (ResultReg) {
4639  updateValueMap(I, ResultReg);
4640  return true;
4641  }
4642  }
4643 
4644  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4645  if (!Src0Reg)
4646  return false;
4647  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4648 
4649  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4650  if (!Src1Reg)
4651  return false;
4652  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4653 
4654  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4655 
4656  if (!ResultReg)
4657  return false;
4658 
4659  updateValueMap(I, ResultReg);
4660  return true;
4661 }
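// Illustrative note (not part of the original source): "mul i32 %x, 8" is
// turned into a shift by the power-of-two path above, e.g.
//   lsl w0, w0, #3
// and when %x is itself a zero-/sign-extend the extension is folded into the
// same UBFM/SBFM via emitLSL_ri; non-constant multiplies fall through to
// MADD with the zero register (madd w0, w0, w1, wzr).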
4662 
4663 bool AArch64FastISel::selectShift(const Instruction *I) {
4664  MVT RetVT;
4665  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4666  return false;
4667 
4668  if (RetVT.isVector())
4669  return selectOperator(I, I->getOpcode());
4670 
4671  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4672  unsigned ResultReg = 0;
4673  uint64_t ShiftVal = C->getZExtValue();
4674  MVT SrcVT = RetVT;
4675  bool IsZExt = I->getOpcode() != Instruction::AShr;
4676  const Value *Op0 = I->getOperand(0);
4677  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4678  if (!isIntExtFree(ZExt)) {
4679  MVT TmpVT;
4680  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4681  SrcVT = TmpVT;
4682  IsZExt = true;
4683  Op0 = ZExt->getOperand(0);
4684  }
4685  }
4686  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4687  if (!isIntExtFree(SExt)) {
4688  MVT TmpVT;
4689  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4690  SrcVT = TmpVT;
4691  IsZExt = false;
4692  Op0 = SExt->getOperand(0);
4693  }
4694  }
4695  }
4696 
4697  unsigned Op0Reg = getRegForValue(Op0);
4698  if (!Op0Reg)
4699  return false;
4700  bool Op0IsKill = hasTrivialKill(Op0);
4701 
4702  switch (I->getOpcode()) {
4703  default: llvm_unreachable("Unexpected instruction.");
4704  case Instruction::Shl:
4705  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4706  break;
4707  case Instruction::AShr:
4708  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4709  break;
4710  case Instruction::LShr:
4711  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4712  break;
4713  }
4714  if (!ResultReg)
4715  return false;
4716 
4717  updateValueMap(I, ResultReg);
4718  return true;
4719  }
4720 
4721  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4722  if (!Op0Reg)
4723  return false;
4724  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4725 
4726  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4727  if (!Op1Reg)
4728  return false;
4729  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4730 
4731  unsigned ResultReg = 0;
4732  switch (I->getOpcode()) {
4733  default: llvm_unreachable("Unexpected instruction.");
4734  case Instruction::Shl:
4735  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4736  break;
4737  case Instruction::AShr:
4738  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4739  break;
4740  case Instruction::LShr:
4741  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4742  break;
4743  }
4744 
4745  if (!ResultReg)
4746  return false;
4747 
4748  updateValueMap(I, ResultReg);
4749  return true;
4750 }
4751 
4752 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4753  MVT RetVT, SrcVT;
4754 
4755  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4756  return false;
4757  if (!isTypeLegal(I->getType(), RetVT))
4758  return false;
4759 
4760  unsigned Opc;
4761  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4762  Opc = AArch64::FMOVWSr;
4763  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4764  Opc = AArch64::FMOVXDr;
4765  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4766  Opc = AArch64::FMOVSWr;
4767  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4768  Opc = AArch64::FMOVDXr;
4769  else
4770  return false;
4771 
4772  const TargetRegisterClass *RC = nullptr;
4773  switch (RetVT.SimpleTy) {
4774  default: llvm_unreachable("Unexpected value type.");
4775  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4776  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4777  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4778  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4779  }
4780  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4781  if (!Op0Reg)
4782  return false;
4783  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4784  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4785 
4786  if (!ResultReg)
4787  return false;
4788 
4789  updateValueMap(I, ResultReg);
4790  return true;
4791 }
4792 
4793 bool AArch64FastISel::selectFRem(const Instruction *I) {
4794  MVT RetVT;
4795  if (!isTypeLegal(I->getType(), RetVT))
4796  return false;
4797 
4798  RTLIB::Libcall LC;
4799  switch (RetVT.SimpleTy) {
4800  default:
4801  return false;
4802  case MVT::f32:
4803  LC = RTLIB::REM_F32;
4804  break;
4805  case MVT::f64:
4806  LC = RTLIB::REM_F64;
4807  break;
4808  }
4809 
4810  ArgListTy Args;
4811  Args.reserve(I->getNumOperands());
4812 
4813  // Populate the argument list.
4814  for (auto &Arg : I->operands()) {
4815  ArgListEntry Entry;
4816  Entry.Val = Arg;
4817  Entry.Ty = Arg->getType();
4818  Args.push_back(Entry);
4819  }
4820 
4821  CallLoweringInfo CLI;
4822  MCContext &Ctx = MF->getContext();
4823  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4824  TLI.getLibcallName(LC), std::move(Args));
4825  if (!lowerCallTo(CLI))
4826  return false;
4827  updateValueMap(I, CLI.ResultReg);
4828  return true;
4829 }
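// Illustrative note (not part of the original source): frem has no AArch64
// instruction, so "frem double %a, %b" becomes a call to fmod() (fmodf() for
// float) through the generic libcall lowering above.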
4830 
4831 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4832  MVT VT;
4833  if (!isTypeLegal(I->getType(), VT))
4834  return false;
4835 
4836  if (!isa<ConstantInt>(I->getOperand(1)))
4837  return selectBinaryOp(I, ISD::SDIV);
4838 
4839  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4840  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4841  !(C.isPowerOf2() || (-C).isPowerOf2()))
4842  return selectBinaryOp(I, ISD::SDIV);
4843 
4844  unsigned Lg2 = C.countTrailingZeros();
4845  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4846  if (!Src0Reg)
4847  return false;
4848  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4849 
4850  if (cast<BinaryOperator>(I)->isExact()) {
4851  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4852  if (!ResultReg)
4853  return false;
4854  updateValueMap(I, ResultReg);
4855  return true;
4856  }
4857 
4858  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4859  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4860  if (!AddReg)
4861  return false;
4862 
4863  // (Src0 < 0) ? Pow2 - 1 : 0;
4864  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4865  return false;
4866 
4867  unsigned SelectOpc;
4868  const TargetRegisterClass *RC;
4869  if (VT == MVT::i64) {
4870  SelectOpc = AArch64::CSELXr;
4871  RC = &AArch64::GPR64RegClass;
4872  } else {
4873  SelectOpc = AArch64::CSELWr;
4874  RC = &AArch64::GPR32RegClass;
4875  }
4876  unsigned SelectReg =
4877  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4878  Src0IsKill, AArch64CC::LT);
4879  if (!SelectReg)
4880  return false;
4881 
4882  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4883  // negate the result.
4884  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4885  unsigned ResultReg;
4886  if (C.isNegative())
4887  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4888  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4889  else
4890  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4891 
4892  if (!ResultReg)
4893  return false;
4894 
4895  updateValueMap(I, ResultReg);
4896  return true;
4897 }
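// Illustrative sketch (not part of the original source), with hypothetical
// registers: a non-exact "sdiv i32 %x, 8" is expanded above into
//   add  w8, w0, #7          ; x + (2^3 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      ; use the biased value only for negative x
//   asr  w0, w8, #3
// For a negative power-of-two divisor the final step is "neg w0, w8, asr #3"
// instead; exact divisions skip the bias and use a single asr.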
4898 
4899 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4900 /// have to duplicate it for AArch64, because otherwise we would fail during the
4901 /// sign-extend emission.
4902 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4903  unsigned IdxN = getRegForValue(Idx);
4904  if (IdxN == 0)
4905  // Unhandled operand. Halt "fast" selection and bail.
4906  return std::pair<unsigned, bool>(0, false);
4907 
4908  bool IdxNIsKill = hasTrivialKill(Idx);
4909 
4910  // If the index is smaller or larger than intptr_t, truncate or extend it.
4911  MVT PtrVT = TLI.getPointerTy(DL);
4912  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4913  if (IdxVT.bitsLT(PtrVT)) {
4914  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4915  IdxNIsKill = true;
4916  } else if (IdxVT.bitsGT(PtrVT))
4917  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4918  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4919 }
4920 
4921 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4922 /// duplicate it for AArch64, because otherwise we would bail out even for
4923 /// simple cases. This is because the standard fastEmit functions don't cover
4924  /// MUL at all and ADD is lowered very inefficiently.
4925 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4926  unsigned N = getRegForValue(I->getOperand(0));
4927  if (!N)
4928  return false;
4929  bool NIsKill = hasTrivialKill(I->getOperand(0));
4930 
4931  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4932  // into a single N = N + TotalOffset.
4933  uint64_t TotalOffs = 0;
4934  MVT VT = TLI.getPointerTy(DL);
4935  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4936  GTI != E; ++GTI) {
4937  const Value *Idx = GTI.getOperand();
4938  if (auto *StTy = GTI.getStructTypeOrNull()) {
4939  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4940  // N = N + Offset
4941  if (Field)
4942  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4943  } else {
4944  Type *Ty = GTI.getIndexedType();
4945 
4946  // If this is a constant subscript, handle it quickly.
4947  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4948  if (CI->isZero())
4949  continue;
4950  // N = N + Offset
4951  TotalOffs +=
4952  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4953  continue;
4954  }
4955  if (TotalOffs) {
4956  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4957  if (!N)
4958  return false;
4959  NIsKill = true;
4960  TotalOffs = 0;
4961  }
4962 
4963  // N = N + Idx * ElementSize;
4964  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4965  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4966  unsigned IdxN = Pair.first;
4967  bool IdxNIsKill = Pair.second;
4968  if (!IdxN)
4969  return false;
4970 
4971  if (ElementSize != 1) {
4972  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4973  if (!C)
4974  return false;
4975  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4976  if (!IdxN)
4977  return false;
4978  IdxNIsKill = true;
4979  }
4980  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4981  if (!N)
4982  return false;
4983  }
4984  }
4985  if (TotalOffs) {
4986  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4987  if (!N)
4988  return false;
4989  }
4990  updateValueMap(I, N);
4991  return true;
4992 }
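// Illustrative sketch (not part of the original source): for
//   %p = getelementptr i32, i32* %base, i64 %i
// the code above multiplies the index by the element size and adds it to the
// base, roughly
//   mov x9, #4
//   mul x9, x8, x9
//   add x0, x0, x9
// while constant indices and struct fields are folded into a single
// "add x0, x0, #TotalOffs" (register numbers hypothetical).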
4993 
4994 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4995  assert(TM.getOptLevel() == CodeGenOpt::None &&
4996  "cmpxchg survived AtomicExpand at optlevel > -O0");
4997 
4998  auto *RetPairTy = cast<StructType>(I->getType());
4999  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5000  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5001  "cmpxchg has a non-i1 status result");
5002 
5003  MVT VT;
5004  if (!isTypeLegal(RetTy, VT))
5005  return false;
5006 
5007  const TargetRegisterClass *ResRC;
5008  unsigned Opc, CmpOpc;
5009  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5010  // extractvalue selection doesn't support that.
5011  if (VT == MVT::i32) {
5012  Opc = AArch64::CMP_SWAP_32;
5013  CmpOpc = AArch64::SUBSWrs;
5014  ResRC = &AArch64::GPR32RegClass;
5015  } else if (VT == MVT::i64) {
5016  Opc = AArch64::CMP_SWAP_64;
5017  CmpOpc = AArch64::SUBSXrs;
5018  ResRC = &AArch64::GPR64RegClass;
5019  } else {
5020  return false;
5021  }
5022 
5023  const MCInstrDesc &II = TII.get(Opc);
5024 
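 // Constrain the pointer, expected-value and new-value operands to the
 // register classes required by the corresponding operands of the CMP_SWAP
 // pseudo's instruction descriptor.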
5025  const unsigned AddrReg = constrainOperandRegClass(
5026  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5027  const unsigned DesiredReg = constrainOperandRegClass(
5028  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5029  const unsigned NewReg = constrainOperandRegClass(
5030  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5031 
5032  const unsigned ResultReg1 = createResultReg(ResRC);
5033  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5034  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5035 
5036  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
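 // The CMP_SWAP_32/64 pseudo defines the loaded value (ResultReg1) and a
 // scratch status register; it is expanded after instruction selection into
 // an exclusive load/store (LDAXR/STLXR) retry loop.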
5037  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5038  .addDef(ResultReg1)
5039  .addDef(ScratchReg)
5040  .addUse(AddrReg)
5041  .addUse(DesiredReg)
5042  .addUse(NewReg);
5043 
5044  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5045  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5046  .addUse(ResultReg1)
5047  .addUse(DesiredReg)
5048  .addImm(0);
5049 
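 // Materialize the i1 success result: CSINC Wd, WZR, WZR, NE yields 1 when
 // the SUBS above set EQ (loaded value == expected) and 0 otherwise.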
5050  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5051  .addDef(ResultReg2)
5052  .addUse(AArch64::WZR)
5053  .addUse(AArch64::WZR)
5054  .addImm(AArch64CC::NE);
5055 
5056  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5057  updateValueMap(I, ResultReg1, 2);
5058  return true;
5059 }
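// Illustrative sketch (assumed example, not from the original source): at -O0,
//   %res = cmpxchg i32* %ptr, i32 %expected, i32 %new seq_cst seq_cst
// is selected here into CMP_SWAP_32 + SUBSWrs + CSINCWr, and both elements of
// the {i32, i1} result are published through consecutive result registers.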
5060 
5061 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5062  switch (I->getOpcode()) {
5063  default:
5064  break;
5065  case Instruction::Add:
5066  case Instruction::Sub:
5067  return selectAddSub(I);
5068  case Instruction::Mul:
5069  return selectMul(I);
5070  case Instruction::SDiv:
5071  return selectSDiv(I);
5072  case Instruction::SRem:
5073  if (!selectBinaryOp(I, ISD::SREM))
5074  return selectRem(I, ISD::SREM);
5075  return true;
5076  case Instruction::URem:
5077  if (!selectBinaryOp(I, ISD::UREM))
5078  return selectRem(I, ISD::UREM);
5079  return true;
5080  case Instruction::Shl:
5081  case Instruction::LShr:
5082  case Instruction::AShr:
5083  return selectShift(I);
5084  case Instruction::And:
5085  case Instruction::Or:
5086  case Instruction::Xor:
5087  return selectLogicalOp(I);
5088  case Instruction::Br:
5089  return selectBranch(I);
5090  case Instruction::IndirectBr:
5091  return selectIndirectBr(I);
5092  case Instruction::BitCast:
5093  if (!FastISel::selectBitCast(I))
5094  return selectBitCast(I);
5095  return true;
5096  case Instruction::FPToSI:
5097  if (!selectCast(I, ISD::FP_TO_SINT))
5098  return selectFPToInt(I, /*Signed=*/true);
5099  return true;
5100  case Instruction::FPToUI:
5101  return selectFPToInt(I, /*Signed=*/false);
5102  case Instruction::ZExt:
5103  case Instruction::SExt:
5104  return selectIntExt(I);
5105  case Instruction::Trunc:
5106  if (!selectCast(I, ISD::TRUNCATE))
5107  return selectTrunc(I);
5108  return true;
5109  case Instruction::FPExt:
5110  return selectFPExt(I);
5111  case Instruction::FPTrunc:
5112  return selectFPTrunc(I);
5113  case Instruction::SIToFP:
5114  if (!selectCast(I, ISD::SINT_TO_FP))
5115  return selectIntToFP(I, /*Signed=*/true);
5116  return true;
5117  case Instruction::UIToFP:
5118  return selectIntToFP(I, /*Signed=*/false);
5119  case Instruction::Load:
5120  return selectLoad(I);
5121  case Instruction::Store:
5122  return selectStore(I);
5123  case Instruction::FCmp:
5124  case Instruction::ICmp:
5125  return selectCmp(I);
5126  case Instruction::Select:
5127  return selectSelect(I);
5128  case Instruction::Ret:
5129  return selectRet(I);
5130  case Instruction::FRem:
5131  return selectFRem(I);
5132  case Instruction::GetElementPtr:
5133  return selectGetElementPtr(I);
5134  case Instruction::AtomicCmpXchg:
5135  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5136  }
5137 
5138  // Fall back to target-independent instruction selection.
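 // If that also fails, the generic FastISel driver falls back to SelectionDAG
 // for this instruction.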
5139  return selectOperator(I, I->getOpcode());
5140  // Silence warnings.
5141  (void)&CC_AArch64_DarwinPCS_VarArg;
5142  (void)&CC_AArch64_Win64_VarArg;
5143 }
5144 
5145 namespace llvm {
5146 
5147 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5148  const TargetLibraryInfo *LibInfo) {
5149  return new AArch64FastISel(FuncInfo, LibInfo);
5150 }
5151 
5152 } // end namespace llvm