LLVM  10.0.0svn
AArch64FastISel.cpp
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/CodeGen/FastISel.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
80  class Address {
81  public:
82  using BaseKind = enum {
83  RegBase,
84  FrameIndexBase
85  };
86 
87  private:
88  BaseKind Kind = RegBase;
89  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90  union {
91  unsigned Reg;
92  int FI;
93  } Base;
94  unsigned OffsetReg = 0;
95  unsigned Shift = 0;
96  int64_t Offset = 0;
97  const GlobalValue *GV = nullptr;
98 
99  public:
100  Address() { Base.Reg = 0; }
101 
102  void setKind(BaseKind K) { Kind = K; }
103  BaseKind getKind() const { return Kind; }
104  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106  bool isRegBase() const { return Kind == RegBase; }
107  bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109  void setReg(unsigned Reg) {
110  assert(isRegBase() && "Invalid base register access!");
111  Base.Reg = Reg;
112  }
113 
114  unsigned getReg() const {
115  assert(isRegBase() && "Invalid base register access!");
116  return Base.Reg;
117  }
118 
119  void setOffsetReg(unsigned Reg) {
120  OffsetReg = Reg;
121  }
122 
123  unsigned getOffsetReg() const {
124  return OffsetReg;
125  }
126 
127  void setFI(unsigned FI) {
128  assert(isFIBase() && "Invalid base frame index access!");
129  Base.FI = FI;
130  }
131 
132  unsigned getFI() const {
133  assert(isFIBase() && "Invalid base frame index access!");
134  return Base.FI;
135  }
136 
137  void setOffset(int64_t O) { Offset = O; }
138  int64_t getOffset() { return Offset; }
139  void setShift(unsigned S) { Shift = S; }
140  unsigned getShift() { return Shift; }
141 
142  void setGlobalValue(const GlobalValue *G) { GV = G; }
143  const GlobalValue *getGlobalValue() { return GV; }
144  };
145 
146  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147  /// make the right decision when generating code for different targets.
148  const AArch64Subtarget *Subtarget;
149  LLVMContext *Context;
150 
151  bool fastLowerArguments() override;
152  bool fastLowerCall(CallLoweringInfo &CLI) override;
153  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156  // Selection routines.
157  bool selectAddSub(const Instruction *I);
158  bool selectLogicalOp(const Instruction *I);
159  bool selectLoad(const Instruction *I);
160  bool selectStore(const Instruction *I);
161  bool selectBranch(const Instruction *I);
162  bool selectIndirectBr(const Instruction *I);
163  bool selectCmp(const Instruction *I);
164  bool selectSelect(const Instruction *I);
165  bool selectFPExt(const Instruction *I);
166  bool selectFPTrunc(const Instruction *I);
167  bool selectFPToInt(const Instruction *I, bool Signed);
168  bool selectIntToFP(const Instruction *I, bool Signed);
169  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170  bool selectRet(const Instruction *I);
171  bool selectTrunc(const Instruction *I);
172  bool selectIntExt(const Instruction *I);
173  bool selectMul(const Instruction *I);
174  bool selectShift(const Instruction *I);
175  bool selectBitCast(const Instruction *I);
176  bool selectFRem(const Instruction *I);
177  bool selectSDiv(const Instruction *I);
178  bool selectGetElementPtr(const Instruction *I);
179  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181  // Utility helper routines.
182  bool isTypeLegal(Type *Ty, MVT &VT);
183  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184  bool isValueAvailable(const Value *V) const;
185  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186  bool computeCallAddress(const Value *V, Address &Addr);
187  bool simplifyAddress(Address &Addr, MVT VT);
188  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189  MachineMemOperand::Flags Flags,
190  unsigned ScaleFactor, MachineMemOperand *MMO);
191  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193  unsigned Alignment);
194  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195  const Value *Cond);
196  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197  bool optimizeSelect(const SelectInst *SI);
198  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
199 
200  // Emit helper routines.
201  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202  const Value *RHS, bool SetFlags = false,
203  bool WantResult = true, bool IsZExt = false);
204  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206  bool SetFlags = false, bool WantResult = true);
207  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208  bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209  bool WantResult = true);
210  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212  AArch64_AM::ShiftExtendType ShiftType,
213  uint64_t ShiftImm, bool SetFlags = false,
214  bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
217  AArch64_AM::ShiftExtendType ExtType,
218  uint64_t ShiftImm, bool SetFlags = false,
219  bool WantResult = true);
220 
221  // Emit functions.
222  bool emitCompareAndBranch(const BranchInst *BI);
223  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228  MachineMemOperand *MMO = nullptr);
229  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230  MachineMemOperand *MMO = nullptr);
231  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232  MachineMemOperand *MMO = nullptr);
233  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236  bool SetFlags = false, bool WantResult = true,
237  bool IsZExt = false);
238  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240  bool SetFlags = false, bool WantResult = true,
241  bool IsZExt = false);
242  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243  unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245  unsigned RHSReg, bool RHSIsKill,
246  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247  bool WantResult = true);
248  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249  const Value *RHS);
250  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251  bool LHSIsKill, uint64_t Imm);
252  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253  bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254  uint64_t ShiftImm);
255  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257  unsigned Op1, bool Op1IsKill);
258  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259  unsigned Op1, bool Op1IsKill);
260  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261  unsigned Op1, bool Op1IsKill);
262  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263  unsigned Op1Reg, bool Op1IsKill);
264  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265  uint64_t Imm, bool IsZExt = true);
266  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267  unsigned Op1Reg, bool Op1IsKill);
268  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269  uint64_t Imm, bool IsZExt = true);
270  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271  unsigned Op1Reg, bool Op1IsKill);
272  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273  uint64_t Imm, bool IsZExt = false);
274 
275  unsigned materializeInt(const ConstantInt *CI, MVT VT);
276  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277  unsigned materializeGV(const GlobalValue *GV);
278 
279  // Call handling routines.
280 private:
281  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283  unsigned &NumBytes);
284  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
285 
286 public:
287  // Backend specific FastISel code.
288  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289  unsigned fastMaterializeConstant(const Constant *C) override;
290  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
291 
292  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293  const TargetLibraryInfo *LibInfo)
294  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295  Subtarget =
296  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297  Context = &FuncInfo.Fn->getContext();
298  }
299 
300  bool fastSelectInstruction(const Instruction *I) override;
301 
302 #include "AArch64GenFastISel.inc"
303 };
304 
305 } // end anonymous namespace
306 
307 /// Check if the sign-/zero-extend will be a noop.
308 static bool isIntExtFree(const Instruction *I) {
309  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310  "Unexpected integer extend instruction.");
311  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312  "Unexpected value type.");
313  bool IsZExt = isa<ZExtInst>(I);
314 
315  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316  if (LI->hasOneUse())
317  return true;
318 
319  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321  return true;
322 
323  return false;
324 }
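// Illustrative note (not part of the original source): the extend is "free"
// here because AArch64 has extending loads and the calling convention already
// extends small arguments. A minimal sketch of the first case:
//
//   %b = load i8, i8* %p        ; single use of %b
//   %z = zext i8 %b to i32      ; folded into the load
//
// is expected to select to a single "ldrb w0, [x0]", which zero-extends into
// the full 32-bit register, so no separate UBFM/UXTB is needed.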
325 
326 /// Determine the implicit scale factor that is applied by a memory
327 /// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) {
329  switch (VT.SimpleTy) {
330  default:
331  return 0; // invalid
332  case MVT::i1: // fall-through
333  case MVT::i8:
334  return 1;
335  case MVT::i16:
336  return 2;
337  case MVT::i32: // fall-through
338  case MVT::f32:
339  return 4;
340  case MVT::i64: // fall-through
341  case MVT::f64:
342  return 8;
343  }
344 }
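// Illustrative note (not part of the original source): this scale factor
// mirrors the scaling of the unsigned 12-bit offset in AArch64 load/store
// instructions. E.g. "ldr w0, [x1, #16]" encodes 16 / 4 = 4 in its immediate
// field, which is why callers divide the byte offset by this factor before
// encoding it (see addLoadStoreOperands below).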
345 
346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347  if (CC == CallingConv::WebKit_JS)
348  return CC_AArch64_WebKit_JS;
349  if (CC == CallingConv::GHC)
350  return CC_AArch64_GHC;
351  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
352 }
353 
354 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
355  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
356  "Alloca should always return a pointer.");
357 
358  // Don't handle dynamic allocas.
359  if (!FuncInfo.StaticAllocaMap.count(AI))
360  return 0;
361 
363  FuncInfo.StaticAllocaMap.find(AI);
364 
365  if (SI != FuncInfo.StaticAllocaMap.end()) {
366  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
367  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
368  ResultReg)
369  .addFrameIndex(SI->second)
370  .addImm(0)
371  .addImm(0);
372  return ResultReg;
373  }
374 
375  return 0;
376 }
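// Illustrative note (not part of the original source): for a static alloca the
// address is simply "frame index + 0", emitted as an ADDXri whose frame-index
// operand is rewritten during frame lowering, e.g.:
//
//   %buf = alloca [16 x i8]   -->   add x0, sp, #<offset of buf>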
377 
378 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
379  if (VT > MVT::i64)
380  return 0;
381 
382  if (!CI->isZero())
383  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
384 
385  // Create a copy from the zero register to materialize a "0" value.
386  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
387  : &AArch64::GPR32RegClass;
388  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
389  unsigned ResultReg = createResultReg(RC);
390  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
391  ResultReg).addReg(ZeroReg, getKillRegState(true));
392  return ResultReg;
393 }
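// Illustrative note (not part of the original source): non-zero integers go
// through the tablegen'd fastEmit_i path (typically a MOVi32imm/MOVi64imm
// pseudo later expanded to movz/movk/movn). Zero is special-cased as a COPY
// from WZR/XZR, so materializing an i32 0 becomes "mov w0, wzr".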
394 
395 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
396  // Positive zero (+0.0) has to be materialized with a fmov from the zero
397  // register, because the immediate version of fmov cannot encode zero.
398  if (CFP->isNullValue())
399  return fastMaterializeFloatZero(CFP);
400 
401  if (VT != MVT::f32 && VT != MVT::f64)
402  return 0;
403 
404  const APFloat Val = CFP->getValueAPF();
405  bool Is64Bit = (VT == MVT::f64);
406  // This checks to see if we can use FMOV instructions to materialize
407  // a constant, otherwise we have to materialize via the constant pool.
408  int Imm =
409  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
410  if (Imm != -1) {
411  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
412  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
413  }
414 
415  // For the MachO large code model materialize the FP constant in code.
416  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
417  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
418  const TargetRegisterClass *RC = Is64Bit ?
419  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
420 
421  unsigned TmpReg = createResultReg(RC);
422  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
423  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
424 
425  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
426  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
427  TII.get(TargetOpcode::COPY), ResultReg)
428  .addReg(TmpReg, getKillRegState(true));
429 
430  return ResultReg;
431  }
432 
433  // Materialize via constant pool. MachineConstantPool wants an explicit
434  // alignment.
435  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
436  if (Align == 0)
437  Align = DL.getTypeAllocSize(CFP->getType());
438 
439  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
440  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
443 
444  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447  .addReg(ADRPReg)
448  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
449  return ResultReg;
450 }
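// Illustrative note (not part of the original source): FMOVSi/FMOVDi only
// accept the 8-bit floating-point immediate encoding (values of the form
// +/-(1.0 .. 1.9375) * 2^[-3 .. 4]), so e.g. 1.0 or 0.5 become
// "fmov d0, #1.0" / "fmov s0, #0.5", while a value like 0.1 is not encodable
// and is loaded from the constant pool via the ADRP + LDR sequence above.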
451 
452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
453  // We can't handle thread-local variables quickly yet.
454  if (GV->isThreadLocal())
455  return 0;
456 
457  // MachO still uses GOT for large code-model accesses, but ELF requires
458  // movz/movk sequences, which FastISel doesn't handle yet.
459  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
460  return 0;
461 
462  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
463 
464  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
465  if (!DestEVT.isSimple())
466  return 0;
467 
468  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
469  unsigned ResultReg;
470 
471  if (OpFlags & AArch64II::MO_GOT) {
472  // ADRP + LDRX
473  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
474  ADRPReg)
475  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
476 
477  ResultReg = createResultReg(&AArch64::GPR64RegClass);
478  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
479  ResultReg)
480  .addReg(ADRPReg)
481  .addGlobalAddress(GV, 0,
482  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
483  } else {
484  // ADRP + ADDX
485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
486  ADRPReg)
487  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
488 
489  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
490  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
491  ResultReg)
492  .addReg(ADRPReg)
493  .addGlobalAddress(GV, 0,
494  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
495  .addImm(0);
496  }
497  return ResultReg;
498 }
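// Illustrative note (not part of the original source): in the small code model
// this produces the usual two-instruction sequences, e.g. for a GOT access
//
//   adrp x0, :got:sym
//   ldr  x0, [x0, :got_lo12:sym]
//
// and for a direct reference
//
//   adrp x0, sym
//   add  x0, x0, :lo12:sym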
499 
500 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
501  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
502 
503  // Only handle simple types.
504  if (!CEVT.isSimple())
505  return 0;
506  MVT VT = CEVT.getSimpleVT();
507 
508  if (const auto *CI = dyn_cast<ConstantInt>(C))
509  return materializeInt(CI, VT);
510  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
511  return materializeFP(CFP, VT);
512  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
513  return materializeGV(GV);
514 
515  return 0;
516 }
517 
518 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
519  assert(CFP->isNullValue() &&
520  "Floating-point constant is not a positive zero.");
521  MVT VT;
522  if (!isTypeLegal(CFP->getType(), VT))
523  return 0;
524 
525  if (VT != MVT::f32 && VT != MVT::f64)
526  return 0;
527 
528  bool Is64Bit = (VT == MVT::f64);
529  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
530  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
531  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
532 }
533 
534 /// Check if the multiply is by a power-of-2 constant.
535 static bool isMulPowOf2(const Value *I) {
536  if (const auto *MI = dyn_cast<MulOperator>(I)) {
537  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
538  if (C->getValue().isPowerOf2())
539  return true;
540  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
541  if (C->getValue().isPowerOf2())
542  return true;
543  }
544  return false;
545 }
546 
547 // Computes the address to get to an object.
548 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
549 {
550  const User *U = nullptr;
551  unsigned Opcode = Instruction::UserOp1;
552  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
553  // Don't walk into other basic blocks unless the object is an alloca from
554  // another block, otherwise it may not have a virtual register assigned.
555  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
556  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
557  Opcode = I->getOpcode();
558  U = I;
559  }
560  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
561  Opcode = C->getOpcode();
562  U = C;
563  }
564 
565  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
566  if (Ty->getAddressSpace() > 255)
567  // Fast instruction selection doesn't support the special
568  // address spaces.
569  return false;
570 
571  switch (Opcode) {
572  default:
573  break;
574  case Instruction::BitCast:
575  // Look through bitcasts.
576  return computeAddress(U->getOperand(0), Addr, Ty);
577 
578  case Instruction::IntToPtr:
579  // Look past no-op inttoptrs.
580  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
581  TLI.getPointerTy(DL))
582  return computeAddress(U->getOperand(0), Addr, Ty);
583  break;
584 
585  case Instruction::PtrToInt:
586  // Look past no-op ptrtoints.
587  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
588  return computeAddress(U->getOperand(0), Addr, Ty);
589  break;
590 
591  case Instruction::GetElementPtr: {
592  Address SavedAddr = Addr;
593  uint64_t TmpOffset = Addr.getOffset();
594 
595  // Iterate through the GEP folding the constants into offsets where
596  // we can.
597  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
598  GTI != E; ++GTI) {
599  const Value *Op = GTI.getOperand();
600  if (StructType *STy = GTI.getStructTypeOrNull()) {
601  const StructLayout *SL = DL.getStructLayout(STy);
602  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
603  TmpOffset += SL->getElementOffset(Idx);
604  } else {
605  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
606  while (true) {
607  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
608  // Constant-offset addressing.
609  TmpOffset += CI->getSExtValue() * S;
610  break;
611  }
612  if (canFoldAddIntoGEP(U, Op)) {
613  // A compatible add with a constant operand. Fold the constant.
614  ConstantInt *CI =
615  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
616  TmpOffset += CI->getSExtValue() * S;
617  // Iterate on the other operand.
618  Op = cast<AddOperator>(Op)->getOperand(0);
619  continue;
620  }
621  // Unsupported
622  goto unsupported_gep;
623  }
624  }
625  }
626 
627  // Try to grab the base operand now.
628  Addr.setOffset(TmpOffset);
629  if (computeAddress(U->getOperand(0), Addr, Ty))
630  return true;
631 
632  // We failed, restore everything and try the other options.
633  Addr = SavedAddr;
634 
635  unsupported_gep:
636  break;
637  }
638  case Instruction::Alloca: {
639  const AllocaInst *AI = cast<AllocaInst>(Obj);
640  DenseMap<const AllocaInst *, int>::iterator SI =
641  FuncInfo.StaticAllocaMap.find(AI);
642  if (SI != FuncInfo.StaticAllocaMap.end()) {
643  Addr.setKind(Address::FrameIndexBase);
644  Addr.setFI(SI->second);
645  return true;
646  }
647  break;
648  }
649  case Instruction::Add: {
650  // Adds of constants are common and easy enough.
651  const Value *LHS = U->getOperand(0);
652  const Value *RHS = U->getOperand(1);
653 
654  if (isa<ConstantInt>(LHS))
655  std::swap(LHS, RHS);
656 
657  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
658  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
659  return computeAddress(LHS, Addr, Ty);
660  }
661 
662  Address Backup = Addr;
663  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
664  return true;
665  Addr = Backup;
666 
667  break;
668  }
669  case Instruction::Sub: {
670  // Subs of constants are common and easy enough.
671  const Value *LHS = U->getOperand(0);
672  const Value *RHS = U->getOperand(1);
673 
674  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
676  return computeAddress(LHS, Addr, Ty);
677  }
678  break;
679  }
680  case Instruction::Shl: {
681  if (Addr.getOffsetReg())
682  break;
683 
684  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
685  if (!CI)
686  break;
687 
688  unsigned Val = CI->getZExtValue();
689  if (Val < 1 || Val > 3)
690  break;
691 
692  uint64_t NumBytes = 0;
693  if (Ty && Ty->isSized()) {
694  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
695  NumBytes = NumBits / 8;
696  if (!isPowerOf2_64(NumBits))
697  NumBytes = 0;
698  }
699 
700  if (NumBytes != (1ULL << Val))
701  break;
702 
703  Addr.setShift(Val);
704  Addr.setExtendType(AArch64_AM::LSL);
705 
706  const Value *Src = U->getOperand(0);
707  if (const auto *I = dyn_cast<Instruction>(Src)) {
708  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
709  // Fold the zext or sext when it won't become a noop.
710  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
711  if (!isIntExtFree(ZE) &&
712  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
713  Addr.setExtendType(AArch64_AM::UXTW);
714  Src = ZE->getOperand(0);
715  }
716  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
717  if (!isIntExtFree(SE) &&
718  SE->getOperand(0)->getType()->isIntegerTy(32)) {
719  Addr.setExtendType(AArch64_AM::SXTW);
720  Src = SE->getOperand(0);
721  }
722  }
723  }
724  }
725 
726  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
727  if (AI->getOpcode() == Instruction::And) {
728  const Value *LHS = AI->getOperand(0);
729  const Value *RHS = AI->getOperand(1);
730 
731  if (const auto *C = dyn_cast<ConstantInt>(LHS))
732  if (C->getValue() == 0xffffffff)
733  std::swap(LHS, RHS);
734 
735  if (const auto *C = dyn_cast<ConstantInt>(RHS))
736  if (C->getValue() == 0xffffffff) {
737  Addr.setExtendType(AArch64_AM::UXTW);
738  unsigned Reg = getRegForValue(LHS);
739  if (!Reg)
740  return false;
741  bool RegIsKill = hasTrivialKill(LHS);
742  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
743  AArch64::sub_32);
744  Addr.setOffsetReg(Reg);
745  return true;
746  }
747  }
748 
749  unsigned Reg = getRegForValue(Src);
750  if (!Reg)
751  return false;
752  Addr.setOffsetReg(Reg);
753  return true;
754  }
755  case Instruction::Mul: {
756  if (Addr.getOffsetReg())
757  break;
758 
759  if (!isMulPowOf2(U))
760  break;
761 
762  const Value *LHS = U->getOperand(0);
763  const Value *RHS = U->getOperand(1);
764 
765  // Canonicalize power-of-2 value to the RHS.
766  if (const auto *C = dyn_cast<ConstantInt>(LHS))
767  if (C->getValue().isPowerOf2())
768  std::swap(LHS, RHS);
769 
770  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
771  const auto *C = cast<ConstantInt>(RHS);
772  unsigned Val = C->getValue().logBase2();
773  if (Val < 1 || Val > 3)
774  break;
775 
776  uint64_t NumBytes = 0;
777  if (Ty && Ty->isSized()) {
778  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
779  NumBytes = NumBits / 8;
780  if (!isPowerOf2_64(NumBits))
781  NumBytes = 0;
782  }
783 
784  if (NumBytes != (1ULL << Val))
785  break;
786 
787  Addr.setShift(Val);
788  Addr.setExtendType(AArch64_AM::LSL);
789 
790  const Value *Src = LHS;
791  if (const auto *I = dyn_cast<Instruction>(Src)) {
792  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
793  // Fold the zext or sext when it won't become a noop.
794  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
795  if (!isIntExtFree(ZE) &&
796  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
797  Addr.setExtendType(AArch64_AM::UXTW);
798  Src = ZE->getOperand(0);
799  }
800  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
801  if (!isIntExtFree(SE) &&
802  SE->getOperand(0)->getType()->isIntegerTy(32)) {
803  Addr.setExtendType(AArch64_AM::SXTW);
804  Src = SE->getOperand(0);
805  }
806  }
807  }
808  }
809 
810  unsigned Reg = getRegForValue(Src);
811  if (!Reg)
812  return false;
813  Addr.setOffsetReg(Reg);
814  return true;
815  }
816  case Instruction::And: {
817  if (Addr.getOffsetReg())
818  break;
819 
820  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
821  break;
822 
823  const Value *LHS = U->getOperand(0);
824  const Value *RHS = U->getOperand(1);
825 
826  if (const auto *C = dyn_cast<ConstantInt>(LHS))
827  if (C->getValue() == 0xffffffff)
828  std::swap(LHS, RHS);
829 
830  if (const auto *C = dyn_cast<ConstantInt>(RHS))
831  if (C->getValue() == 0xffffffff) {
832  Addr.setShift(0);
833  Addr.setExtendType(AArch64_AM::LSL);
834  Addr.setExtendType(AArch64_AM::UXTW);
835 
836  unsigned Reg = getRegForValue(LHS);
837  if (!Reg)
838  return false;
839  bool RegIsKill = hasTrivialKill(LHS);
840  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
841  AArch64::sub_32);
842  Addr.setOffsetReg(Reg);
843  return true;
844  }
845  break;
846  }
847  case Instruction::SExt:
848  case Instruction::ZExt: {
849  if (!Addr.getReg() || Addr.getOffsetReg())
850  break;
851 
852  const Value *Src = nullptr;
853  // Fold the zext or sext when it won't become a noop.
854  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
855  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
856  Addr.setExtendType(AArch64_AM::UXTW);
857  Src = ZE->getOperand(0);
858  }
859  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
860  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
861  Addr.setExtendType(AArch64_AM::SXTW);
862  Src = SE->getOperand(0);
863  }
864  }
865 
866  if (!Src)
867  break;
868 
869  Addr.setShift(0);
870  unsigned Reg = getRegForValue(Src);
871  if (!Reg)
872  return false;
873  Addr.setOffsetReg(Reg);
874  return true;
875  }
876  } // end switch
877 
878  if (Addr.isRegBase() && !Addr.getReg()) {
879  unsigned Reg = getRegForValue(Obj);
880  if (!Reg)
881  return false;
882  Addr.setReg(Reg);
883  return true;
884  }
885 
886  if (!Addr.getOffsetReg()) {
887  unsigned Reg = getRegForValue(Obj);
888  if (!Reg)
889  return false;
890  Addr.setOffsetReg(Reg);
891  return true;
892  }
893 
894  return false;
895 }
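// Illustrative note (not part of the original source): computeAddress tries to
// express Obj in one of the forms the load/store selectors understand:
// [frame-index + imm], [base-reg + imm], or [base-reg, offset-reg {, extend
// {#shift}}]. For example, an address computed as
//
//   %i64 = zext i32 %i to i64
//   %off = shl i64 %i64, 2
//   %p   = add i64 %base, %off
//
// has the shift-by-2 and the zero-extend folded into the offset register by
// the Add/Shl/ZExt cases above, so an i32 access can select to something like
// "ldr w0, [x1, w2, uxtw #2]".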
896 
897 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
898  const User *U = nullptr;
899  unsigned Opcode = Instruction::UserOp1;
900  bool InMBB = true;
901 
902  if (const auto *I = dyn_cast<Instruction>(V)) {
903  Opcode = I->getOpcode();
904  U = I;
905  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
906  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
907  Opcode = C->getOpcode();
908  U = C;
909  }
910 
911  switch (Opcode) {
912  default: break;
913  case Instruction::BitCast:
914  // Look past bitcasts if its operand is in the same BB.
915  if (InMBB)
916  return computeCallAddress(U->getOperand(0), Addr);
917  break;
918  case Instruction::IntToPtr:
919  // Look past no-op inttoptrs if its operand is in the same BB.
920  if (InMBB &&
921  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
922  TLI.getPointerTy(DL))
923  return computeCallAddress(U->getOperand(0), Addr);
924  break;
925  case Instruction::PtrToInt:
926  // Look past no-op ptrtoints if its operand is in the same BB.
927  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
928  return computeCallAddress(U->getOperand(0), Addr);
929  break;
930  }
931 
932  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
933  Addr.setGlobalValue(GV);
934  return true;
935  }
936 
937  // If all else fails, try to materialize the value in a register.
938  if (!Addr.getGlobalValue()) {
939  Addr.setReg(getRegForValue(V));
940  return Addr.getReg() != 0;
941  }
942 
943  return false;
944 }
945 
946 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
947  EVT evt = TLI.getValueType(DL, Ty, true);
948 
949  // Only handle simple types.
950  if (evt == MVT::Other || !evt.isSimple())
951  return false;
952  VT = evt.getSimpleVT();
953 
954  // This is a legal type, but it's not something we handle in fast-isel.
955  if (VT == MVT::f128)
956  return false;
957 
958  // Handle all other legal types, i.e. a register that will directly hold this
959  // value.
960  return TLI.isTypeLegal(VT);
961 }
962 
963 /// Determine if the value type is supported by FastISel.
964 ///
965 /// FastISel for AArch64 can handle more value types than are legal. This adds
966 /// simple value types such as i1, i8, and i16.
967 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
968  if (Ty->isVectorTy() && !IsVectorAllowed)
969  return false;
970 
971  if (isTypeLegal(Ty, VT))
972  return true;
973 
974  // If this is a type that can be sign- or zero-extended to a basic operation
975  // go ahead and accept it now.
976  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
977  return true;
978 
979  return false;
980 }
981 
982 bool AArch64FastISel::isValueAvailable(const Value *V) const {
983  if (!isa<Instruction>(V))
984  return true;
985 
986  const auto *I = cast<Instruction>(V);
987  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
988 }
989 
990 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
991  unsigned ScaleFactor = getImplicitScaleFactor(VT);
992  if (!ScaleFactor)
993  return false;
994 
995  bool ImmediateOffsetNeedsLowering = false;
996  bool RegisterOffsetNeedsLowering = false;
997  int64_t Offset = Addr.getOffset();
998  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
999  ImmediateOffsetNeedsLowering = true;
1000  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1001  !isUInt<12>(Offset / ScaleFactor))
1002  ImmediateOffsetNeedsLowering = true;
1003 
1004  // Cannot encode an offset register and an immediate offset in the same
1005  // instruction. Fold the immediate offset into the load/store instruction and
1006  // emit an additional add to take care of the offset register.
1007  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1008  RegisterOffsetNeedsLowering = true;
1009 
1010  // Cannot encode zero register as base.
1011  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1012  RegisterOffsetNeedsLowering = true;
1013 
1014  // If this is a stack pointer and the offset needs to be simplified then put
1015  // the alloca address into a register, set the base type back to register and
1016  // continue. This should almost never happen.
1017  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1018  {
1019  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1020  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1021  ResultReg)
1022  .addFrameIndex(Addr.getFI())
1023  .addImm(0)
1024  .addImm(0);
1025  Addr.setKind(Address::RegBase);
1026  Addr.setReg(ResultReg);
1027  }
1028 
1029  if (RegisterOffsetNeedsLowering) {
1030  unsigned ResultReg = 0;
1031  if (Addr.getReg()) {
1032  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1033  Addr.getExtendType() == AArch64_AM::UXTW )
1034  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1035  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1036  /*TODO:IsKill=*/false, Addr.getExtendType(),
1037  Addr.getShift());
1038  else
1039  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1040  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1041  /*TODO:IsKill=*/false, AArch64_AM::LSL,
1042  Addr.getShift());
1043  } else {
1044  if (Addr.getExtendType() == AArch64_AM::UXTW)
1045  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1046  /*Op0IsKill=*/false, Addr.getShift(),
1047  /*IsZExt=*/true);
1048  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1049  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050  /*Op0IsKill=*/false, Addr.getShift(),
1051  /*IsZExt=*/false);
1052  else
1053  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1054  /*Op0IsKill=*/false, Addr.getShift());
1055  }
1056  if (!ResultReg)
1057  return false;
1058 
1059  Addr.setReg(ResultReg);
1060  Addr.setOffsetReg(0);
1061  Addr.setShift(0);
1062  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1063  }
1064 
1065  // Since the offset is too large for the load/store instruction get the
1066  // reg+offset into a register.
1067  if (ImmediateOffsetNeedsLowering) {
1068  unsigned ResultReg;
1069  if (Addr.getReg())
1070  // Try to fold the immediate into the add instruction.
1071  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1072  else
1073  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1074 
1075  if (!ResultReg)
1076  return false;
1077  Addr.setReg(ResultReg);
1078  Addr.setOffset(0);
1079  }
1080  return true;
1081 }
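// Illustrative note (not part of the original source): when the immediate fits
// neither the signed 9-bit unscaled form (LDUR/STUR) nor the scaled unsigned
// 12-bit form, the whole offset is folded into the base register and the
// memory access is left with offset 0. A sketch for a byte load at
// base + 0x12345 (too large for either encoding):
//
//   mov  x8, #0x12345        ; materialized via fastEmit_i
//   add  x8, x0, x8          ; emitAdd_ri_ falls back to the register form
//   ldrb w0, [x8]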
1082 
1083 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1084  const MachineInstrBuilder &MIB,
1085  MachineMemOperand::Flags Flags,
1086  unsigned ScaleFactor,
1087  MachineMemOperand *MMO) {
1088  int64_t Offset = Addr.getOffset() / ScaleFactor;
1089  // Frame base works a bit differently. Handle it separately.
1090  if (Addr.isFIBase()) {
1091  int FI = Addr.getFI();
1092  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1093  // and alignment should be based on the VT.
1094  MMO = FuncInfo.MF->getMachineMemOperand(
1095  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1096  MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1097  // Now add the rest of the operands.
1098  MIB.addFrameIndex(FI).addImm(Offset);
1099  } else {
1100  assert(Addr.isRegBase() && "Unexpected address kind.");
1101  const MCInstrDesc &II = MIB->getDesc();
1102  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1103  Addr.setReg(
1104  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1105  Addr.setOffsetReg(
1106  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1107  if (Addr.getOffsetReg()) {
1108  assert(Addr.getOffset() == 0 && "Unexpected offset");
1109  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1110  Addr.getExtendType() == AArch64_AM::SXTX;
1111  MIB.addReg(Addr.getReg());
1112  MIB.addReg(Addr.getOffsetReg());
1113  MIB.addImm(IsSigned);
1114  MIB.addImm(Addr.getShift() != 0);
1115  } else
1116  MIB.addReg(Addr.getReg()).addImm(Offset);
1117  }
1118 
1119  if (MMO)
1120  MIB.addMemOperand(MMO);
1121 }
1122 
1123 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1124  const Value *RHS, bool SetFlags,
1125  bool WantResult, bool IsZExt) {
1126  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1127  bool NeedExtend = false;
1128  switch (RetVT.SimpleTy) {
1129  default:
1130  return 0;
1131  case MVT::i1:
1132  NeedExtend = true;
1133  break;
1134  case MVT::i8:
1135  NeedExtend = true;
1136  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1137  break;
1138  case MVT::i16:
1139  NeedExtend = true;
1140  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1141  break;
1142  case MVT::i32: // fall-through
1143  case MVT::i64:
1144  break;
1145  }
1146  MVT SrcVT = RetVT;
1147  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1148 
1149  // Canonicalize immediates to the RHS first.
1150  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1151  std::swap(LHS, RHS);
1152 
1153  // Canonicalize mul by power of 2 to the RHS.
1154  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1155  if (isMulPowOf2(LHS))
1156  std::swap(LHS, RHS);
1157 
1158  // Canonicalize shift immediate to the RHS.
1159  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1160  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1161  if (isa<ConstantInt>(SI->getOperand(1)))
1162  if (SI->getOpcode() == Instruction::Shl ||
1163  SI->getOpcode() == Instruction::LShr ||
1164  SI->getOpcode() == Instruction::AShr )
1165  std::swap(LHS, RHS);
1166 
1167  unsigned LHSReg = getRegForValue(LHS);
1168  if (!LHSReg)
1169  return 0;
1170  bool LHSIsKill = hasTrivialKill(LHS);
1171 
1172  if (NeedExtend)
1173  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1174 
1175  unsigned ResultReg = 0;
1176  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1177  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1178  if (C->isNegative())
1179  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1180  SetFlags, WantResult);
1181  else
1182  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1183  WantResult);
1184  } else if (const auto *C = dyn_cast<Constant>(RHS))
1185  if (C->isNullValue())
1186  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1187  WantResult);
1188 
1189  if (ResultReg)
1190  return ResultReg;
1191 
1192  // Only extend the RHS within the instruction if there is a valid extend type.
1193  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1194  isValueAvailable(RHS)) {
1195  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1196  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1197  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1198  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1199  if (!RHSReg)
1200  return 0;
1201  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1202  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1203  RHSIsKill, ExtendType, C->getZExtValue(),
1204  SetFlags, WantResult);
1205  }
1206  unsigned RHSReg = getRegForValue(RHS);
1207  if (!RHSReg)
1208  return 0;
1209  bool RHSIsKill = hasTrivialKill(RHS);
1210  return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1211  ExtendType, 0, SetFlags, WantResult);
1212  }
1213 
1214  // Check if the mul can be folded into the instruction.
1215  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1216  if (isMulPowOf2(RHS)) {
1217  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1218  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1219 
1220  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1221  if (C->getValue().isPowerOf2())
1222  std::swap(MulLHS, MulRHS);
1223 
1224  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1225  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1226  unsigned RHSReg = getRegForValue(MulLHS);
1227  if (!RHSReg)
1228  return 0;
1229  bool RHSIsKill = hasTrivialKill(MulLHS);
1230  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1231  RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1232  WantResult);
1233  if (ResultReg)
1234  return ResultReg;
1235  }
1236  }
1237 
1238  // Check if the shift can be folded into the instruction.
1239  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1240  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1241  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1242  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1243  switch (SI->getOpcode()) {
1244  default: break;
1245  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1246  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1247  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1248  }
1249  uint64_t ShiftVal = C->getZExtValue();
1250  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1251  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1252  if (!RHSReg)
1253  return 0;
1254  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1255  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1256  RHSIsKill, ShiftType, ShiftVal, SetFlags,
1257  WantResult);
1258  if (ResultReg)
1259  return ResultReg;
1260  }
1261  }
1262  }
1263  }
1264 
1265  unsigned RHSReg = getRegForValue(RHS);
1266  if (!RHSReg)
1267  return 0;
1268  bool RHSIsKill = hasTrivialKill(RHS);
1269 
1270  if (NeedExtend)
1271  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1272 
1273  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1274  SetFlags, WantResult);
1275 }
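// Illustrative note (not part of the original source): the helpers above are
// tried roughly in order of decreasing profitability: an encodable immediate
// (ADD/SUB Wd, Wn, #imm), an extended register (ADD Wd, Wn, Wm, uxtb/sxth/...),
// a shifted register folded from a mul-by-power-of-2 or an explicit shift
// (e.g. "a + (b << 2)" -> "add w0, w1, w2, lsl #2"), and finally the plain
// register-register form.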
1276 
1277 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1278  bool LHSIsKill, unsigned RHSReg,
1279  bool RHSIsKill, bool SetFlags,
1280  bool WantResult) {
1281  assert(LHSReg && RHSReg && "Invalid register number.");
1282 
1283  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1284  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1285  return 0;
1286 
1287  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1288  return 0;
1289 
1290  static const unsigned OpcTable[2][2][2] = {
1291  { { AArch64::SUBWrr, AArch64::SUBXrr },
1292  { AArch64::ADDWrr, AArch64::ADDXrr } },
1293  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1294  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1295  };
1296  bool Is64Bit = RetVT == MVT::i64;
1297  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1298  const TargetRegisterClass *RC =
1299  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1300  unsigned ResultReg;
1301  if (WantResult)
1302  ResultReg = createResultReg(RC);
1303  else
1304  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1305 
1306  const MCInstrDesc &II = TII.get(Opc);
1307  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1308  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1309  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1310  .addReg(LHSReg, getKillRegState(LHSIsKill))
1311  .addReg(RHSReg, getKillRegState(RHSIsKill));
1312  return ResultReg;
1313 }
1314 
1315 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1316  bool LHSIsKill, uint64_t Imm,
1317  bool SetFlags, bool WantResult) {
1318  assert(LHSReg && "Invalid register number.");
1319 
1320  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1321  return 0;
1322 
1323  unsigned ShiftImm;
1324  if (isUInt<12>(Imm))
1325  ShiftImm = 0;
1326  else if ((Imm & 0xfff000) == Imm) {
1327  ShiftImm = 12;
1328  Imm >>= 12;
1329  } else
1330  return 0;
1331 
1332  static const unsigned OpcTable[2][2][2] = {
1333  { { AArch64::SUBWri, AArch64::SUBXri },
1334  { AArch64::ADDWri, AArch64::ADDXri } },
1335  { { AArch64::SUBSWri, AArch64::SUBSXri },
1336  { AArch64::ADDSWri, AArch64::ADDSXri } }
1337  };
1338  bool Is64Bit = RetVT == MVT::i64;
1339  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1340  const TargetRegisterClass *RC;
1341  if (SetFlags)
1342  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1343  else
1344  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1345  unsigned ResultReg;
1346  if (WantResult)
1347  ResultReg = createResultReg(RC);
1348  else
1349  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1350 
1351  const MCInstrDesc &II = TII.get(Opc);
1352  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1353  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1354  .addReg(LHSReg, getKillRegState(LHSIsKill))
1355  .addImm(Imm)
1356  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1357  return ResultReg;
1358 }
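// Illustrative note (not part of the original source): arithmetic immediates
// are 12 bits, optionally shifted left by 12. So 0xfff and 0x123000 are
// encodable ("add x0, x1, #0x123, lsl #12" for the latter), while 0x1001 is
// not, which makes this helper return 0 so the caller falls back to a
// register operand.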
1359 
1360 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1361  bool LHSIsKill, unsigned RHSReg,
1362  bool RHSIsKill,
1363  AArch64_AM::ShiftExtendType ShiftType,
1364  uint64_t ShiftImm, bool SetFlags,
1365  bool WantResult) {
1366  assert(LHSReg && RHSReg && "Invalid register number.");
1367  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1368  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1369 
1370  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1371  return 0;
1372 
1373  // Don't deal with undefined shifts.
1374  if (ShiftImm >= RetVT.getSizeInBits())
1375  return 0;
1376 
1377  static const unsigned OpcTable[2][2][2] = {
1378  { { AArch64::SUBWrs, AArch64::SUBXrs },
1379  { AArch64::ADDWrs, AArch64::ADDXrs } },
1380  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1381  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1382  };
1383  bool Is64Bit = RetVT == MVT::i64;
1384  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1385  const TargetRegisterClass *RC =
1386  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1387  unsigned ResultReg;
1388  if (WantResult)
1389  ResultReg = createResultReg(RC);
1390  else
1391  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1392 
1393  const MCInstrDesc &II = TII.get(Opc);
1394  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1395  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1396  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1397  .addReg(LHSReg, getKillRegState(LHSIsKill))
1398  .addReg(RHSReg, getKillRegState(RHSIsKill))
1399  .addImm(getShifterImm(ShiftType, ShiftImm));
1400  return ResultReg;
1401 }
1402 
1403 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1404  bool LHSIsKill, unsigned RHSReg,
1405  bool RHSIsKill,
1406  AArch64_AM::ShiftExtendType ExtType,
1407  uint64_t ShiftImm, bool SetFlags,
1408  bool WantResult) {
1409  assert(LHSReg && RHSReg && "Invalid register number.");
1410  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414  return 0;
1415 
1416  if (ShiftImm >= 4)
1417  return 0;
1418 
1419  static const unsigned OpcTable[2][2][2] = {
1420  { { AArch64::SUBWrx, AArch64::SUBXrx },
1421  { AArch64::ADDWrx, AArch64::ADDXrx } },
1422  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1424  };
1425  bool Is64Bit = RetVT == MVT::i64;
1426  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427  const TargetRegisterClass *RC = nullptr;
1428  if (SetFlags)
1429  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430  else
1431  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432  unsigned ResultReg;
1433  if (WantResult)
1434  ResultReg = createResultReg(RC);
1435  else
1436  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438  const MCInstrDesc &II = TII.get(Opc);
1439  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442  .addReg(LHSReg, getKillRegState(LHSIsKill))
1443  .addReg(RHSReg, getKillRegState(RHSIsKill))
1444  .addImm(getArithExtendImm(ExtType, ShiftImm));
1445  return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449  Type *Ty = LHS->getType();
1450  EVT EVT = TLI.getValueType(DL, Ty, true);
1451  if (!EVT.isSimple())
1452  return false;
1453  MVT VT = EVT.getSimpleVT();
1454 
1455  switch (VT.SimpleTy) {
1456  default:
1457  return false;
1458  case MVT::i1:
1459  case MVT::i8:
1460  case MVT::i16:
1461  case MVT::i32:
1462  case MVT::i64:
1463  return emitICmp(VT, LHS, RHS, IsZExt);
1464  case MVT::f32:
1465  case MVT::f64:
1466  return emitFCmp(VT, LHS, RHS);
1467  }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471  bool IsZExt) {
1472  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1477  uint64_t Imm) {
1478  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1479  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1480 }
1481 
1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484  return false;
1485 
1486  // Check to see if the 2nd operand is a constant that we can encode directly
1487  // in the compare.
1488  bool UseImm = false;
1489  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490  if (CFP->isZero() && !CFP->isNegative())
1491  UseImm = true;
1492 
1493  unsigned LHSReg = getRegForValue(LHS);
1494  if (!LHSReg)
1495  return false;
1496  bool LHSIsKill = hasTrivialKill(LHS);
1497 
1498  if (UseImm) {
1499  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1501  .addReg(LHSReg, getKillRegState(LHSIsKill));
1502  return true;
1503  }
1504 
1505  unsigned RHSReg = getRegForValue(RHS);
1506  if (!RHSReg)
1507  return false;
1508  bool RHSIsKill = hasTrivialKill(RHS);
1509 
1510  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1511  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1512  .addReg(LHSReg, getKillRegState(LHSIsKill))
1513  .addReg(RHSReg, getKillRegState(RHSIsKill));
1514  return true;
1515 }
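// Illustrative note (not part of the original source): the immediate form used
// above is the compare-against-zero variant, e.g. "fcmp s0, #0.0". The code
// only uses it for positive zero, hence the CFP->isZero() && !CFP->isNegative()
// check; everything else goes through the register-register "fcmp s0, s1".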
1516 
1517 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1518  bool SetFlags, bool WantResult, bool IsZExt) {
1519  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1520  IsZExt);
1521 }
1522 
1523 /// This method is a wrapper to simplify add emission.
1524 ///
1525 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1526 /// that fails, then try to materialize the immediate into a register and use
1527 /// emitAddSub_rr instead.
1528 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1529  int64_t Imm) {
1530  unsigned ResultReg;
1531  if (Imm < 0)
1532  ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1533  else
1534  ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1535 
1536  if (ResultReg)
1537  return ResultReg;
1538 
1539  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1540  if (!CReg)
1541  return 0;
1542 
1543  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1544  return ResultReg;
1545 }
1546 
1547 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1548  bool SetFlags, bool WantResult, bool IsZExt) {
1549  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1550  IsZExt);
1551 }
1552 
1553 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1554  bool LHSIsKill, unsigned RHSReg,
1555  bool RHSIsKill, bool WantResult) {
1556  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1557  RHSIsKill, /*SetFlags=*/true, WantResult);
1558 }
1559 
1560 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1561  bool LHSIsKill, unsigned RHSReg,
1562  bool RHSIsKill,
1563  AArch64_AM::ShiftExtendType ShiftType,
1564  uint64_t ShiftImm, bool WantResult) {
1565  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1566  RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1567  WantResult);
1568 }
1569 
1570 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1571  const Value *LHS, const Value *RHS) {
1572  // Canonicalize immediates to the RHS first.
1573  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1574  std::swap(LHS, RHS);
1575 
1576  // Canonicalize mul by power-of-2 to the RHS.
1577  if (LHS->hasOneUse() && isValueAvailable(LHS))
1578  if (isMulPowOf2(LHS))
1579  std::swap(LHS, RHS);
1580 
1581  // Canonicalize shift immediate to the RHS.
1582  if (LHS->hasOneUse() && isValueAvailable(LHS))
1583  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1584  if (isa<ConstantInt>(SI->getOperand(1)))
1585  std::swap(LHS, RHS);
1586 
1587  unsigned LHSReg = getRegForValue(LHS);
1588  if (!LHSReg)
1589  return 0;
1590  bool LHSIsKill = hasTrivialKill(LHS);
1591 
1592  unsigned ResultReg = 0;
1593  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1594  uint64_t Imm = C->getZExtValue();
1595  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1596  }
1597  if (ResultReg)
1598  return ResultReg;
1599 
1600  // Check if the mul can be folded into the instruction.
1601  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1602  if (isMulPowOf2(RHS)) {
1603  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1604  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1605 
1606  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1607  if (C->getValue().isPowerOf2())
1608  std::swap(MulLHS, MulRHS);
1609 
1610  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1611  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1612 
1613  unsigned RHSReg = getRegForValue(MulLHS);
1614  if (!RHSReg)
1615  return 0;
1616  bool RHSIsKill = hasTrivialKill(MulLHS);
1617  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1618  RHSIsKill, ShiftVal);
1619  if (ResultReg)
1620  return ResultReg;
1621  }
1622  }
1623 
1624  // Check if the shift can be folded into the instruction.
1625  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1626  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1627  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1628  uint64_t ShiftVal = C->getZExtValue();
1629  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1630  if (!RHSReg)
1631  return 0;
1632  bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1633  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1634  RHSIsKill, ShiftVal);
1635  if (ResultReg)
1636  return ResultReg;
1637  }
1638  }
1639 
1640  unsigned RHSReg = getRegForValue(RHS);
1641  if (!RHSReg)
1642  return 0;
1643  bool RHSIsKill = hasTrivialKill(RHS);
1644 
1645  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1646  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1647  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1648  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1649  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1650  }
1651  return ResultReg;
1652 }
1653 
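/// Emit a logical operation with a register LHS and an immediate RHS. Returns
/// 0 if the immediate cannot be encoded as an AArch64 logical immediate for
/// the chosen register size.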
1654 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1655  unsigned LHSReg, bool LHSIsKill,
1656  uint64_t Imm) {
1657  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658  "ISD nodes are not consecutive!");
1659  static const unsigned OpcTable[3][2] = {
1660  { AArch64::ANDWri, AArch64::ANDXri },
1661  { AArch64::ORRWri, AArch64::ORRXri },
1662  { AArch64::EORWri, AArch64::EORXri }
1663  };
1664  const TargetRegisterClass *RC;
1665  unsigned Opc;
1666  unsigned RegSize;
1667  switch (RetVT.SimpleTy) {
1668  default:
1669  return 0;
1670  case MVT::i1:
1671  case MVT::i8:
1672  case MVT::i16:
1673  case MVT::i32: {
1674  unsigned Idx = ISDOpc - ISD::AND;
1675  Opc = OpcTable[Idx][0];
1676  RC = &AArch64::GPR32spRegClass;
1677  RegSize = 32;
1678  break;
1679  }
1680  case MVT::i64:
1681  Opc = OpcTable[ISDOpc - ISD::AND][1];
1682  RC = &AArch64::GPR64spRegClass;
1683  RegSize = 64;
1684  break;
1685  }
1686 
1687  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1688  return 0;
1689 
1690  unsigned ResultReg =
1691  fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1692  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1693  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1696  }
1697  return ResultReg;
1698 }
1699 
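/// Emit a logical operation where the RHS register is shifted left by a
/// constant amount. Bails out on shift amounts that would be undefined for
/// the value type.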
1700 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701  unsigned LHSReg, bool LHSIsKill,
1702  unsigned RHSReg, bool RHSIsKill,
1703  uint64_t ShiftImm) {
1704  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705  "ISD nodes are not consecutive!");
1706  static const unsigned OpcTable[3][2] = {
1707  { AArch64::ANDWrs, AArch64::ANDXrs },
1708  { AArch64::ORRWrs, AArch64::ORRXrs },
1709  { AArch64::EORWrs, AArch64::EORXrs }
1710  };
1711 
1712  // Don't deal with undefined shifts.
1713  if (ShiftImm >= RetVT.getSizeInBits())
1714  return 0;
1715 
1716  const TargetRegisterClass *RC;
1717  unsigned Opc;
1718  switch (RetVT.SimpleTy) {
1719  default:
1720  return 0;
1721  case MVT::i1:
1722  case MVT::i8:
1723  case MVT::i16:
1724  case MVT::i32:
1725  Opc = OpcTable[ISDOpc - ISD::AND][0];
1726  RC = &AArch64::GPR32RegClass;
1727  break;
1728  case MVT::i64:
1729  Opc = OpcTable[ISDOpc - ISD::AND][1];
1730  RC = &AArch64::GPR64RegClass;
1731  break;
1732  }
1733  unsigned ResultReg =
1734  fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1735  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1736  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1739  }
1740  return ResultReg;
1741 }
1742 
1743 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1744  uint64_t Imm) {
1745  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1746 }
1747 
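/// Emit a load of type VT, optionally folding a following sign-/zero-extend
/// to RetVT. Chooses between unscaled, scaled-immediate, and register-offset
/// addressing based on the simplified Address; i1 results are masked to a
/// single bit and zero-extending loads to i64 are widened via SUBREG_TO_REG.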
1748 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749  bool WantZExt, MachineMemOperand *MMO) {
1750  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751  return 0;
1752 
1753  // Simplify this down to something we can handle.
1754  if (!simplifyAddress(Addr, VT))
1755  return 0;
1756 
1757  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758  if (!ScaleFactor)
1759  llvm_unreachable("Unexpected value type.");
1760 
1761  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1763  bool UseScaled = true;
1764  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765  UseScaled = false;
1766  ScaleFactor = 1;
1767  }
1768 
1769  static const unsigned GPOpcTable[2][8][4] = {
1770  // Sign-extend.
1771  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1772  AArch64::LDURXi },
1773  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1774  AArch64::LDURXi },
1775  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1776  AArch64::LDRXui },
1777  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1778  AArch64::LDRXui },
1779  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780  AArch64::LDRXroX },
1781  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782  AArch64::LDRXroX },
1783  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784  AArch64::LDRXroW },
1785  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786  AArch64::LDRXroW }
1787  },
1788  // Zero-extend.
1789  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1790  AArch64::LDURXi },
1791  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1792  AArch64::LDURXi },
1793  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1794  AArch64::LDRXui },
1795  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1796  AArch64::LDRXui },
1797  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1798  AArch64::LDRXroX },
1799  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1800  AArch64::LDRXroX },
1801  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1802  AArch64::LDRXroW },
1803  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1804  AArch64::LDRXroW }
1805  }
1806  };
1807 
1808  static const unsigned FPOpcTable[4][2] = {
1809  { AArch64::LDURSi, AArch64::LDURDi },
1810  { AArch64::LDRSui, AArch64::LDRDui },
1811  { AArch64::LDRSroX, AArch64::LDRDroX },
1812  { AArch64::LDRSroW, AArch64::LDRDroW }
1813  };
1814 
1815  unsigned Opc;
1816  const TargetRegisterClass *RC;
1817  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818  Addr.getOffsetReg();
1819  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821  Addr.getExtendType() == AArch64_AM::SXTW)
1822  Idx++;
1823 
1824  bool IsRet64Bit = RetVT == MVT::i64;
1825  switch (VT.SimpleTy) {
1826  default:
1827  llvm_unreachable("Unexpected value type.");
1828  case MVT::i1: // Intentional fall-through.
1829  case MVT::i8:
1830  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831  RC = (IsRet64Bit && !WantZExt) ?
1832  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1833  break;
1834  case MVT::i16:
1835  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836  RC = (IsRet64Bit && !WantZExt) ?
1837  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1838  break;
1839  case MVT::i32:
1840  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841  RC = (IsRet64Bit && !WantZExt) ?
1842  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843  break;
1844  case MVT::i64:
1845  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846  RC = &AArch64::GPR64RegClass;
1847  break;
1848  case MVT::f32:
1849  Opc = FPOpcTable[Idx][0];
1850  RC = &AArch64::FPR32RegClass;
1851  break;
1852  case MVT::f64:
1853  Opc = FPOpcTable[Idx][1];
1854  RC = &AArch64::FPR64RegClass;
1855  break;
1856  }
1857 
1858  // Create the base instruction, then add the operands.
1859  unsigned ResultReg = createResultReg(RC);
1860  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861  TII.get(Opc), ResultReg);
1862  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1863 
1864  // Loading an i1 requires special handling.
1865  if (VT == MVT::i1) {
1866  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1867  assert(ANDReg && "Unexpected AND instruction emission failure.");
1868  ResultReg = ANDReg;
1869  }
1870 
1871  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1872  // the 32bit reg to a 64bit reg.
1873  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1876  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877  .addImm(0)
1878  .addReg(ResultReg, getKillRegState(true))
1879  .addImm(AArch64::sub_32);
1880  ResultReg = Reg64;
1881  }
1882  return ResultReg;
1883 }
1884 
1885 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886  MVT VT;
1887  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888  return false;
1889 
1890  if (VT.isVector())
1891  return selectOperator(I, I->getOpcode());
1892 
1893  unsigned ResultReg;
1894  switch (I->getOpcode()) {
1895  default:
1896  llvm_unreachable("Unexpected instruction.");
1897  case Instruction::Add:
1898  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899  break;
1900  case Instruction::Sub:
1901  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902  break;
1903  }
1904  if (!ResultReg)
1905  return false;
1906 
1907  updateValueMap(I, ResultReg);
1908  return true;
1909 }
1910 
1911 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912  MVT VT;
1913  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914  return false;
1915 
1916  if (VT.isVector())
1917  return selectOperator(I, I->getOpcode());
1918 
1919  unsigned ResultReg;
1920  switch (I->getOpcode()) {
1921  default:
1922  llvm_unreachable("Unexpected instruction.");
1923  case Instruction::And:
1924  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925  break;
1926  case Instruction::Or:
1927  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928  break;
1929  case Instruction::Xor:
1930  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931  break;
1932  }
1933  if (!ResultReg)
1934  return false;
1935 
1936  updateValueMap(I, ResultReg);
1937  return true;
1938 }
1939 
1940 bool AArch64FastISel::selectLoad(const Instruction *I) {
1941  MVT VT;
1942  // Verify we have a legal type before going any further. Currently, we handle
1943  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946  cast<LoadInst>(I)->isAtomic())
1947  return false;
1948 
1949  const Value *SV = I->getOperand(0);
1950  if (TLI.supportSwiftError()) {
1951  // Swifterror values can come from either a function parameter with
1952  // swifterror attribute or an alloca with swifterror attribute.
1953  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954  if (Arg->hasSwiftErrorAttr())
1955  return false;
1956  }
1957 
1958  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959  if (Alloca->isSwiftError())
1960  return false;
1961  }
1962  }
1963 
1964  // See if we can handle this address.
1965  Address Addr;
1966  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967  return false;
1968 
1969  // Fold the following sign-/zero-extend into the load instruction.
1970  bool WantZExt = true;
1971  MVT RetVT = VT;
1972  const Value *IntExtVal = nullptr;
1973  if (I->hasOneUse()) {
1974  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975  if (isTypeSupported(ZE->getType(), RetVT))
1976  IntExtVal = ZE;
1977  else
1978  RetVT = VT;
1979  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980  if (isTypeSupported(SE->getType(), RetVT))
1981  IntExtVal = SE;
1982  else
1983  RetVT = VT;
1984  WantZExt = false;
1985  }
1986  }
1987 
1988  unsigned ResultReg =
1989  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990  if (!ResultReg)
1991  return false;
1992 
1993  // There are a few different cases we have to handle, because the load or the
1994  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1995  // SelectionDAG. There is also an ordering issue when both instructions are in
1996  // different basic blocks.
1997  // 1.) The load instruction is selected by FastISel, but the integer extend
1998  // not. This usually happens when the integer extend is in a different
1999  // basic block and SelectionDAG took over for that basic block.
2000  // 2.) The load instruction is selected before the integer extend. This only
2001  // happens when the integer extend is in a different basic block.
2002  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003  // by FastISel. This happens if there are instructions between the load
2004  // and the integer extend that couldn't be selected by FastISel.
2005  if (IntExtVal) {
2006  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008  // it when it selects the integer extend.
2009  unsigned Reg = lookUpRegForValue(IntExtVal);
2010  auto *MI = MRI.getUniqueVRegDef(Reg);
2011  if (!MI) {
2012  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013  if (WantZExt) {
2014  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016  ResultReg = std::prev(I)->getOperand(0).getReg();
2017  removeDeadCode(I, std::next(I));
2018  } else
2019  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020  /*IsKill=*/true,
2021  AArch64::sub_32);
2022  }
2023  updateValueMap(I, ResultReg);
2024  return true;
2025  }
2026 
2027  // The integer extend has already been emitted - delete all the instructions
2028  // that have been emitted by the integer extend lowering code and use the
2029  // result from the load instruction directly.
2030  while (MI) {
2031  Reg = 0;
2032  for (auto &Opnd : MI->uses()) {
2033  if (Opnd.isReg()) {
2034  Reg = Opnd.getReg();
2035  break;
2036  }
2037  }
2038  MachineBasicBlock::iterator I(MI);
2039  removeDeadCode(I, std::next(I));
2040  MI = nullptr;
2041  if (Reg)
2042  MI = MRI.getUniqueVRegDef(Reg);
2043  }
2044  updateValueMap(IntExtVal, ResultReg);
2045  return true;
2046  }
2047 
2048  updateValueMap(I, ResultReg);
2049  return true;
2050 }
2051 
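/// Emit a store-release (STLRB/STLRH/STLRW/STLRX) through a plain base
/// register, used for atomic stores with release or stronger ordering.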
2052 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2053  unsigned AddrReg,
2054  MachineMemOperand *MMO) {
2055  unsigned Opc;
2056  switch (VT.SimpleTy) {
2057  default: return false;
2058  case MVT::i8: Opc = AArch64::STLRB; break;
2059  case MVT::i16: Opc = AArch64::STLRH; break;
2060  case MVT::i32: Opc = AArch64::STLRW; break;
2061  case MVT::i64: Opc = AArch64::STLRX; break;
2062  }
2063 
2064  const MCInstrDesc &II = TII.get(Opc);
2065  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2066  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2067  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2068  .addReg(SrcReg)
2069  .addReg(AddrReg)
2070  .addMemOperand(MMO);
2071  return true;
2072 }
2073 
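/// Emit a store of SrcReg to the given address, picking an unscaled,
/// scaled-immediate, or register-offset store opcode; i1 values are first
/// masked down to a single bit unless the zero register is being stored.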
2074 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2075  MachineMemOperand *MMO) {
2076  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2077  return false;
2078 
2079  // Simplify this down to something we can handle.
2080  if (!simplifyAddress(Addr, VT))
2081  return false;
2082 
2083  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2084  if (!ScaleFactor)
2085  llvm_unreachable("Unexpected value type.");
2086 
2087  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2088  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2089  bool UseScaled = true;
2090  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2091  UseScaled = false;
2092  ScaleFactor = 1;
2093  }
2094 
2095  static const unsigned OpcTable[4][6] = {
2096  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2097  AArch64::STURSi, AArch64::STURDi },
2098  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2099  AArch64::STRSui, AArch64::STRDui },
2100  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2101  AArch64::STRSroX, AArch64::STRDroX },
2102  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2103  AArch64::STRSroW, AArch64::STRDroW }
2104  };
2105 
2106  unsigned Opc;
2107  bool VTIsi1 = false;
2108  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2109  Addr.getOffsetReg();
2110  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2111  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2112  Addr.getExtendType() == AArch64_AM::SXTW)
2113  Idx++;
2114 
2115  switch (VT.SimpleTy) {
2116  default: llvm_unreachable("Unexpected value type.");
2117  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2118  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2119  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2120  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2121  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2122  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2123  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2124  }
2125 
2126  // Storing an i1 requires special handling.
2127  if (VTIsi1 && SrcReg != AArch64::WZR) {
2128  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2129  assert(ANDReg && "Unexpected AND instruction emission failure.");
2130  SrcReg = ANDReg;
2131  }
2132  // Create the base instruction, then add the operands.
2133  const MCInstrDesc &II = TII.get(Opc);
2134  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2135  MachineInstrBuilder MIB =
2136  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2137  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2138 
2139  return true;
2140 }
2141 
2142 bool AArch64FastISel::selectStore(const Instruction *I) {
2143  MVT VT;
2144  const Value *Op0 = I->getOperand(0);
2145  // Verify we have a legal type before going any further. Currently, we handle
2146  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2147  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2148  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2149  return false;
2150 
2151  const Value *PtrV = I->getOperand(1);
2152  if (TLI.supportSwiftError()) {
2153  // Swifterror values can come from either a function parameter with
2154  // swifterror attribute or an alloca with swifterror attribute.
2155  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2156  if (Arg->hasSwiftErrorAttr())
2157  return false;
2158  }
2159 
2160  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2161  if (Alloca->isSwiftError())
2162  return false;
2163  }
2164  }
2165 
2166  // Get the value to be stored into a register. Use the zero register directly
2167  // when possible to avoid an unnecessary copy and a wasted register.
2168  unsigned SrcReg = 0;
2169  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2170  if (CI->isZero())
2171  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2173  if (CF->isZero() && !CF->isNegative()) {
2174  VT = MVT::getIntegerVT(VT.getSizeInBits());
2175  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176  }
2177  }
2178 
2179  if (!SrcReg)
2180  SrcReg = getRegForValue(Op0);
2181 
2182  if (!SrcReg)
2183  return false;
2184 
2185  auto *SI = cast<StoreInst>(I);
2186 
2187  // Try to emit a STLR for seq_cst/release.
2188  if (SI->isAtomic()) {
2189  AtomicOrdering Ord = SI->getOrdering();
2190  // The non-atomic instructions are sufficient for relaxed stores.
2191  if (isReleaseOrStronger(Ord)) {
2192  // The STLR addressing mode only supports a base reg; pass that directly.
2193  unsigned AddrReg = getRegForValue(PtrV);
2194  return emitStoreRelease(VT, SrcReg, AddrReg,
2195  createMachineMemOperandFor(I));
2196  }
2197  }
2198 
2199  // See if we can handle this address.
2200  Address Addr;
2201  if (!computeAddress(PtrV, Addr, Op0->getType()))
2202  return false;
2203 
2204  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205  return false;
2206  return true;
2207 }
2208 
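/// Map an IR comparison predicate onto the AArch64 condition code used by the
/// conditional instruction. AArch64CC::AL is returned for predicates that
/// cannot be expressed with a single compare (FCMP_ONE and FCMP_UEQ).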
2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210  switch (Pred) {
2211  case CmpInst::FCMP_ONE:
2212  case CmpInst::FCMP_UEQ:
2213  default:
2214  // AL is our "false" for now. The other two need more compares.
2215  return AArch64CC::AL;
2216  case CmpInst::ICMP_EQ:
2217  case CmpInst::FCMP_OEQ:
2218  return AArch64CC::EQ;
2219  case CmpInst::ICMP_SGT:
2220  case CmpInst::FCMP_OGT:
2221  return AArch64CC::GT;
2222  case CmpInst::ICMP_SGE:
2223  case CmpInst::FCMP_OGE:
2224  return AArch64CC::GE;
2225  case CmpInst::ICMP_UGT:
2226  case CmpInst::FCMP_UGT:
2227  return AArch64CC::HI;
2228  case CmpInst::FCMP_OLT:
2229  return AArch64CC::MI;
2230  case CmpInst::ICMP_ULE:
2231  case CmpInst::FCMP_OLE:
2232  return AArch64CC::LS;
2233  case CmpInst::FCMP_ORD:
2234  return AArch64CC::VC;
2235  case CmpInst::FCMP_UNO:
2236  return AArch64CC::VS;
2237  case CmpInst::FCMP_UGE:
2238  return AArch64CC::PL;
2239  case CmpInst::ICMP_SLT:
2240  case CmpInst::FCMP_ULT:
2241  return AArch64CC::LT;
2242  case CmpInst::ICMP_SLE:
2243  case CmpInst::FCMP_ULE:
2244  return AArch64CC::LE;
2245  case CmpInst::FCMP_UNE:
2246  case CmpInst::ICMP_NE:
2247  return AArch64CC::NE;
2248  case CmpInst::ICMP_UGE:
2249  return AArch64CC::HS;
2250  case CmpInst::ICMP_ULT:
2251  return AArch64CC::LO;
2252  }
2253 }
2254 
2255 /// Try to emit a combined compare-and-branch instruction.
2256 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258  // will not be produced, as they are conditional branch instructions that do
2259  // not set flags.
2260  if (FuncInfo.MF->getFunction().hasFnAttribute(
2261  Attribute::SpeculativeLoadHardening))
2262  return false;
2263 
2264  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2266  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2267 
2268  const Value *LHS = CI->getOperand(0);
2269  const Value *RHS = CI->getOperand(1);
2270 
2271  MVT VT;
2272  if (!isTypeSupported(LHS->getType(), VT))
2273  return false;
2274 
2275  unsigned BW = VT.getSizeInBits();
2276  if (BW > 64)
2277  return false;
2278 
2279  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2280  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2281 
2282  // Try to take advantage of fallthrough opportunities.
2283  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2284  std::swap(TBB, FBB);
2285  Predicate = CmpInst::getInversePredicate(Predicate);
2286  }
2287 
2288  int TestBit = -1;
2289  bool IsCmpNE;
2290  switch (Predicate) {
2291  default:
2292  return false;
2293  case CmpInst::ICMP_EQ:
2294  case CmpInst::ICMP_NE:
2295  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2296  std::swap(LHS, RHS);
2297 
2298  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2299  return false;
2300 
2301  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2302  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2303  const Value *AndLHS = AI->getOperand(0);
2304  const Value *AndRHS = AI->getOperand(1);
2305 
2306  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2307  if (C->getValue().isPowerOf2())
2308  std::swap(AndLHS, AndRHS);
2309 
2310  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2311  if (C->getValue().isPowerOf2()) {
2312  TestBit = C->getValue().logBase2();
2313  LHS = AndLHS;
2314  }
2315  }
2316 
2317  if (VT == MVT::i1)
2318  TestBit = 0;
2319 
2320  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321  break;
2322  case CmpInst::ICMP_SLT:
2323  case CmpInst::ICMP_SGE:
2324  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2325  return false;
2326 
2327  TestBit = BW - 1;
2328  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329  break;
2330  case CmpInst::ICMP_SGT:
2331  case CmpInst::ICMP_SLE:
2332  if (!isa<ConstantInt>(RHS))
2333  return false;
2334 
2335  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2336  return false;
2337 
2338  TestBit = BW - 1;
2339  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340  break;
2341  } // end switch
2342 
2343  static const unsigned OpcTable[2][2][2] = {
2344  { {AArch64::CBZW, AArch64::CBZX },
2345  {AArch64::CBNZW, AArch64::CBNZX} },
2346  { {AArch64::TBZW, AArch64::TBZX },
2347  {AArch64::TBNZW, AArch64::TBNZX} }
2348  };
2349 
2350  bool IsBitTest = TestBit != -1;
2351  bool Is64Bit = BW == 64;
2352  if (TestBit < 32 && TestBit >= 0)
2353  Is64Bit = false;
2354 
2355  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356  const MCInstrDesc &II = TII.get(Opc);
2357 
2358  unsigned SrcReg = getRegForValue(LHS);
2359  if (!SrcReg)
2360  return false;
2361  bool SrcIsKill = hasTrivialKill(LHS);
2362 
2363  if (BW == 64 && !Is64Bit)
2364  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2365  AArch64::sub_32);
2366 
2367  if ((BW < 32) && !IsBitTest)
2368  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369 
2370  // Emit the combined compare and branch instruction.
2371  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2372  MachineInstrBuilder MIB =
2373  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2374  .addReg(SrcReg, getKillRegState(SrcIsKill));
2375  if (IsBitTest)
2376  MIB.addImm(TestBit);
2377  MIB.addMBB(TBB);
2378 
2379  finishCondBranch(BI->getParent(), TBB, FBB);
2380  return true;
2381 }
2382 
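/// Select a conditional or unconditional branch. Single-use compares are
/// folded into CB(N)Z/TB(N)Z or a compare-plus-Bcc sequence (with an extra Bcc
/// for FCMP_UEQ/FCMP_ONE); constant conditions become an unconditional B, and
/// otherwise the i1 condition is tested with TB(N)Z on bit 0.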
2383 bool AArch64FastISel::selectBranch(const Instruction *I) {
2384  const BranchInst *BI = cast<BranchInst>(I);
2385  if (BI->isUnconditional()) {
2386  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387  fastEmitBranch(MSucc, BI->getDebugLoc());
2388  return true;
2389  }
2390 
2391  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393 
2394  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395  if (CI->hasOneUse() && isValueAvailable(CI)) {
2396  // Try to optimize or fold the cmp.
2397  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398  switch (Predicate) {
2399  default:
2400  break;
2401  case CmpInst::FCMP_FALSE:
2402  fastEmitBranch(FBB, DbgLoc);
2403  return true;
2404  case CmpInst::FCMP_TRUE:
2405  fastEmitBranch(TBB, DbgLoc);
2406  return true;
2407  }
2408 
2409  // Try to emit a combined compare-and-branch first.
2410  if (emitCompareAndBranch(BI))
2411  return true;
2412 
2413  // Try to take advantage of fallthrough opportunities.
2414  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415  std::swap(TBB, FBB);
2416  Predicate = CmpInst::getInversePredicate(Predicate);
2417  }
2418 
2419  // Emit the cmp.
2420  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421  return false;
2422 
2423  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424  // instruction.
2425  AArch64CC::CondCode CC = getCompareCC(Predicate);
2426  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427  switch (Predicate) {
2428  default:
2429  break;
2430  case CmpInst::FCMP_UEQ:
2431  ExtraCC = AArch64CC::EQ;
2432  CC = AArch64CC::VS;
2433  break;
2434  case CmpInst::FCMP_ONE:
2435  ExtraCC = AArch64CC::MI;
2436  CC = AArch64CC::GT;
2437  break;
2438  }
2439  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440 
2441  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442  if (ExtraCC != AArch64CC::AL) {
2443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444  .addImm(ExtraCC)
2445  .addMBB(TBB);
2446  }
2447 
2448  // Emit the branch.
2449  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2450  .addImm(CC)
2451  .addMBB(TBB);
2452 
2453  finishCondBranch(BI->getParent(), TBB, FBB);
2454  return true;
2455  }
2456  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457  uint64_t Imm = CI->getZExtValue();
2458  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2460  .addMBB(Target);
2461 
2462  // Obtain the branch probability and add the target to the successor list.
2463  if (FuncInfo.BPI) {
2464  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465  BI->getParent(), Target->getBasicBlock());
2466  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467  } else
2468  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469  return true;
2470  } else {
2471  AArch64CC::CondCode CC = AArch64CC::AL;
2472  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473  // Fake request the condition, otherwise the intrinsic might be completely
2474  // optimized away.
2475  unsigned CondReg = getRegForValue(BI->getCondition());
2476  if (!CondReg)
2477  return false;
2478 
2479  // Emit the branch.
2480  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2481  .addImm(CC)
2482  .addMBB(TBB);
2483 
2484  finishCondBranch(BI->getParent(), TBB, FBB);
2485  return true;
2486  }
2487  }
2488 
2489  unsigned CondReg = getRegForValue(BI->getCondition());
2490  if (CondReg == 0)
2491  return false;
2492  bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2493 
2494  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495  unsigned Opcode = AArch64::TBNZW;
2496  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497  std::swap(TBB, FBB);
2498  Opcode = AArch64::TBZW;
2499  }
2500 
2501  const MCInstrDesc &II = TII.get(Opcode);
2502  unsigned ConstrainedCondReg
2503  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2505  .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2506  .addImm(0)
2507  .addMBB(TBB);
2508 
2509  finishCondBranch(BI->getParent(), TBB, FBB);
2510  return true;
2511 }
2512 
2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2516  if (AddrReg == 0)
2517  return false;
2518 
2519  // Emit the indirect branch.
2520  const MCInstrDesc &II = TII.get(AArch64::BR);
2521  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2522  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2523 
2524  // Make sure the CFG is up-to-date.
2525  for (auto *Succ : BI->successors())
2526  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2527 
2528  return true;
2529 }
2530 
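/// Select an integer or floating-point compare and materialize its boolean
/// result with CSINC; FCMP_UEQ and FCMP_ONE require two chained CSINCs.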
2531 bool AArch64FastISel::selectCmp(const Instruction *I) {
2532  const CmpInst *CI = cast<CmpInst>(I);
2533 
2534  // Vectors of i1 are weird: bail out.
2535  if (CI->getType()->isVectorTy())
2536  return false;
2537 
2538  // Try to optimize or fold the cmp.
2539  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2540  unsigned ResultReg = 0;
2541  switch (Predicate) {
2542  default:
2543  break;
2544  case CmpInst::FCMP_FALSE:
2545  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2547  TII.get(TargetOpcode::COPY), ResultReg)
2548  .addReg(AArch64::WZR, getKillRegState(true));
2549  break;
2550  case CmpInst::FCMP_TRUE:
2551  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2552  break;
2553  }
2554 
2555  if (ResultReg) {
2556  updateValueMap(I, ResultReg);
2557  return true;
2558  }
2559 
2560  // Emit the cmp.
2561  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2562  return false;
2563 
2564  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2565 
2566  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2567  // condition codes are inverted, because they are used by CSINC.
2568  static unsigned CondCodeTable[2][2] = {
2569  { AArch64CC::NE, AArch64CC::VC },
2570  { AArch64CC::PL, AArch64CC::LE }
2571  };
2572  unsigned *CondCodes = nullptr;
2573  switch (Predicate) {
2574  default:
2575  break;
2576  case CmpInst::FCMP_UEQ:
2577  CondCodes = &CondCodeTable[0][0];
2578  break;
2579  case CmpInst::FCMP_ONE:
2580  CondCodes = &CondCodeTable[1][0];
2581  break;
2582  }
2583 
2584  if (CondCodes) {
2585  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587  TmpReg1)
2588  .addReg(AArch64::WZR, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(CondCodes[0]);
2591  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2592  ResultReg)
2593  .addReg(TmpReg1, getKillRegState(true))
2594  .addReg(AArch64::WZR, getKillRegState(true))
2595  .addImm(CondCodes[1]);
2596 
2597  updateValueMap(I, ResultReg);
2598  return true;
2599  }
2600 
2601  // Now set a register based on the comparison.
2602  AArch64CC::CondCode CC = getCompareCC(Predicate);
2603  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2604  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2605  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2606  ResultReg)
2607  .addReg(AArch64::WZR, getKillRegState(true))
2608  .addReg(AArch64::WZR, getKillRegState(true))
2609  .addImm(invertedCC);
2610 
2611  updateValueMap(I, ResultReg);
2612  return true;
2613 }
2614 
2615 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2616 /// value.
2617 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2618  if (!SI->getType()->isIntegerTy(1))
2619  return false;
2620 
2621  const Value *Src1Val, *Src2Val;
2622  unsigned Opc = 0;
2623  bool NeedExtraOp = false;
2624  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2625  if (CI->isOne()) {
2626  Src1Val = SI->getCondition();
2627  Src2Val = SI->getFalseValue();
2628  Opc = AArch64::ORRWrr;
2629  } else {
2630  assert(CI->isZero());
2631  Src1Val = SI->getFalseValue();
2632  Src2Val = SI->getCondition();
2633  Opc = AArch64::BICWrr;
2634  }
2635  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2636  if (CI->isOne()) {
2637  Src1Val = SI->getCondition();
2638  Src2Val = SI->getTrueValue();
2639  Opc = AArch64::ORRWrr;
2640  NeedExtraOp = true;
2641  } else {
2642  assert(CI->isZero());
2643  Src1Val = SI->getCondition();
2644  Src2Val = SI->getTrueValue();
2645  Opc = AArch64::ANDWrr;
2646  }
2647  }
2648 
2649  if (!Opc)
2650  return false;
2651 
2652  unsigned Src1Reg = getRegForValue(Src1Val);
2653  if (!Src1Reg)
2654  return false;
2655  bool Src1IsKill = hasTrivialKill(Src1Val);
2656 
2657  unsigned Src2Reg = getRegForValue(Src2Val);
2658  if (!Src2Reg)
2659  return false;
2660  bool Src2IsKill = hasTrivialKill(Src2Val);
2661 
2662  if (NeedExtraOp) {
2663  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2664  Src1IsKill = true;
2665  }
2666  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2667  Src1IsKill, Src2Reg, Src2IsKill);
2668  updateValueMap(SI, ResultReg);
2669  return true;
2670 }
2671 
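/// Select a select instruction into CSEL/FCSEL, reusing the flags from a
/// foldable compare or XALU intrinsic when possible; otherwise the i1
/// condition is tested with a TST (ANDS against #1).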
2672 bool AArch64FastISel::selectSelect(const Instruction *I) {
2673  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2674  MVT VT;
2675  if (!isTypeSupported(I->getType(), VT))
2676  return false;
2677 
2678  unsigned Opc;
2679  const TargetRegisterClass *RC;
2680  switch (VT.SimpleTy) {
2681  default:
2682  return false;
2683  case MVT::i1:
2684  case MVT::i8:
2685  case MVT::i16:
2686  case MVT::i32:
2687  Opc = AArch64::CSELWr;
2688  RC = &AArch64::GPR32RegClass;
2689  break;
2690  case MVT::i64:
2691  Opc = AArch64::CSELXr;
2692  RC = &AArch64::GPR64RegClass;
2693  break;
2694  case MVT::f32:
2695  Opc = AArch64::FCSELSrrr;
2696  RC = &AArch64::FPR32RegClass;
2697  break;
2698  case MVT::f64:
2699  Opc = AArch64::FCSELDrrr;
2700  RC = &AArch64::FPR64RegClass;
2701  break;
2702  }
2703 
2704  const SelectInst *SI = cast<SelectInst>(I);
2705  const Value *Cond = SI->getCondition();
2706  AArch64CC::CondCode CC = AArch64CC::AL;
2707  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2708 
2709  if (optimizeSelect(SI))
2710  return true;
2711 
2712  // Try to pickup the flags, so we don't have to emit another compare.
2713  if (foldXALUIntrinsic(CC, I, Cond)) {
2714  // Fake request the condition to force emission of the XALU intrinsic.
2715  unsigned CondReg = getRegForValue(Cond);
2716  if (!CondReg)
2717  return false;
2718  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2719  isValueAvailable(Cond)) {
2720  const auto *Cmp = cast<CmpInst>(Cond);
2721  // Try to optimize or fold the cmp.
2722  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2723  const Value *FoldSelect = nullptr;
2724  switch (Predicate) {
2725  default:
2726  break;
2727  case CmpInst::FCMP_FALSE:
2728  FoldSelect = SI->getFalseValue();
2729  break;
2730  case CmpInst::FCMP_TRUE:
2731  FoldSelect = SI->getTrueValue();
2732  break;
2733  }
2734 
2735  if (FoldSelect) {
2736  unsigned SrcReg = getRegForValue(FoldSelect);
2737  if (!SrcReg)
2738  return false;
2739  unsigned UseReg = lookUpRegForValue(SI);
2740  if (UseReg)
2741  MRI.clearKillFlags(UseReg);
2742 
2743  updateValueMap(I, SrcReg);
2744  return true;
2745  }
2746 
2747  // Emit the cmp.
2748  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749  return false;
2750 
2751  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752  CC = getCompareCC(Predicate);
2753  switch (Predicate) {
2754  default:
2755  break;
2756  case CmpInst::FCMP_UEQ:
2757  ExtraCC = AArch64CC::EQ;
2758  CC = AArch64CC::VS;
2759  break;
2760  case CmpInst::FCMP_ONE:
2761  ExtraCC = AArch64CC::MI;
2762  CC = AArch64CC::GT;
2763  break;
2764  }
2765  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766  } else {
2767  unsigned CondReg = getRegForValue(Cond);
2768  if (!CondReg)
2769  return false;
2770  bool CondIsKill = hasTrivialKill(Cond);
2771 
2772  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2773  CondReg = constrainOperandRegClass(II, CondReg, 1);
2774 
2775  // Emit a TST instruction (ANDS wzr, reg, #imm).
2776  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2777  AArch64::WZR)
2778  .addReg(CondReg, getKillRegState(CondIsKill))
2779  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2780  }
2781 
2782  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2783  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2784 
2785  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2786  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2787 
2788  if (!Src1Reg || !Src2Reg)
2789  return false;
2790 
2791  if (ExtraCC != AArch64CC::AL) {
2792  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2793  Src2IsKill, ExtraCC);
2794  Src2IsKill = true;
2795  }
2796  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797  Src2IsKill, CC);
2798  updateValueMap(I, ResultReg);
2799  return true;
2800 }
2801 
2802 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2803  Value *V = I->getOperand(0);
2804  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2805  return false;
2806 
2807  unsigned Op = getRegForValue(V);
2808  if (Op == 0)
2809  return false;
2810 
2811  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2812  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2813  ResultReg).addReg(Op);
2814  updateValueMap(I, ResultReg);
2815  return true;
2816 }
2817 
2818 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2819  Value *V = I->getOperand(0);
2820  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2821  return false;
2822 
2823  unsigned Op = getRegForValue(V);
2824  if (Op == 0)
2825  return false;
2826 
2827  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2828  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2829  ResultReg).addReg(Op);
2830  updateValueMap(I, ResultReg);
2831  return true;
2832 }
2833 
2834 // FPToUI and FPToSI
2835 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2836  MVT DestVT;
2837  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2838  return false;
2839 
2840  unsigned SrcReg = getRegForValue(I->getOperand(0));
2841  if (SrcReg == 0)
2842  return false;
2843 
2844  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2845  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2846  return false;
2847 
2848  unsigned Opc;
2849  if (SrcVT == MVT::f64) {
2850  if (Signed)
2851  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2852  else
2853  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2854  } else {
2855  if (Signed)
2856  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2857  else
2858  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2859  }
2860  unsigned ResultReg = createResultReg(
2861  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2862  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2863  .addReg(SrcReg);
2864  updateValueMap(I, ResultReg);
2865  return true;
2866 }
2867 
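/// Select [su]itofp: i1/i8/i16 sources are first sign- or zero-extended to
/// i32, then converted with SCVTF/UCVTF into the requested FP register class.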
2868 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2869  MVT DestVT;
2870  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2871  return false;
2872  // Let regular ISEL handle FP16
2873  if (DestVT == MVT::f16)
2874  return false;
2875 
2876  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2877  "Unexpected value type.");
2878 
2879  unsigned SrcReg = getRegForValue(I->getOperand(0));
2880  if (!SrcReg)
2881  return false;
2882  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2883 
2884  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2885 
2886  // Handle sign-extension.
2887  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2888  SrcReg =
2889  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2890  if (!SrcReg)
2891  return false;
2892  SrcIsKill = true;
2893  }
2894 
2895  unsigned Opc;
2896  if (SrcVT == MVT::i64) {
2897  if (Signed)
2898  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2899  else
2900  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2901  } else {
2902  if (Signed)
2903  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2904  else
2905  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2906  }
2907 
2908  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2909  SrcIsKill);
2910  updateValueMap(I, ResultReg);
2911  return true;
2912 }
2913 
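/// Lower incoming formal arguments for simple C/Swift signatures with at most
/// eight GPR and eight FPR/vector arguments, copying each live-in physical
/// register into a virtual register.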
2914 bool AArch64FastISel::fastLowerArguments() {
2915  if (!FuncInfo.CanLowerReturn)
2916  return false;
2917 
2918  const Function *F = FuncInfo.Fn;
2919  if (F->isVarArg())
2920  return false;
2921 
2922  CallingConv::ID CC = F->getCallingConv();
2923  if (CC != CallingConv::C && CC != CallingConv::Swift)
2924  return false;
2925 
2926  if (Subtarget->hasCustomCallingConv())
2927  return false;
2928 
2929  // Only handle simple cases of up to 8 GPR and FPR each.
2930  unsigned GPRCnt = 0;
2931  unsigned FPRCnt = 0;
2932  for (auto const &Arg : F->args()) {
2933  if (Arg.hasAttribute(Attribute::ByVal) ||
2934  Arg.hasAttribute(Attribute::InReg) ||
2935  Arg.hasAttribute(Attribute::StructRet) ||
2936  Arg.hasAttribute(Attribute::SwiftSelf) ||
2937  Arg.hasAttribute(Attribute::SwiftError) ||
2938  Arg.hasAttribute(Attribute::Nest))
2939  return false;
2940 
2941  Type *ArgTy = Arg.getType();
2942  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2943  return false;
2944 
2945  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2946  if (!ArgVT.isSimple())
2947  return false;
2948 
2949  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2950  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2951  return false;
2952 
2953  if (VT.isVector() &&
2954  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2955  return false;
2956 
2957  if (VT >= MVT::i1 && VT <= MVT::i64)
2958  ++GPRCnt;
2959  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2960  VT.is128BitVector())
2961  ++FPRCnt;
2962  else
2963  return false;
2964 
2965  if (GPRCnt > 8 || FPRCnt > 8)
2966  return false;
2967  }
2968 
2969  static const MCPhysReg Registers[6][8] = {
2970  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2971  AArch64::W5, AArch64::W6, AArch64::W7 },
2972  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2973  AArch64::X5, AArch64::X6, AArch64::X7 },
2974  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2975  AArch64::H5, AArch64::H6, AArch64::H7 },
2976  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2977  AArch64::S5, AArch64::S6, AArch64::S7 },
2978  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2979  AArch64::D5, AArch64::D6, AArch64::D7 },
2980  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2981  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2982  };
2983 
2984  unsigned GPRIdx = 0;
2985  unsigned FPRIdx = 0;
2986  for (auto const &Arg : F->args()) {
2987  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2988  unsigned SrcReg;
2989  const TargetRegisterClass *RC;
2990  if (VT >= MVT::i1 && VT <= MVT::i32) {
2991  SrcReg = Registers[0][GPRIdx++];
2992  RC = &AArch64::GPR32RegClass;
2993  VT = MVT::i32;
2994  } else if (VT == MVT::i64) {
2995  SrcReg = Registers[1][GPRIdx++];
2996  RC = &AArch64::GPR64RegClass;
2997  } else if (VT == MVT::f16) {
2998  SrcReg = Registers[2][FPRIdx++];
2999  RC = &AArch64::FPR16RegClass;
3000  } else if (VT == MVT::f32) {
3001  SrcReg = Registers[3][FPRIdx++];
3002  RC = &AArch64::FPR32RegClass;
3003  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3004  SrcReg = Registers[4][FPRIdx++];
3005  RC = &AArch64::FPR64RegClass;
3006  } else if (VT.is128BitVector()) {
3007  SrcReg = Registers[5][FPRIdx++];
3008  RC = &AArch64::FPR128RegClass;
3009  } else
3010  llvm_unreachable("Unexpected value type.");
3011 
3012  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3013  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3014  // Without this, EmitLiveInCopies may eliminate the livein if its only
3015  // use is a bitcast (which isn't turned into an instruction).
3016  unsigned ResultReg = createResultReg(RC);
3017  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3018  TII.get(TargetOpcode::COPY), ResultReg)
3019  .addReg(DstReg, getKillRegState(true));
3020  updateValueMap(&Arg, ResultReg);
3021  }
3022  return true;
3023 }
3024 
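/// Lower outgoing call arguments: analyze the calling convention, emit
/// CALLSEQ_START, promote (sign-/zero-extend) values as required, and either
/// copy register arguments into place or store stack arguments relative to SP.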
3025 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3026  SmallVectorImpl<MVT> &OutVTs,
3027  unsigned &NumBytes) {
3028  CallingConv::ID CC = CLI.CallConv;
3029  SmallVector<CCValAssign, 16> ArgLocs;
3030  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3031  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3032 
3033  // Get a count of how many bytes are to be pushed on the stack.
3034  NumBytes = CCInfo.getNextStackOffset();
3035 
3036  // Issue CALLSEQ_START
3037  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3039  .addImm(NumBytes).addImm(0);
3040 
3041  // Process the args.
3042  for (CCValAssign &VA : ArgLocs) {
3043  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3044  MVT ArgVT = OutVTs[VA.getValNo()];
3045 
3046  unsigned ArgReg = getRegForValue(ArgVal);
3047  if (!ArgReg)
3048  return false;
3049 
3050  // Handle arg promotion: SExt, ZExt, AExt.
3051  switch (VA.getLocInfo()) {
3052  case CCValAssign::Full:
3053  break;
3054  case CCValAssign::SExt: {
3055  MVT DestVT = VA.getLocVT();
3056  MVT SrcVT = ArgVT;
3057  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3058  if (!ArgReg)
3059  return false;
3060  break;
3061  }
3062  case CCValAssign::AExt:
3063  // Intentional fall-through.
3064  case CCValAssign::ZExt: {
3065  MVT DestVT = VA.getLocVT();
3066  MVT SrcVT = ArgVT;
3067  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3068  if (!ArgReg)
3069  return false;
3070  break;
3071  }
3072  default:
3073  llvm_unreachable("Unknown arg promotion!");
3074  }
3075 
3076  // Now copy/store arg to correct locations.
3077  if (VA.isRegLoc() && !VA.needsCustom()) {
3078  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3079  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3080  CLI.OutRegs.push_back(VA.getLocReg());
3081  } else if (VA.needsCustom()) {
3082  // FIXME: Handle custom args.
3083  return false;
3084  } else {
3085  assert(VA.isMemLoc() && "Assuming store on stack.");
3086 
3087  // Don't emit stores for undef values.
3088  if (isa<UndefValue>(ArgVal))
3089  continue;
3090 
3091  // Need to store on the stack.
3092  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3093 
3094  unsigned BEAlign = 0;
3095  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3096  BEAlign = 8 - ArgSize;
3097 
3098  Address Addr;
3099  Addr.setKind(Address::RegBase);
3100  Addr.setReg(AArch64::SP);
3101  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3102 
3103  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3104  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3105  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3106  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3107 
3108  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3109  return false;
3110  }
3111  }
3112  return true;
3113 }
3114 
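/// Finish a call: emit CALLSEQ_END and, for non-void calls, copy the single
/// supported return value out of its physical register.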
3115 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3116  unsigned NumBytes) {
3117  CallingConv::ID CC = CLI.CallConv;
3118 
3119  // Issue CALLSEQ_END
3120  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3121  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3122  .addImm(NumBytes).addImm(0);
3123 
3124  // Now the return value.
3125  if (RetVT != MVT::isVoid) {
3126  SmallVector<CCValAssign, 16> RVLocs;
3127  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3128  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3129 
3130  // Only handle a single return value.
3131  if (RVLocs.size() != 1)
3132  return false;
3133 
3134  // Copy all of the result registers out of their specified physreg.
3135  MVT CopyVT = RVLocs[0].getValVT();
3136 
3137  // TODO: Handle big-endian results
3138  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3139  return false;
3140 
3141  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3142  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3143  TII.get(TargetOpcode::COPY), ResultReg)
3144  .addReg(RVLocs[0].getLocReg());
3145  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3146 
3147  CLI.ResultReg = ResultReg;
3148  CLI.NumResultRegs = 1;
3149  }
3150 
3151  return true;
3152 }
3153 
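/// Fast-path call lowering for simple, non-vararg, non-tail calls in the
/// small-addressing and (Mach-O) large code models; vector or oversized
/// arguments and swift-specific attributes fall back to SelectionDAG.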
3154 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155  CallingConv::ID CC = CLI.CallConv;
3156  bool IsTailCall = CLI.IsTailCall;
3157  bool IsVarArg = CLI.IsVarArg;
3158  const Value *Callee = CLI.Callee;
3159  MCSymbol *Symbol = CLI.Symbol;
3160 
3161  if (!Callee && !Symbol)
3162  return false;
3163 
3164  // Allow SelectionDAG isel to handle tail calls.
3165  if (IsTailCall)
3166  return false;
3167 
3168  CodeModel::Model CM = TM.getCodeModel();
3169  // Only support the small-addressing and large code models.
3170  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3171  return false;
3172 
3173  // FIXME: Add large code model support for ELF.
3174  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3175  return false;
3176 
3177  // Let SDISel handle vararg functions.
3178  if (IsVarArg)
3179  return false;
3180 
3181  // FIXME: Only handle *simple* calls for now.
3182  MVT RetVT;
3183  if (CLI.RetTy->isVoidTy())
3184  RetVT = MVT::isVoid;
3185  else if (!isTypeLegal(CLI.RetTy, RetVT))
3186  return false;
3187 
3188  for (auto Flag : CLI.OutFlags)
3189  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3190  Flag.isSwiftSelf() || Flag.isSwiftError())
3191  return false;
3192 
3193  // Set up the argument vectors.
3194  SmallVector<MVT, 16> OutVTs;
3195  OutVTs.reserve(CLI.OutVals.size());
3196 
3197  for (auto *Val : CLI.OutVals) {
3198  MVT VT;
3199  if (!isTypeLegal(Val->getType(), VT) &&
3200  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3201  return false;
3202 
3203  // We don't handle vector parameters yet.
3204  if (VT.isVector() || VT.getSizeInBits() > 64)
3205  return false;
3206 
3207  OutVTs.push_back(VT);
3208  }
3209 
3210  Address Addr;
3211  if (Callee && !computeCallAddress(Callee, Addr))
3212  return false;
3213 
3214  // Handle the arguments now that we've gotten them.
3215  unsigned NumBytes;
3216  if (!processCallArgs(CLI, OutVTs, NumBytes))
3217  return false;
3218 
3219  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3220  if (RegInfo->isAnyArgRegReserved(*MF))
3221  RegInfo->emitReservedArgRegCallError(*MF);
3222 
3223  // Issue the call.
3224  MachineInstrBuilder MIB;
3225  if (Subtarget->useSmallAddressing()) {
3226  const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3227  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3228  if (Symbol)
3229  MIB.addSym(Symbol, 0);
3230  else if (Addr.getGlobalValue())
3231  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3232  else if (Addr.getReg()) {
3233  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3234  MIB.addReg(Reg);
3235  } else
3236  return false;
3237  } else {
3238  unsigned CallReg = 0;
3239  if (Symbol) {
3240  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3241  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3242  ADRPReg)
3243  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3244 
3245  CallReg = createResultReg(&AArch64::GPR64RegClass);
3246  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3247  TII.get(AArch64::LDRXui), CallReg)
3248  .addReg(ADRPReg)
3249  .addSym(Symbol,
3250  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3251  } else if (Addr.getGlobalValue())
3252  CallReg = materializeGV(Addr.getGlobalValue());
3253  else if (Addr.getReg())
3254  CallReg = Addr.getReg();
3255 
3256  if (!CallReg)
3257  return false;
3258 
3259  const MCInstrDesc &II = TII.get(AArch64::BLR);
3260  CallReg = constrainOperandRegClass(II, CallReg, 0);
3261  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3262  }
3263 
3264  // Add implicit physical register uses to the call.
3265  for (auto Reg : CLI.OutRegs)
3266  MIB.addReg(Reg, RegState::Implicit);
3267 
3268  // Add a register mask with the call-preserved registers.
3269  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3270  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3271 
3272  CLI.Call = MIB;
3273 
3274  // Finish off the call including any return values.
3275  return finishCall(CLI, RetVT, NumBytes);
3276 }
3277 
3278 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3279  if (Alignment)
3280  return Len / Alignment <= 4;
3281  else
3282  return Len < 32;
3283 }
3284 
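/// Inline a small memcpy as a sequence of loads and stores, choosing the
/// widest access permitted by the remaining length and the given alignment.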
3285 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3286  uint64_t Len, unsigned Alignment) {
3287  // Make sure we don't bloat code by inlining very large memcpy's.
3288  if (!isMemCpySmall(Len, Alignment))
3289  return false;
3290 
3291  int64_t UnscaledOffset = 0;
3292  Address OrigDest = Dest;
3293  Address OrigSrc = Src;
3294 
3295  while (Len) {
3296  MVT VT;
3297  if (!Alignment || Alignment >= 8) {
3298  if (Len >= 8)
3299  VT = MVT::i64;
3300  else if (Len >= 4)
3301  VT = MVT::i32;
3302  else if (Len >= 2)
3303  VT = MVT::i16;
3304  else {
3305  VT = MVT::i8;
3306  }
3307  } else {
3308  // Bound based on alignment.
3309  if (Len >= 4 && Alignment == 4)
3310  VT = MVT::i32;
3311  else if (Len >= 2 && Alignment == 2)
3312  VT = MVT::i16;
3313  else {
3314  VT = MVT::i8;
3315  }
3316  }
3317 
3318  unsigned ResultReg = emitLoad(VT, VT, Src);
3319  if (!ResultReg)
3320  return false;
3321 
3322  if (!emitStore(VT, ResultReg, Dest))
3323  return false;
3324 
3325  int64_t Size = VT.getSizeInBits() / 8;
3326  Len -= Size;
3327  UnscaledOffset += Size;
3328 
3329  // We need to recompute the unscaled offset for each iteration.
3330  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3331  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3332  }
3333 
3334  return true;
3335 }
3336 
3337 /// Check if it is possible to fold the condition from the XALU intrinsic
3338 /// into the user. The condition code will only be updated on success.
3339 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3340  const Instruction *I,
3341  const Value *Cond) {
3342  if (!isa<ExtractValueInst>(Cond))
3343  return false;
3344 
3345  const auto *EV = cast<ExtractValueInst>(Cond);
3346  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3347  return false;
3348 
3349  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3350  MVT RetVT;
3351  const Function *Callee = II->getCalledFunction();
3352  Type *RetTy =
3353  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3354  if (!isTypeLegal(RetTy, RetVT))
3355  return false;
3356 
3357  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3358  return false;
3359 
3360  const Value *LHS = II->getArgOperand(0);
3361  const Value *RHS = II->getArgOperand(1);
3362 
3363  // Canonicalize immediate to the RHS.
3364  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3365  isCommutativeIntrinsic(II))
3366  std::swap(LHS, RHS);
3367 
3368  // Simplify multiplies.
3369  Intrinsic::ID IID = II->getIntrinsicID();
3370  switch (IID) {
3371  default:
3372  break;
3373  case Intrinsic::smul_with_overflow:
3374  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375  if (C->getValue() == 2)
3376  IID = Intrinsic::sadd_with_overflow;
3377  break;
3378  case Intrinsic::umul_with_overflow:
3379  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3380  if (C->getValue() == 2)
3381  IID = Intrinsic::uadd_with_overflow;
3382  break;
3383  }
3384 
3385  AArch64CC::CondCode TmpCC;
3386  switch (IID) {
3387  default:
3388  return false;
3389  case Intrinsic::sadd_with_overflow:
3390  case Intrinsic::ssub_with_overflow:
3391  TmpCC = AArch64CC::VS;
3392  break;
3393  case Intrinsic::uadd_with_overflow:
3394  TmpCC = AArch64CC::HS;
3395  break;
3396  case Intrinsic::usub_with_overflow:
3397  TmpCC = AArch64CC::LO;
3398  break;
3399  case Intrinsic::smul_with_overflow:
3400  case Intrinsic::umul_with_overflow:
3401  TmpCC = AArch64CC::NE;
3402  break;
3403  }
3404 
3405  // Check if both instructions are in the same basic block.
3406  if (!isValueAvailable(II))
3407  return false;
3408 
3409  // Make sure nothing is in the way.
3410  BasicBlock::const_iterator Start(I);
3411  BasicBlock::const_iterator End(II);
3412  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3413  // We only expect extractvalue instructions between the intrinsic and the
3414  // instruction to be selected.
3415  if (!isa<ExtractValueInst>(Itr))
3416  return false;
3417 
3418  // Check that the extractvalue operand comes from the intrinsic.
3419  const auto *EVI = cast<ExtractValueInst>(Itr);
3420  if (EVI->getAggregateOperand() != II)
3421  return false;
3422  }
3423 
3424  CC = TmpCC;
3425  return true;
3426 }
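// The pattern this folds looks roughly like the following IR, where only
// extractvalue instructions separate the intrinsic from the selected user:
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %val  = extractvalue { i32, i1 } %res, 0
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %cont
// On success the caller can branch on the flags (e.g. B.VS) instead of
// materializing the overflow bit into a register first.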
3427 
3428 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3429  // FIXME: Handle more intrinsics.
3430  switch (II->getIntrinsicID()) {
3431  default: return false;
3432  case Intrinsic::frameaddress: {
3433  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3434  MFI.setFrameAddressIsTaken(true);
3435 
3436  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3437  Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3438  Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3439  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3440  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3441  // Recursively load frame address
3442  // ldr x0, [fp]
3443  // ldr x0, [x0]
3444  // ldr x0, [x0]
3445  // ...
3446  unsigned DestReg;
3447  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3448  while (Depth--) {
3449  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3450  SrcReg, /*IsKill=*/true, 0);
3451  assert(DestReg && "Unexpected LDR instruction emission failure.");
3452  SrcReg = DestReg;
3453  }
3454 
3455  updateValueMap(II, SrcReg);
3456  return true;
3457  }
3458  case Intrinsic::sponentry: {
3459  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3460 
3461  // SP = FP + Fixed Object + 16
3462  int FI = MFI.CreateFixedObject(4, 0, false);
3463  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3464  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3465  TII.get(AArch64::ADDXri), ResultReg)
3466  .addFrameIndex(FI)
3467  .addImm(0)
3468  .addImm(0);
3469 
3470  updateValueMap(II, ResultReg);
3471  return true;
3472  }
3473  case Intrinsic::memcpy:
3474  case Intrinsic::memmove: {
3475  const auto *MTI = cast<MemTransferInst>(II);
3476  // Don't handle volatile.
3477  if (MTI->isVolatile())
3478  return false;
3479 
3480  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3481  // we would emit dead code because we don't currently handle memmoves.
3482  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3483  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3484  // Small memcpy's are common enough that we want to do them without a call
3485  // if possible.
3486  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3487  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3488  MTI->getSourceAlignment());
3489  if (isMemCpySmall(Len, Alignment)) {
3490  Address Dest, Src;
3491  if (!computeAddress(MTI->getRawDest(), Dest) ||
3492  !computeAddress(MTI->getRawSource(), Src))
3493  return false;
3494  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3495  return true;
3496  }
3497  }
3498 
3499  if (!MTI->getLength()->getType()->isIntegerTy(64))
3500  return false;
3501 
3502  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3503  // Fast instruction selection doesn't support the special
3504  // address spaces.
3505  return false;
3506 
3507  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3508  return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3509  }
3510  case Intrinsic::memset: {
3511  const MemSetInst *MSI = cast<MemSetInst>(II);
3512  // Don't handle volatile.
3513  if (MSI->isVolatile())
3514  return false;
3515 
3516  if (!MSI->getLength()->getType()->isIntegerTy(64))
3517  return false;
3518 
3519  if (MSI->getDestAddressSpace() > 255)
3520  // Fast instruction selection doesn't support the special
3521  // address spaces.
3522  return false;
3523 
3524  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3525  }
3526  case Intrinsic::sin:
3527  case Intrinsic::cos:
3528  case Intrinsic::pow: {
3529  MVT RetVT;
3530  if (!isTypeLegal(II->getType(), RetVT))
3531  return false;
3532 
3533  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3534  return false;
3535 
3536  static const RTLIB::Libcall LibCallTable[3][2] = {
3537  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3538  { RTLIB::COS_F32, RTLIB::COS_F64 },
3539  { RTLIB::POW_F32, RTLIB::POW_F64 }
3540  };
3541  RTLIB::Libcall LC;
3542  bool Is64Bit = RetVT == MVT::f64;
3543  switch (II->getIntrinsicID()) {
3544  default:
3545  llvm_unreachable("Unexpected intrinsic.");
3546  case Intrinsic::sin:
3547  LC = LibCallTable[0][Is64Bit];
3548  break;
3549  case Intrinsic::cos:
3550  LC = LibCallTable[1][Is64Bit];
3551  break;
3552  case Intrinsic::pow:
3553  LC = LibCallTable[2][Is64Bit];
3554  break;
3555  }
3556 
3557  ArgListTy Args;
3558  Args.reserve(II->getNumArgOperands());
3559 
3560  // Populate the argument list.
3561  for (auto &Arg : II->arg_operands()) {
3562  ArgListEntry Entry;
3563  Entry.Val = Arg;
3564  Entry.Ty = Arg->getType();
3565  Args.push_back(Entry);
3566  }
3567 
3568  CallLoweringInfo CLI;
3569  MCContext &Ctx = MF->getContext();
3570  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3571  TLI.getLibcallName(LC), std::move(Args));
3572  if (!lowerCallTo(CLI))
3573  return false;
3574  updateValueMap(II, CLI.ResultReg);
3575  return true;
3576  }
3577  case Intrinsic::fabs: {
3578  MVT VT;
3579  if (!isTypeLegal(II->getType(), VT))
3580  return false;
3581 
3582  unsigned Opc;
3583  switch (VT.SimpleTy) {
3584  default:
3585  return false;
3586  case MVT::f32:
3587  Opc = AArch64::FABSSr;
3588  break;
3589  case MVT::f64:
3590  Opc = AArch64::FABSDr;
3591  break;
3592  }
3593  unsigned SrcReg = getRegForValue(II->getOperand(0));
3594  if (!SrcReg)
3595  return false;
3596  bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3597  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3598  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3599  .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3600  updateValueMap(II, ResultReg);
3601  return true;
3602  }
3603  case Intrinsic::trap:
3604  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3605  .addImm(1);
3606  return true;
3607  case Intrinsic::debugtrap: {
3608  if (Subtarget->isTargetWindows()) {
3609  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3610  .addImm(0xF000);
3611  return true;
3612  }
3613  break;
3614  }
3615 
3616  case Intrinsic::sqrt: {
3617  Type *RetTy = II->getCalledFunction()->getReturnType();
3618 
3619  MVT VT;
3620  if (!isTypeLegal(RetTy, VT))
3621  return false;
3622 
3623  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3624  if (!Op0Reg)
3625  return false;
3626  bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3627 
3628  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3629  if (!ResultReg)
3630  return false;
3631 
3632  updateValueMap(II, ResultReg);
3633  return true;
3634  }
3635  case Intrinsic::sadd_with_overflow:
3636  case Intrinsic::uadd_with_overflow:
3637  case Intrinsic::ssub_with_overflow:
3638  case Intrinsic::usub_with_overflow:
3639  case Intrinsic::smul_with_overflow:
3640  case Intrinsic::umul_with_overflow: {
3641  // This implements the basic lowering of the xalu with overflow intrinsics.
3642  const Function *Callee = II->getCalledFunction();
3643  auto *Ty = cast<StructType>(Callee->getReturnType());
3644  Type *RetTy = Ty->getTypeAtIndex(0U);
3645 
3646  MVT VT;
3647  if (!isTypeLegal(RetTy, VT))
3648  return false;
3649 
3650  if (VT != MVT::i32 && VT != MVT::i64)
3651  return false;
3652 
3653  const Value *LHS = II->getArgOperand(0);
3654  const Value *RHS = II->getArgOperand(1);
3655  // Canonicalize immediate to the RHS.
3656  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3657  isCommutativeIntrinsic(II))
3658  std::swap(LHS, RHS);
3659 
3660  // Simplify multiplies.
3661  Intrinsic::ID IID = II->getIntrinsicID();
3662  switch (IID) {
3663  default:
3664  break;
3665  case Intrinsic::smul_with_overflow:
3666  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3667  if (C->getValue() == 2) {
3668  IID = Intrinsic::sadd_with_overflow;
3669  RHS = LHS;
3670  }
3671  break;
3672  case Intrinsic::umul_with_overflow:
3673  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3674  if (C->getValue() == 2) {
3675  IID = Intrinsic::uadd_with_overflow;
3676  RHS = LHS;
3677  }
3678  break;
3679  }
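// E.g. @llvm.smul.with.overflow.i32(%x, 2) is rewritten above as
// @llvm.sadd.with.overflow.i32(%x, %x), which only needs an ADDS.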
3680 
3681  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3682  AArch64CC::CondCode CC = AArch64CC::Invalid;
3683  switch (IID) {
3684  default: llvm_unreachable("Unexpected intrinsic!");
3685  case Intrinsic::sadd_with_overflow:
3686  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687  CC = AArch64CC::VS;
3688  break;
3689  case Intrinsic::uadd_with_overflow:
3690  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3691  CC = AArch64CC::HS;
3692  break;
3693  case Intrinsic::ssub_with_overflow:
3694  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695  CC = AArch64CC::VS;
3696  break;
3697  case Intrinsic::usub_with_overflow:
3698  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3699  CC = AArch64CC::LO;
3700  break;
3701  case Intrinsic::smul_with_overflow: {
3702  CC = AArch64CC::NE;
3703  unsigned LHSReg = getRegForValue(LHS);
3704  if (!LHSReg)
3705  return false;
3706  bool LHSIsKill = hasTrivialKill(LHS);
3707 
3708  unsigned RHSReg = getRegForValue(RHS);
3709  if (!RHSReg)
3710  return false;
3711  bool RHSIsKill = hasTrivialKill(RHS);
3712 
3713  if (VT == MVT::i32) {
3714  MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3715  unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3716  /*IsKill=*/false, 32);
3717  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3718  AArch64::sub_32);
3719  ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3720  AArch64::sub_32);
3721  emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3722  AArch64_AM::ASR, 31, /*WantResult=*/false);
3723  } else {
3724  assert(VT == MVT::i64 && "Unexpected value type.");
3725  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3726  // reused in the next instruction.
3727  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3728  /*IsKill=*/false);
3729  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3730  RHSReg, RHSIsKill);
3731  emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3732  AArch64_AM::ASR, 63, /*WantResult=*/false);
3733  }
3734  break;
3735  }
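// The i32 path above emits approximately (register numbers illustrative):
//   smull x8, w0, w1        // full 64-bit product
//   lsr   x9, x8, #32       // high half of the product
//   cmp   w9, w8, asr #31   // high half must equal the sign of the low half
// with CC = NE signalling overflow; the i64 path instead compares the MULHS
// result against the low product shifted right arithmetically by 63.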
3736  case Intrinsic::umul_with_overflow: {
3737  CC = AArch64CC::NE;
3738  unsigned LHSReg = getRegForValue(LHS);
3739  if (!LHSReg)
3740  return false;
3741  bool LHSIsKill = hasTrivialKill(LHS);
3742 
3743  unsigned RHSReg = getRegForValue(RHS);
3744  if (!RHSReg)
3745  return false;
3746  bool RHSIsKill = hasTrivialKill(RHS);
3747 
3748  if (VT == MVT::i32) {
3749  MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3750  emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3751  /*IsKill=*/false, AArch64_AM::LSR, 32,
3752  /*WantResult=*/false);
3753  MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3754  AArch64::sub_32);
3755  } else {
3756  assert(VT == MVT::i64 && "Unexpected value type.");
3757  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3758  // reused in the next instruction.
3759  MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3760  /*IsKill=*/false);
3761  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3762  RHSReg, RHSIsKill);
3763  emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3764  /*IsKill=*/false, /*WantResult=*/false);
3765  }
3766  break;
3767  }
3768  }
3769 
3770  if (MulReg) {
3771  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3772  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3773  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3774  }
3775 
3776  if (!ResultReg1)
3777  return false;
3778 
3779  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3780  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3781  /*IsKill=*/true, getInvertedCondCode(CC));
3782  (void)ResultReg2;
3783  assert((ResultReg1 + 1) == ResultReg2 &&
3784  "Nonconsecutive result registers.");
3785  updateValueMap(II, ResultReg1, 2);
3786  return true;
3787  }
3788  }
3789  return false;
3790 }
3791 
3792 bool AArch64FastISel::selectRet(const Instruction *I) {
3793  const ReturnInst *Ret = cast<ReturnInst>(I);
3794  const Function &F = *I->getParent()->getParent();
3795 
3796  if (!FuncInfo.CanLowerReturn)
3797  return false;
3798 
3799  if (F.isVarArg())
3800  return false;
3801 
3802  if (TLI.supportSwiftError() &&
3803  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3804  return false;
3805 
3806  if (TLI.supportSplitCSR(FuncInfo.MF))
3807  return false;
3808 
3809  // Build a list of return value registers.
3810  SmallVector<unsigned, 4> RetRegs;
3811 
3812  if (Ret->getNumOperands() > 0) {
3813  CallingConv::ID CC = F.getCallingConv();
3814  SmallVector<ISD::OutputArg, 4> Outs;
3815  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3816 
3817  // Analyze operands of the call, assigning locations to each operand.
3818  SmallVector<CCValAssign, 16> ValLocs;
3819  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3820  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3821  : RetCC_AArch64_AAPCS;
3822  CCInfo.AnalyzeReturn(Outs, RetCC);
3823 
3824  // Only handle a single return value for now.
3825  if (ValLocs.size() != 1)
3826  return false;
3827 
3828  CCValAssign &VA = ValLocs[0];
3829  const Value *RV = Ret->getOperand(0);
3830 
3831  // Don't bother handling odd stuff for now.
3832  if ((VA.getLocInfo() != CCValAssign::Full) &&
3833  (VA.getLocInfo() != CCValAssign::BCvt))
3834  return false;
3835 
3836  // Only handle register returns for now.
3837  if (!VA.isRegLoc())
3838  return false;
3839 
3840  unsigned Reg = getRegForValue(RV);
3841  if (Reg == 0)
3842  return false;
3843 
3844  unsigned SrcReg = Reg + VA.getValNo();
3845  Register DestReg = VA.getLocReg();
3846  // Avoid a cross-class copy. This is very unlikely.
3847  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3848  return false;
3849 
3850  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3851  if (!RVEVT.isSimple())
3852  return false;
3853 
3854  // Vectors (of > 1 lane) in big endian need tricky handling.
3855  if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3856  !Subtarget->isLittleEndian())
3857  return false;
3858 
3859  MVT RVVT = RVEVT.getSimpleVT();
3860  if (RVVT == MVT::f128)
3861  return false;
3862 
3863  MVT DestVT = VA.getValVT();
3864  // Special handling for extended integers.
3865  if (RVVT != DestVT) {
3866  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3867  return false;
3868 
3869  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3870  return false;
3871 
3872  bool IsZExt = Outs[0].Flags.isZExt();
3873  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3874  if (SrcReg == 0)
3875  return false;
3876  }
3877 
3878  // Make the copy.
3879  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3880  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3881 
3882  // Add register to return instruction.
3883  RetRegs.push_back(VA.getLocReg());
3884  }
3885 
3886  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3887  TII.get(AArch64::RET_ReallyLR));
3888  for (unsigned RetReg : RetRegs)
3889  MIB.addReg(RetReg, RegState::Implicit);
3890  return true;
3891 }
3892 
3893 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3894  Type *DestTy = I->getType();
3895  Value *Op = I->getOperand(0);
3896  Type *SrcTy = Op->getType();
3897 
3898  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3899  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3900  if (!SrcEVT.isSimple())
3901  return false;
3902  if (!DestEVT.isSimple())
3903  return false;
3904 
3905  MVT SrcVT = SrcEVT.getSimpleVT();
3906  MVT DestVT = DestEVT.getSimpleVT();
3907 
3908  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3909  SrcVT != MVT::i8)
3910  return false;
3911  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3912  DestVT != MVT::i1)
3913  return false;
3914 
3915  unsigned SrcReg = getRegForValue(Op);
3916  if (!SrcReg)
3917  return false;
3918  bool SrcIsKill = hasTrivialKill(Op);
3919 
3920  // If we're truncating from i64 to a smaller non-legal type then generate an
3921  // AND. Otherwise, we know the high bits are undefined and a truncate only
3922  // generates a COPY. We cannot also mark the source register as the result
3923  // register, because this can incorrectly transfer the kill flag onto the
3924  // source register.
3925  unsigned ResultReg;
3926  if (SrcVT == MVT::i64) {
3927  uint64_t Mask = 0;
3928  switch (DestVT.SimpleTy) {
3929  default:
3930  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3931  return false;
3932  case MVT::i1:
3933  Mask = 0x1;
3934  break;
3935  case MVT::i8:
3936  Mask = 0xff;
3937  break;
3938  case MVT::i16:
3939  Mask = 0xffff;
3940  break;
3941  }
3942  // Issue an extract_subreg to get the lower 32-bits.
3943  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3944  AArch64::sub_32);
3945  // Create the AND instruction which performs the actual truncation.
3946  ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3947  assert(ResultReg && "Unexpected AND instruction emission failure.");
3948  } else {
3949  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3950  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3951  TII.get(TargetOpcode::COPY), ResultReg)
3952  .addReg(SrcReg, getKillRegState(SrcIsKill));
3953  }
3954 
3955  updateValueMap(I, ResultReg);
3956  return true;
3957 }
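// For instance, trunc i64 %x to i8 becomes an EXTRACT_SUBREG of the low 32
// bits followed by "and wN, wN, #0xff"; truncating to a legal width is just
// a COPY, since the high bits are left undefined anyway.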
3958 
3959 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3960  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3961  DestVT == MVT::i64) &&
3962  "Unexpected value type.");
3963  // Handle i8 and i16 as i32.
3964  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3965  DestVT = MVT::i32;
3966 
3967  if (IsZExt) {
3968  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3969  assert(ResultReg && "Unexpected AND instruction emission failure.");
3970  if (DestVT == MVT::i64) {
3971  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3972  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3973  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3974  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3975  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3976  .addImm(0)
3977  .addReg(ResultReg)
3978  .addImm(AArch64::sub_32);
3979  ResultReg = Reg64;
3980  }
3981  return ResultReg;
3982  } else {
3983  if (DestVT == MVT::i64) {
3984  // FIXME: We're SExt i1 to i64.
3985  return 0;
3986  }
3987  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3988  /*TODO:IsKill=*/false, 0, 0);
3989  }
3990 }
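// In other words (a rough sketch, registers illustrative): zext i1 is
// "and wN, wN, #0x1" (plus a SUBREG_TO_REG when widening to i64), while
// sext i1 uses SBFM wN, wN, #0, #0, i.e. "sbfx wN, wN, #0, #1", which
// replicates bit 0 into every bit.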
3991 
3992 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3993  unsigned Op1, bool Op1IsKill) {
3994  unsigned Opc, ZReg;
3995  switch (RetVT.SimpleTy) {
3996  default: return 0;
3997  case MVT::i8:
3998  case MVT::i16:
3999  case MVT::i32:
4000  RetVT = MVT::i32;
4001  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4002  case MVT::i64:
4003  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4004  }
4005 
4006  const TargetRegisterClass *RC =
4007  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4008  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4009  ZReg, /*IsKill=*/true);
4010 }
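// A plain multiply is emitted as a multiply-add with the zero register,
// e.g. "madd w0, w1, w2, wzr" for i32 (register numbers are illustrative).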
4011 
4012 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4013  unsigned Op1, bool Op1IsKill) {
4014  if (RetVT != MVT::i64)
4015  return 0;
4016 
4017  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4018  Op0, Op0IsKill, Op1, Op1IsKill,
4019  AArch64::XZR, /*IsKill=*/true);
4020 }
4021 
4022 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4023  unsigned Op1, bool Op1IsKill) {
4024  if (RetVT != MVT::i64)
4025  return 0;
4026 
4027  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028  Op0, Op0IsKill, Op1, Op1IsKill,
4029  AArch64::XZR, /*IsKill=*/true);
4030 }
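// These widening multiplies likewise use the multiply-add forms with XZR,
// e.g. "smaddl x0, w1, w2, xzr" / "umaddl x0, w1, w2, xzr", producing the
// full 64-bit product of two 32-bit operands.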
4031 
4032 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4033  unsigned Op1Reg, bool Op1IsKill) {
4034  unsigned Opc = 0;
4035  bool NeedTrunc = false;
4036  uint64_t Mask = 0;
4037  switch (RetVT.SimpleTy) {
4038  default: return 0;
4039  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4040  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4041  case MVT::i32: Opc = AArch64::LSLVWr; break;
4042  case MVT::i64: Opc = AArch64::LSLVXr; break;
4043  }
4044 
4045  const TargetRegisterClass *RC =
4046  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4047  if (NeedTrunc) {
4048  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4049  Op1IsKill = true;
4050  }
4051  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4052  Op1IsKill);
4053  if (NeedTrunc)
4054  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4055  return ResultReg;
4056 }
4057 
4058 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4059  bool Op0IsKill, uint64_t Shift,
4060  bool IsZExt) {
4061  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4062  "Unexpected source/return type pair.");
4063  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4064  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4065  "Unexpected source value type.");
4066  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4067  RetVT == MVT::i64) && "Unexpected return value type.");
4068 
4069  bool Is64Bit = (RetVT == MVT::i64);
4070  unsigned RegSize = Is64Bit ? 64 : 32;
4071  unsigned DstBits = RetVT.getSizeInBits();
4072  unsigned SrcBits = SrcVT.getSizeInBits();
4073  const TargetRegisterClass *RC =
4074  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4075 
4076  // Just emit a copy for "zero" shifts.
4077  if (Shift == 0) {
4078  if (RetVT == SrcVT) {
4079  unsigned ResultReg = createResultReg(RC);
4080  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4081  TII.get(TargetOpcode::COPY), ResultReg)
4082  .addReg(Op0, getKillRegState(Op0IsKill));
4083  return ResultReg;
4084  } else
4085  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4086  }
4087 
4088  // Don't deal with undefined shifts.
4089  if (Shift >= DstBits)
4090  return 0;
4091 
4092  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4093  // {S|U}BFM Wd, Wn, #r, #s
4094  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4095 
4096  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4097  // %2 = shl i16 %1, 4
4098  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4099  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4100  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4101  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4102 
4103  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4104  // %2 = shl i16 %1, 8
4105  // Wd<32+7-24,32-24> = Wn<7:0>
4106  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4107  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4108  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4109 
4110  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4111  // %2 = shl i16 %1, 12
4112  // Wd<32+3-20,32-20> = Wn<3:0>
4113  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4114  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4115  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4116 
4117  unsigned ImmR = RegSize - Shift;
4118  // Limit the width to the length of the source type.
4119  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4120  static const unsigned OpcTable[2][2] = {
4121  {AArch64::SBFMWri, AArch64::SBFMXri},
4122  {AArch64::UBFMWri, AArch64::UBFMXri}
4123  };
4124  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4125  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4126  Register TmpReg = MRI.createVirtualRegister(RC);
4127  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4128  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4129  .addImm(0)
4130  .addReg(Op0, getKillRegState(Op0IsKill))
4131  .addImm(AArch64::sub_32);
4132  Op0 = TmpReg;
4133  Op0IsKill = true;
4134  }
4135  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4136 }
4137 
4138 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4139  unsigned Op1Reg, bool Op1IsKill) {
4140  unsigned Opc = 0;
4141  bool NeedTrunc = false;
4142  uint64_t Mask = 0;
4143  switch (RetVT.SimpleTy) {
4144  default: return 0;
4145  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4146  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4147  case MVT::i32: Opc = AArch64::LSRVWr; break;
4148  case MVT::i64: Opc = AArch64::LSRVXr; break;
4149  }
4150 
4151  const TargetRegisterClass *RC =
4152  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4153  if (NeedTrunc) {
4154  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4155  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4156  Op0IsKill = Op1IsKill = true;
4157  }
4158  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4159  Op1IsKill);
4160  if (NeedTrunc)
4161  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4162  return ResultReg;
4163 }
4164 
4165 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4166  bool Op0IsKill, uint64_t Shift,
4167  bool IsZExt) {
4168  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4169  "Unexpected source/return type pair.");
4170  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4171  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4172  "Unexpected source value type.");
4173  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4174  RetVT == MVT::i64) && "Unexpected return value type.");
4175 
4176  bool Is64Bit = (RetVT == MVT::i64);
4177  unsigned RegSize = Is64Bit ? 64 : 32;
4178  unsigned DstBits = RetVT.getSizeInBits();
4179  unsigned SrcBits = SrcVT.getSizeInBits();
4180  const TargetRegisterClass *RC =
4181  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4182 
4183  // Just emit a copy for "zero" shifts.
4184  if (Shift == 0) {
4185  if (RetVT == SrcVT) {
4186  unsigned ResultReg = createResultReg(RC);
4187  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4188  TII.get(TargetOpcode::COPY), ResultReg)
4189  .addReg(Op0, getKillRegState(Op0IsKill));
4190  return ResultReg;
4191  } else
4192  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4193  }
4194 
4195  // Don't deal with undefined shifts.
4196  if (Shift >= DstBits)
4197  return 0;
4198 
4199  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4200  // {S|U}BFM Wd, Wn, #r, #s
4201  // Wd<s-r:0> = Wn<s:r> when r <= s
4202 
4203  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4204  // %2 = lshr i16 %1, 4
4205  // Wd<7-4:0> = Wn<7:4>
4206  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4207  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4208  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4209 
4210  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4211  // %2 = lshr i16 %1, 8
4212  // Wd<7-7,0> = Wn<7:7>
4213  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4214  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4215  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4216 
4217  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4218  // %2 = lshr i16 %1, 12
4219  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4220  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4221  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4222  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4223 
4224  if (Shift >= SrcBits && IsZExt)
4225  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4226 
4227  // It is not possible to fold a sign-extend into the LShr instruction. In this
4228  // case emit a sign-extend.
4229  if (!IsZExt) {
4230  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4231  if (!Op0)
4232  return 0;
4233  Op0IsKill = true;
4234  SrcVT = RetVT;
4235  SrcBits = SrcVT.getSizeInBits();
4236  IsZExt = true;
4237  }
4238 
4239  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4240  unsigned ImmS = SrcBits - 1;
4241  static const unsigned OpcTable[2][2] = {
4242  {AArch64::SBFMWri, AArch64::SBFMXri},
4243  {AArch64::UBFMWri, AArch64::UBFMXri}
4244  };
4245  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4246  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4247  Register TmpReg = MRI.createVirtualRegister(RC);
4248  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4249  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4250  .addImm(0)
4251  .addReg(Op0, getKillRegState(Op0IsKill))
4252  .addImm(AArch64::sub_32);
4253  Op0 = TmpReg;
4254  Op0IsKill = true;
4255  }
4256  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4257 }
4258 
4259 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4260  unsigned Op1Reg, bool Op1IsKill) {
4261  unsigned Opc = 0;
4262  bool NeedTrunc = false;
4263  uint64_t Mask = 0;
4264  switch (RetVT.SimpleTy) {
4265  default: return 0;
4266  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4267  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4268  case MVT::i32: Opc = AArch64::ASRVWr; break;
4269  case MVT::i64: Opc = AArch64::ASRVXr; break;
4270  }
4271 
4272  const TargetRegisterClass *RC =
4273  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4274  if (NeedTrunc) {
4275  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4276  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4277  Op0IsKill = Op1IsKill = true;
4278  }
4279  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4280  Op1IsKill);
4281  if (NeedTrunc)
4282  ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4283  return ResultReg;
4284 }
4285 
4286 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4287  bool Op0IsKill, uint64_t Shift,
4288  bool IsZExt) {
4289  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4290  "Unexpected source/return type pair.");
4291  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4292  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4293  "Unexpected source value type.");
4294  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4295  RetVT == MVT::i64) && "Unexpected return value type.");
4296 
4297  bool Is64Bit = (RetVT == MVT::i64);
4298  unsigned RegSize = Is64Bit ? 64 : 32;
4299  unsigned DstBits = RetVT.getSizeInBits();
4300  unsigned SrcBits = SrcVT.getSizeInBits();
4301  const TargetRegisterClass *RC =
4302  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4303 
4304  // Just emit a copy for "zero" shifts.
4305  if (Shift == 0) {
4306  if (RetVT == SrcVT) {
4307  unsigned ResultReg = createResultReg(RC);
4308  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4309  TII.get(TargetOpcode::COPY), ResultReg)
4310  .addReg(Op0, getKillRegState(Op0IsKill));
4311  return ResultReg;
4312  } else
4313  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4314  }
4315 
4316  // Don't deal with undefined shifts.
4317  if (Shift >= DstBits)
4318  return 0;
4319 
4320  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4321  // {S|U}BFM Wd, Wn, #r, #s
4322  // Wd<s-r:0> = Wn<s:r> when r <= s
4323 
4324  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4325  // %2 = ashr i16 %1, 4
4326  // Wd<7-4:0> = Wn<7:4>
4327  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4328  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4329  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4330 
4331  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4332  // %2 = ashr i16 %1, 8
4333  // Wd<7-7,0> = Wn<7:7>
4334  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4335  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4336  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4337 
4338  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4339  // %2 = ashr i16 %1, 12
4340  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4341  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4342  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4343  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4344 
4345  if (Shift >= SrcBits && IsZExt)
4346  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4347 
4348  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4349  unsigned ImmS = SrcBits - 1;
4350  static const unsigned OpcTable[2][2] = {
4351  {AArch64::SBFMWri, AArch64::SBFMXri},
4352  {AArch64::UBFMWri, AArch64::UBFMXri}
4353  };
4354  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4355  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4356  Register TmpReg = MRI.createVirtualRegister(RC);
4357  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4358  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4359  .addImm(0)
4360  .addReg(Op0, getKillRegState(Op0IsKill))
4361  .addImm(AArch64::sub_32);
4362  Op0 = TmpReg;
4363  Op0IsKill = true;
4364  }
4365  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4366 }
4367 
4368 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4369  bool IsZExt) {
4370  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4371 
4372  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4373  // DestVT are odd things, so test to make sure that they are both types we can
4374  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4375  // bail out to SelectionDAG.
4376  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4377  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4378  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4379  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4380  return 0;
4381 
4382  unsigned Opc;
4383  unsigned Imm = 0;
4384 
4385  switch (SrcVT.SimpleTy) {
4386  default:
4387  return 0;
4388  case MVT::i1:
4389  return emiti1Ext(SrcReg, DestVT, IsZExt);
4390  case MVT::i8:
4391  if (DestVT == MVT::i64)
4392  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393  else
4394  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4395  Imm = 7;
4396  break;
4397  case MVT::i16:
4398  if (DestVT == MVT::i64)
4399  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4400  else
4401  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4402  Imm = 15;
4403  break;
4404  case MVT::i32:
4405  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4406  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4407  Imm = 31;
4408  break;
4409  }
4410 
4411  // Handle i8 and i16 as i32.
4412  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4413  DestVT = MVT::i32;
4414  else if (DestVT == MVT::i64) {
4415  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4416  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4417  TII.get(AArch64::SUBREG_TO_REG), Src64)
4418  .addImm(0)
4419  .addReg(SrcReg)
4420  .addImm(AArch64::sub_32);
4421  SrcReg = Src64;
4422  }
4423 
4424  const TargetRegisterClass *RC =
4425  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4426  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4427 }
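// Examples of what the bitfield moves above produce (registers are
// illustrative): zext i8 -> i32 is UBFM #0, #7, i.e. "uxtb w0, w0";
// sext i16 -> i64 first widens with SUBREG_TO_REG and then uses
// SBFM #0, #15, i.e. "sxth x0, w0".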
4428 
4429 static bool isZExtLoad(const MachineInstr *LI) {
4430  switch (LI->getOpcode()) {
4431  default:
4432  return false;
4433  case AArch64::LDURBBi:
4434  case AArch64::LDURHHi:
4435  case AArch64::LDURWi:
4436  case AArch64::LDRBBui:
4437  case AArch64::LDRHHui:
4438  case AArch64::LDRWui:
4439  case AArch64::LDRBBroX:
4440  case AArch64::LDRHHroX:
4441  case AArch64::LDRWroX:
4442  case AArch64::LDRBBroW:
4443  case AArch64::LDRHHroW:
4444  case AArch64::LDRWroW:
4445  return true;
4446  }
4447 }
4448 
4449 static bool isSExtLoad(const MachineInstr *LI) {
4450  switch (LI->getOpcode()) {
4451  default:
4452  return false;
4453  case AArch64::LDURSBWi:
4454  case AArch64::LDURSHWi:
4455  case AArch64::LDURSBXi:
4456  case AArch64::LDURSHXi:
4457  case AArch64::LDURSWi:
4458  case AArch64::LDRSBWui:
4459  case AArch64::LDRSHWui:
4460  case AArch64::LDRSBXui:
4461  case AArch64::LDRSHXui:
4462  case AArch64::LDRSWui:
4463  case AArch64::LDRSBWroX:
4464  case AArch64::LDRSHWroX:
4465  case AArch64::LDRSBXroX:
4466  case AArch64::LDRSHXroX:
4467  case AArch64::LDRSWroX:
4468  case AArch64::LDRSBWroW:
4469  case AArch64::LDRSHWroW:
4470  case AArch64::LDRSBXroW:
4471  case AArch64::LDRSHXroW:
4472  case AArch64::LDRSWroW:
4473  return true;
4474  }
4475 }
4476 
4477 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4478  MVT SrcVT) {
4479  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4480  if (!LI || !LI->hasOneUse())
4481  return false;
4482 
4483  // Check if the load instruction has already been selected.
4484  unsigned Reg = lookUpRegForValue(LI);
4485  if (!Reg)
4486  return false;
4487 
4488  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4489  if (!MI)
4490  return false;
4491 
4492  // Check if the correct load instruction has been emitted - SelectionDAG might
4493  // have emitted a zero-extending load, but we need a sign-extending load.
4494  bool IsZExt = isa<ZExtInst>(I);
4495  const auto *LoadMI = MI;
4496  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4497  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4498  Register LoadReg = MI->getOperand(1).getReg();
4499  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4500  assert(LoadMI && "Expected valid instruction");
4501  }
4502  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4503  return false;
4504 
4505  // Nothing to be done.
4506  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4507  updateValueMap(I, Reg);
4508  return true;
4509  }
4510 
4511  if (IsZExt) {
4512  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4513  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4514  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4515  .addImm(0)
4516  .addReg(Reg, getKillRegState(true))
4517  .addImm(AArch64::sub_32);
4518  Reg = Reg64;
4519  } else {
4520  assert((MI->getOpcode() == TargetOpcode::COPY &&
4521  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4522  "Expected copy instruction");
4523  Reg = MI->getOperand(1).getReg();
4524  MachineBasicBlock::iterator I(MI);
4525  removeDeadCode(I, std::next(I));
4526  }
4527  updateValueMap(I, Reg);
4528  return true;
4529 }
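// Example of the optimization above: for
//   %v = load i8, i8* %p
//   %z = zext i8 %v to i32
// the LDRBBui already zero-extends, so the extend is folded away and the
// load's result register is simply reused; only zero-extension to i64
// additionally needs a SUBREG_TO_REG.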
4530 
4531 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4532  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4533  "Unexpected integer extend instruction.");
4534  MVT RetVT;
4535  MVT SrcVT;
4536  if (!isTypeSupported(I->getType(), RetVT))
4537  return false;
4538 
4539  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4540  return false;
4541 
4542  // Try to optimize already sign-/zero-extended values from load instructions.
4543  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4544  return true;
4545 
4546  unsigned SrcReg = getRegForValue(I->getOperand(0));
4547  if (!SrcReg)
4548  return false;
4549  bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4550 
4551  // Try to optimize already sign-/zero-extended values from function arguments.
4552  bool IsZExt = isa<ZExtInst>(I);
4553  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4554  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4555  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4556  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4557  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4558  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4559  .addImm(0)
4560  .addReg(SrcReg, getKillRegState(SrcIsKill))
4561  .addImm(AArch64::sub_32);
4562  SrcReg = ResultReg;
4563  }
4564  // Conservatively clear all kill flags from all uses, because we are
4565  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4566  // level. The result of the instruction at IR level might have been
4567  // trivially dead, which is no longer true.
4568  unsigned UseReg = lookUpRegForValue(I);
4569  if (UseReg)
4570  MRI.clearKillFlags(UseReg);
4571 
4572  updateValueMap(I, SrcReg);
4573  return true;
4574  }
4575  }
4576 
4577  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4578  if (!ResultReg)
4579  return false;
4580 
4581  updateValueMap(I, ResultReg);
4582  return true;
4583 }
4584 
4585 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4586  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4587  if (!DestEVT.isSimple())
4588  return false;
4589 
4590  MVT DestVT = DestEVT.getSimpleVT();
4591  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4592  return false;
4593 
4594  unsigned DivOpc;
4595  bool Is64bit = (DestVT == MVT::i64);
4596  switch (ISDOpcode) {
4597  default:
4598  return false;
4599  case ISD::SREM:
4600  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4601  break;
4602  case ISD::UREM:
4603  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4604  break;
4605  }
4606  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4607  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4608  if (!Src0Reg)
4609  return false;
4610  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4611 
4612  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4613  if (!Src1Reg)
4614  return false;
4615  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4616 
4617  const TargetRegisterClass *RC =
4618  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4619  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4620  Src1Reg, /*IsKill=*/false);
4621  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4622  // The remainder is computed as numerator - (quotient * denominator) using the
4623  // MSUB instruction.
4624  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4625  Src1Reg, Src1IsKill, Src0Reg,
4626  Src0IsKill);
4627  updateValueMap(I, ResultReg);
4628  return true;
4629 }
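// E.g. "srem i32 %a, %b" becomes roughly (registers are illustrative):
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0   // w0 - (w0 / w1) * w1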
4630 
4631 bool AArch64FastISel::selectMul(const Instruction *I) {
4632  MVT VT;
4633  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4634  return false;
4635 
4636  if (VT.isVector())
4637  return selectBinaryOp(I, ISD::MUL);
4638 
4639  const Value *Src0 = I->getOperand(0);
4640  const Value *Src1 = I->getOperand(1);
4641  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4642  if (C->getValue().isPowerOf2())
4643  std::swap(Src0, Src1);
4644 
4645  // Try to simplify to a shift instruction.
4646  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4647  if (C->getValue().isPowerOf2()) {
4648  uint64_t ShiftVal = C->getValue().logBase2();
4649  MVT SrcVT = VT;
4650  bool IsZExt = true;
4651  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4652  if (!isIntExtFree(ZExt)) {
4653  MVT VT;
4654  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4655  SrcVT = VT;
4656  IsZExt = true;
4657  Src0 = ZExt->getOperand(0);
4658  }
4659  }
4660  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4661  if (!isIntExtFree(SExt)) {
4662  MVT VT;
4663  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4664  SrcVT = VT;
4665  IsZExt = false;
4666  Src0 = SExt->getOperand(0);
4667  }
4668  }
4669  }
4670 
4671  unsigned Src0Reg = getRegForValue(Src0);
4672  if (!Src0Reg)
4673  return false;
4674  bool Src0IsKill = hasTrivialKill(Src0);
4675 
4676  unsigned ResultReg =
4677  emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4678 
4679  if (ResultReg) {
4680  updateValueMap(I, ResultReg);
4681  return true;
4682  }
4683  }
4684 
4685  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4686  if (!Src0Reg)
4687  return false;
4688  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4689 
4690  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4691  if (!Src1Reg)
4692  return false;
4693  bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4694 
4695  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4696 
4697  if (!ResultReg)
4698  return false;
4699 
4700  updateValueMap(I, ResultReg);
4701  return true;
4702 }
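// E.g. "mul i32 %x, 8" is turned into "lsl w0, w0, #3" by the shift path
// above, and a free zext/sext feeding the multiply is folded into that
// shift; everything else falls back to MADD via emitMul_rr.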
4703 
4704 bool AArch64FastISel::selectShift(const Instruction *I) {
4705  MVT RetVT;
4706  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4707  return false;
4708 
4709  if (RetVT.isVector())
4710  return selectOperator(I, I->getOpcode());
4711 
4712  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4713  unsigned ResultReg = 0;
4714  uint64_t ShiftVal = C->getZExtValue();
4715  MVT SrcVT = RetVT;
4716  bool IsZExt = I->getOpcode() != Instruction::AShr;
4717  const Value *Op0 = I->getOperand(0);
4718  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4719  if (!isIntExtFree(ZExt)) {
4720  MVT TmpVT;
4721  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4722  SrcVT = TmpVT;
4723  IsZExt = true;
4724  Op0 = ZExt->getOperand(0);
4725  }
4726  }
4727  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4728  if (!isIntExtFree(SExt)) {
4729  MVT TmpVT;
4730  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4731  SrcVT = TmpVT;
4732  IsZExt = false;
4733  Op0 = SExt->getOperand(0);
4734  }
4735  }
4736  }
4737 
4738  unsigned Op0Reg = getRegForValue(Op0);
4739  if (!Op0Reg)
4740  return false;
4741  bool Op0IsKill = hasTrivialKill(Op0);
4742 
4743  switch (I->getOpcode()) {
4744  default: llvm_unreachable("Unexpected instruction.");
4745  case Instruction::Shl:
4746  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4747  break;
4748  case Instruction::AShr:
4749  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4750  break;
4751  case Instruction::LShr:
4752  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4753  break;
4754  }
4755  if (!ResultReg)
4756  return false;
4757 
4758  updateValueMap(I, ResultReg);
4759  return true;
4760  }
4761 
4762  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4763  if (!Op0Reg)
4764  return false;
4765  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4766 
4767  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4768  if (!Op1Reg)
4769  return false;
4770  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4771 
4772  unsigned ResultReg = 0;
4773  switch (I->getOpcode()) {
4774  default: llvm_unreachable("Unexpected instruction.");
4775  case Instruction::Shl:
4776  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4777  break;
4778  case Instruction::AShr:
4779  ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4780  break;
4781  case Instruction::LShr:
4782  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4783  break;
4784  }
4785 
4786  if (!ResultReg)
4787  return false;
4788 
4789  updateValueMap(I, ResultReg);
4790  return true;
4791 }
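// E.g. "shl i64 %x, %y" with a variable amount uses LSLVXr ("lsl x0, x0,
// x1"), while sub-register-width types are masked (and, for ashr,
// sign-extended) to i32 around the variable shift.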
4792 
4793 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4794  MVT RetVT, SrcVT;
4795 
4796  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4797  return false;
4798  if (!isTypeLegal(I->getType(), RetVT))
4799  return false;
4800 
4801  unsigned Opc;
4802  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4803  Opc = AArch64::FMOVWSr;
4804  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4805  Opc = AArch64::FMOVXDr;
4806  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4807  Opc = AArch64::FMOVSWr;
4808  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4809  Opc = AArch64::FMOVDXr;
4810  else
4811  return false;
4812 
4813  const TargetRegisterClass *RC = nullptr;
4814  switch (RetVT.SimpleTy) {
4815  default: llvm_unreachable("Unexpected value type.");
4816  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4817  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4818  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4819  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4820  }
4821  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4822  if (!Op0Reg)
4823  return false;
4824  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4825  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4826 
4827  if (!ResultReg)
4828  return false;
4829 
4830  updateValueMap(I, ResultReg);
4831  return true;
4832 }
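// E.g. "bitcast i32 %x to float" becomes "fmov s0, w0" (FMOVWSr); the other
// three combinations map onto the remaining FMOV variants listed above.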
4833 
4834 bool AArch64FastISel::selectFRem(const Instruction *I) {
4835  MVT RetVT;
4836  if (!isTypeLegal(I->getType(), RetVT))
4837  return false;
4838 
4839  RTLIB::Libcall LC;
4840  switch (RetVT.SimpleTy) {
4841  default:
4842  return false;
4843  case MVT::f32:
4844  LC = RTLIB::REM_F32;
4845  break;
4846  case MVT::f64:
4847  LC = RTLIB::REM_F64;
4848  break;
4849  }
4850 
4851  ArgListTy Args;
4852  Args.reserve(I->getNumOperands());
4853 
4854  // Populate the argument list.
4855  for (auto &Arg : I->operands()) {
4856  ArgListEntry Entry;
4857  Entry.Val = Arg;
4858  Entry.Ty = Arg->getType();
4859  Args.push_back(Entry);
4860  }
4861 
4862  CallLoweringInfo CLI;
4863  MCContext &Ctx = MF->getContext();
4864  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4865  TLI.getLibcallName(LC), std::move(Args));
4866  if (!lowerCallTo(CLI))
4867  return false;
4868  updateValueMap(I, CLI.ResultReg);
4869  return true;
4870 }
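// frem has no AArch64 instruction, so e.g. an f32 frem is lowered to a
// libcall to fmodf (RTLIB::REM_F32), and f64 to fmod, reusing the ordinary
// call lowering.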
4871 
4872 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4873  MVT VT;
4874  if (!isTypeLegal(I->getType(), VT))
4875  return false;
4876 
4877  if (!isa<ConstantInt>(I->getOperand(1)))
4878  return selectBinaryOp(I, ISD::SDIV);
4879 
4880  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4881  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4882  !(C.isPowerOf2() || (-C).isPowerOf2()))
4883  return selectBinaryOp(I, ISD::SDIV);
4884 
4885  unsigned Lg2 = C.countTrailingZeros();
4886  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4887  if (!Src0Reg)
4888  return false;
4889  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4890 
4891  if (cast<BinaryOperator>(I)->isExact()) {
4892  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4893  if (!ResultReg)
4894  return false;
4895  updateValueMap(I, ResultReg);
4896  return true;
4897  }
4898 
4899  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4900  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4901  if (!AddReg)
4902  return false;
4903 
4904  // (Src0 < 0) ? Pow2 - 1 : 0;
4905  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4906  return false;
4907 
4908  unsigned SelectOpc;
4909  const TargetRegisterClass *RC;
4910  if (VT == MVT::i64) {
4911  SelectOpc = AArch64::CSELXr;
4912  RC = &AArch64::GPR64RegClass;
4913  } else {
4914  SelectOpc = AArch64::CSELWr;
4915  RC = &AArch64::GPR32RegClass;
4916  }
4917  unsigned SelectReg =
4918  fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4919  Src0IsKill, AArch64CC::LT);
4920  if (!SelectReg)
4921  return false;
4922 
4923  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4924  // negate the result.
4925  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4926  unsigned ResultReg;
4927  if (C.isNegative())
4928  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4929  SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4930  else
4931  ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4932 
4933  if (!ResultReg)
4934  return false;
4935 
4936  updateValueMap(I, ResultReg);
4937  return true;
4938 }
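// For a non-exact "sdiv i32 %x, 4" the sequence above is roughly
// (illustrative registers):
//   add  w8, w0, #3        // x + (4 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt    // pick the biased value for negative x
//   asr  w0, w8, #2
// with an extra SUB from the zero register (a shifted negate) when the
// divisor is negative.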
4939 
4940 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4941 /// have to duplicate it for AArch64, because otherwise we would fail during the
4942 /// sign-extend emission.
4943 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4944  unsigned IdxN = getRegForValue(Idx);
4945  if (IdxN == 0)
4946  // Unhandled operand. Halt "fast" selection and bail.
4947  return std::pair<unsigned, bool>(0, false);
4948 
4949  bool IdxNIsKill = hasTrivialKill(Idx);
4950 
4951  // If the index is smaller or larger than intptr_t, truncate or extend it.
4952  MVT PtrVT = TLI.getPointerTy(DL);
4953  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4954  if (IdxVT.bitsLT(PtrVT)) {
4955  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4956  IdxNIsKill = true;
4957  } else if (IdxVT.bitsGT(PtrVT))
4958  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4959  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4960 }
4961 
4962 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4963 /// duplicate it for AArch64, because otherwise we would bail out even for
4964 /// simple cases. This is because the standard fastEmit functions don't cover
4965  /// MUL at all and ADD is lowered very inefficiently.
4966 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4967  unsigned N = getRegForValue(I->getOperand(0));
4968  if (!N)
4969  return false;
4970  bool NIsKill = hasTrivialKill(I->getOperand(0));
4971 
4972  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4973  // into a single N = N + TotalOffset.
4974  uint64_t TotalOffs = 0;
4975  MVT VT = TLI.getPointerTy(DL);
4976  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4977  GTI != E; ++GTI) {
4978  const Value *Idx = GTI.getOperand();
4979  if (auto *StTy = GTI.getStructTypeOrNull()) {
4980  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4981  // N = N + Offset
4982  if (Field)
4983  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4984  } else {
4985  Type *Ty = GTI.getIndexedType();
4986 
4987  // If this is a constant subscript, handle it quickly.
4988  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4989  if (CI->isZero())
4990  continue;
4991  // N = N + Offset
4992  TotalOffs +=
4993  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4994  continue;
4995  }
4996  if (TotalOffs) {
4997  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4998  if (!N)
4999  return false;
5000  NIsKill = true;
5001  TotalOffs = 0;
5002  }
5003 
5004  // N = N + Idx * ElementSize;
5005  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5006  std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5007  unsigned IdxN = Pair.first;
5008  bool IdxNIsKill = Pair.second;
5009  if (!IdxN)
5010  return false;
5011 
5012  if (ElementSize != 1) {
5013  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5014  if (!C)
5015  return false;
5016  IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5017  if (!IdxN)
5018  return false;
5019  IdxNIsKill = true;
5020  }
5021  N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5022  if (!N)
5023  return false;
5024  }
5025  }
5026  if (TotalOffs) {
5027  N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5028  if (!N)
5029  return false;
5030  }
5031  updateValueMap(I, N);
5032  return true;
5033 }
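// A rough sketch (virtual register numbers invented) of what this produces for
// "getelementptr i32, i32* %p, i64 %i": the element size is materialized as a
// constant, multiplied with the index, and added to the running base:
//
//   mov x9, #4             // ElementSize via fastEmit_i(ISD::Constant)
//   mul x10, x8, x9        // IdxN * ElementSize
//   add x11, x0, x10       // N = N + IdxN * ElementSize
//
// Constant subscripts and struct field offsets never take this path; they are
// accumulated in TotalOffs and emitted as a single immediate add at the end.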
5034 
5035 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5036  assert(TM.getOptLevel() == CodeGenOpt::None &&
5037  "cmpxchg survived AtomicExpand at optlevel > -O0");
5038 
5039  auto *RetPairTy = cast<StructType>(I->getType());
5040  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5041  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5042  "cmpxchg has a non-i1 status result");
5043 
5044  MVT VT;
5045  if (!isTypeLegal(RetTy, VT))
5046  return false;
5047 
5048  const TargetRegisterClass *ResRC;
5049  unsigned Opc, CmpOpc;
5050  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5051  // extractvalue selection doesn't support that.
5052  if (VT == MVT::i32) {
5053  Opc = AArch64::CMP_SWAP_32;
5054  CmpOpc = AArch64::SUBSWrs;
5055  ResRC = &AArch64::GPR32RegClass;
5056  } else if (VT == MVT::i64) {
5057  Opc = AArch64::CMP_SWAP_64;
5058  CmpOpc = AArch64::SUBSXrs;
5059  ResRC = &AArch64::GPR64RegClass;
5060  } else {
5061  return false;
5062  }
5063 
5064  const MCInstrDesc &II = TII.get(Opc);
5065 
5066  const unsigned AddrReg = constrainOperandRegClass(
5067  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5068  const unsigned DesiredReg = constrainOperandRegClass(
5069  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5070  const unsigned NewReg = constrainOperandRegClass(
5071  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5072 
5073  const unsigned ResultReg1 = createResultReg(ResRC);
5074  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5075  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5076 
5077  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5078  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5079  .addDef(ResultReg1)
5080  .addDef(ScratchReg)
5081  .addUse(AddrReg)
5082  .addUse(DesiredReg)
5083  .addUse(NewReg);
5084 
5085  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5086  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5087  .addUse(ResultReg1)
5088  .addUse(DesiredReg)
5089  .addImm(0);
5090 
5091  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5092  .addDef(ResultReg2)
5093  .addUse(AArch64::WZR)
5094  .addUse(AArch64::WZR)
5095  .addImm(AArch64CC::NE);
5096 
5097  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5098  updateValueMap(I, ResultReg1, 2);
5099  return true;
5100 }
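// A rough sketch of the sequence emitted for
// "%pair = cmpxchg i32* %ptr, i32 %old, i32 %new seq_cst seq_cst"
// (the CMP_SWAP_32 pseudo is expanded into an exclusive load/store loop by a
// later pass; register names are illustrative):
//
//   CMP_SWAP_32 <loaded>, <scratch>, <ptr>, <old>, <new>
//   subs wzr, <loaded>, <old>   // did we see the expected value?
//   cset <status>, eq           // CSINCWr wzr, wzr, ne: 1 on success, else 0
//
// The loaded value and the i1 status end up in consecutive result registers so
// the generic extractvalue selection can find them, as the assert above checks.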
5101 
5102 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5103  switch (I->getOpcode()) {
5104  default:
5105  break;
5106  case Instruction::Add:
5107  case Instruction::Sub:
5108  return selectAddSub(I);
5109  case Instruction::Mul:
5110  return selectMul(I);
5111  case Instruction::SDiv:
5112  return selectSDiv(I);
5113  case Instruction::SRem:
5114  if (!selectBinaryOp(I, ISD::SREM))
5115  return selectRem(I, ISD::SREM);
5116  return true;
5117  case Instruction::URem:
5118  if (!selectBinaryOp(I, ISD::UREM))
5119  return selectRem(I, ISD::UREM);
5120  return true;
5121  case Instruction::Shl:
5122  case Instruction::LShr:
5123  case Instruction::AShr:
5124  return selectShift(I);
5125  case Instruction::And:
5126  case Instruction::Or:
5127  case Instruction::Xor:
5128  return selectLogicalOp(I);
5129  case Instruction::Br:
5130  return selectBranch(I);
5131  case Instruction::IndirectBr:
5132  return selectIndirectBr(I);
5133  case Instruction::BitCast:
5134  if (!FastISel::selectBitCast(I))
5135  return selectBitCast(I);
5136  return true;
5137  case Instruction::FPToSI:
5138  if (!selectCast(I, ISD::FP_TO_SINT))
5139  return selectFPToInt(I, /*Signed=*/true);
5140  return true;
5141  case Instruction::FPToUI:
5142  return selectFPToInt(I, /*Signed=*/false);
5143  case Instruction::ZExt:
5144  case Instruction::SExt:
5145  return selectIntExt(I);
5146  case Instruction::Trunc:
5147  if (!selectCast(I, ISD::TRUNCATE))
5148  return selectTrunc(I);
5149  return true;
5150  case Instruction::FPExt:
5151  return selectFPExt(I);
5152  case Instruction::FPTrunc:
5153  return selectFPTrunc(I);
5154  case Instruction::SIToFP:
5155  if (!selectCast(I, ISD::SINT_TO_FP))
5156  return selectIntToFP(I, /*Signed=*/true);
5157  return true;
5158  case Instruction::UIToFP:
5159  return selectIntToFP(I, /*Signed=*/false);
5160  case Instruction::Load:
5161  return selectLoad(I);
5162  case Instruction::Store:
5163  return selectStore(I);
5164  case Instruction::FCmp:
5165  case Instruction::ICmp:
5166  return selectCmp(I);
5167  case Instruction::Select:
5168  return selectSelect(I);
5169  case Instruction::Ret:
5170  return selectRet(I);
5171  case Instruction::FRem:
5172  return selectFRem(I);
5173  case Instruction::GetElementPtr:
5174  return selectGetElementPtr(I);
5175  case Instruction::AtomicCmpXchg:
5176  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5177  }
5178 
5179  // Fall back to target-independent instruction selection.
5180  return selectOperator(I, I->getOpcode());
5181 }
5182 
5183 namespace llvm {
5184 
5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186  const TargetLibraryInfo *LibInfo) {
5187  return new AArch64FastISel(FuncInfo, LibInfo);
5188 }
5189 
5190 } // end namespace llvm
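// Illustration (not part of this file): the target is expected to reach this
// factory through its TargetLowering hook, roughly:
//
//   FastISel *
//   AArch64TargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
//                                         const TargetLibraryInfo *LibInfo) const {
//     return AArch64::createFastISel(FuncInfo, LibInfo);
//   }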