1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcalls.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GetElementPtrTypeIterator.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/MathExtras.h"
69 #include "llvm/TargetParser/Triple.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81  class Address {
82  public:
83  using BaseKind = enum {
84  RegBase,
85  FrameIndexBase
86  };
87 
88  private:
89  BaseKind Kind = RegBase;
90  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91  union {
92  unsigned Reg;
93  int FI;
94  } Base;
95  unsigned OffsetReg = 0;
96  unsigned Shift = 0;
97  int64_t Offset = 0;
98  const GlobalValue *GV = nullptr;
99 
100  public:
101  Address() { Base.Reg = 0; }
102 
103  void setKind(BaseKind K) { Kind = K; }
104  BaseKind getKind() const { return Kind; }
105  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107  bool isRegBase() const { return Kind == RegBase; }
108  bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110  void setReg(unsigned Reg) {
111  assert(isRegBase() && "Invalid base register access!");
112  Base.Reg = Reg;
113  }
114 
115  unsigned getReg() const {
116  assert(isRegBase() && "Invalid base register access!");
117  return Base.Reg;
118  }
119 
120  void setOffsetReg(unsigned Reg) {
121  OffsetReg = Reg;
122  }
123 
124  unsigned getOffsetReg() const {
125  return OffsetReg;
126  }
127 
128  void setFI(unsigned FI) {
129  assert(isFIBase() && "Invalid base frame index access!");
130  Base.FI = FI;
131  }
132 
133  unsigned getFI() const {
134  assert(isFIBase() && "Invalid base frame index access!");
135  return Base.FI;
136  }
137 
138  void setOffset(int64_t O) { Offset = O; }
139  int64_t getOffset() { return Offset; }
140  void setShift(unsigned S) { Shift = S; }
141  unsigned getShift() { return Shift; }
142 
143  void setGlobalValue(const GlobalValue *G) { GV = G; }
144  const GlobalValue *getGlobalValue() { return GV; }
145  };
146 
147  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148  /// make the right decision when generating code for different targets.
149  const AArch64Subtarget *Subtarget;
150  LLVMContext *Context;
151 
152  bool fastLowerArguments() override;
153  bool fastLowerCall(CallLoweringInfo &CLI) override;
154  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157  // Selection routines.
158  bool selectAddSub(const Instruction *I);
159  bool selectLogicalOp(const Instruction *I);
160  bool selectLoad(const Instruction *I);
161  bool selectStore(const Instruction *I);
162  bool selectBranch(const Instruction *I);
163  bool selectIndirectBr(const Instruction *I);
164  bool selectCmp(const Instruction *I);
165  bool selectSelect(const Instruction *I);
166  bool selectFPExt(const Instruction *I);
167  bool selectFPTrunc(const Instruction *I);
168  bool selectFPToInt(const Instruction *I, bool Signed);
169  bool selectIntToFP(const Instruction *I, bool Signed);
170  bool selectRem(const Instruction *I, unsigned ISDOpcode);
171  bool selectRet(const Instruction *I);
172  bool selectTrunc(const Instruction *I);
173  bool selectIntExt(const Instruction *I);
174  bool selectMul(const Instruction *I);
175  bool selectShift(const Instruction *I);
176  bool selectBitCast(const Instruction *I);
177  bool selectFRem(const Instruction *I);
178  bool selectSDiv(const Instruction *I);
179  bool selectGetElementPtr(const Instruction *I);
180  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182  // Utility helper routines.
183  bool isTypeLegal(Type *Ty, MVT &VT);
184  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185  bool isValueAvailable(const Value *V) const;
186  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187  bool computeCallAddress(const Value *V, Address &Addr);
188  bool simplifyAddress(Address &Addr, MVT VT);
189  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190  MachineMemOperand::Flags Flags,
191  unsigned ScaleFactor, MachineMemOperand *MMO);
192  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
193  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194  MaybeAlign Alignment);
195  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196  const Value *Cond);
197  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198  bool optimizeSelect(const SelectInst *SI);
199  unsigned getRegForGEPIndex(const Value *Idx);
200 
201  // Emit helper routines.
202  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203  const Value *RHS, bool SetFlags = false,
204  bool WantResult = true, bool IsZExt = false);
205  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206  unsigned RHSReg, bool SetFlags = false,
207  bool WantResult = true);
208  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209  uint64_t Imm, bool SetFlags = false,
210  bool WantResult = true);
211  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212  unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
213  uint64_t ShiftImm, bool SetFlags = false,
214  bool WantResult = true);
215  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216  unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
217  uint64_t ShiftImm, bool SetFlags = false,
218  bool WantResult = true);
219 
220  // Emit functions.
221  bool emitCompareAndBranch(const BranchInst *BI);
222  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
223  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
224  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
225  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
226  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
227  MachineMemOperand *MMO = nullptr);
228  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
229  MachineMemOperand *MMO = nullptr);
230  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
231  MachineMemOperand *MMO = nullptr);
232  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
233  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
234  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
235  bool SetFlags = false, bool WantResult = true,
236  bool IsZExt = false);
237  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
238  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
239  bool SetFlags = false, bool WantResult = true,
240  bool IsZExt = false);
241  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
242  bool WantResult = true);
243  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
245  bool WantResult = true);
246  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
247  const Value *RHS);
248  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
249  uint64_t Imm);
250  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251  unsigned RHSReg, uint64_t ShiftImm);
252  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
253  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
257  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
258  bool IsZExt = true);
259  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
260  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
261  bool IsZExt = true);
262  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
263  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
264  bool IsZExt = false);
265 
266  unsigned materializeInt(const ConstantInt *CI, MVT VT);
267  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
268  unsigned materializeGV(const GlobalValue *GV);
269 
270  // Call handling routines.
271 private:
272  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
273  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
274  unsigned &NumBytes);
275  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
276 
277 public:
278  // Backend specific FastISel code.
279  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
280  unsigned fastMaterializeConstant(const Constant *C) override;
281  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
282 
283  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
284  const TargetLibraryInfo *LibInfo)
285  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
286  Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
287  Context = &FuncInfo.Fn->getContext();
288  }
289 
290  bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300  "Unexpected integer extend instruction.");
301  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302  "Unexpected value type.");
303  bool IsZExt = isa<ZExtInst>(I);
304 
305  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306  if (LI->hasOneUse())
307  return true;
308 
309  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311  return true;
312 
313  return false;
314 }
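// Illustrative example: for IR like
//   %v = load i8, ptr %p          ; single use
//   %e = zext i8 %v to i32
// the extension is treated as free, since the load can be selected as a
// zero-extending LDRB and no separate UXTB/SXTB instruction is needed.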
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319  switch (VT.SimpleTy) {
320  default:
321  return 0; // invalid
322  case MVT::i1: // fall-through
323  case MVT::i8:
324  return 1;
325  case MVT::i16:
326  return 2;
327  case MVT::i32: // fall-through
328  case MVT::f32:
329  return 4;
330  case MVT::i64: // fall-through
331  case MVT::f64:
332  return 8;
333  }
334 }
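// For example, an i32 or f32 access has a scale factor of 4: the
// unsigned-offset load/store forms (e.g. LDRWui) encode offset/4, so a byte
// offset of 16 is encoded as imm12 = 4 and prints as "ldr w0, [x1, #16]"
// (register names here are purely illustrative).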
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337  if (CC == CallingConv::WebKit_JS)
338  return CC_AArch64_WebKit_JS;
339  if (CC == CallingConv::GHC)
340  return CC_AArch64_GHC;
341  if (CC == CallingConv::CFGuard_Check)
342  return CC_AArch64_Win64_CFGuard_Check;
343  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348  "Alloca should always return a pointer.");
349 
350  // Don't handle dynamic allocas.
351  if (!FuncInfo.StaticAllocaMap.count(AI))
352  return 0;
353 
353 
354  DenseMap<const AllocaInst *, int>::iterator SI =
355  FuncInfo.StaticAllocaMap.find(AI);
356 
357  if (SI != FuncInfo.StaticAllocaMap.end()) {
358  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
360  ResultReg)
361  .addFrameIndex(SI->second)
362  .addImm(0)
363  .addImm(0);
364  return ResultReg;
365  }
366 
367  return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371  if (VT > MVT::i64)
372  return 0;
373 
374  if (!CI->isZero())
375  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377  // Create a copy from the zero register to materialize a "0" value.
378  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379  : &AArch64::GPR32RegClass;
380  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381  Register ResultReg = createResultReg(RC);
382  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
383  ResultReg).addReg(ZeroReg, getKillRegState(true));
384  return ResultReg;
385 }
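// Example: materializing an i32 constant 0 just copies WZR into a fresh
// GPR32 virtual register, avoiding a MOV-immediate; non-zero values are
// handled by the generic fastEmit_i path instead.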
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388  // Positive zero (+0.0) has to be materialized with a fmov from the zero
389  // register, because the immediate version of fmov cannot encode zero.
390  if (CFP->isNullValue())
391  return fastMaterializeFloatZero(CFP);
392 
393  if (VT != MVT::f32 && VT != MVT::f64)
394  return 0;
395 
396  const APFloat Val = CFP->getValueAPF();
397  bool Is64Bit = (VT == MVT::f64);
398  // This checks to see if we can use FMOV instructions to materialize
399 // a constant; otherwise we have to materialize it via the constant pool.
400  int Imm =
401  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402  if (Imm != -1) {
403  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405  }
406 
407  // For the large code model materialize the FP constant in code.
408  if (TM.getCodeModel() == CodeModel::Large) {
409  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410  const TargetRegisterClass *RC = Is64Bit ?
411  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413  Register TmpReg = createResultReg(RC);
414  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
415  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416 
417  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
418  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
419  TII.get(TargetOpcode::COPY), ResultReg)
420  .addReg(TmpReg, getKillRegState(true));
421 
422  return ResultReg;
423  }
424 
425  // Materialize via constant pool. MachineConstantPool wants an explicit
426  // alignment.
427  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
432  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
437  .addReg(ADRPReg)
438  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439  return ResultReg;
440 }
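// Sketch of the expected results (small code model, ELF syntax, illustrative
// registers and constant-pool label):
//   1.0 (f64) -> fmov d0, #1.0                      ; fits the 8-bit FP imm
//   0.1 (f64) -> adrp x8, .LCPI0_0
//                ldr  d0, [x8, :lo12:.LCPI0_0]      ; no FP8 encoding exists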
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443  // We can't handle thread-local variables quickly yet.
444  if (GV->isThreadLocal())
445  return 0;
446 
447  // MachO still uses GOT for large code-model accesses, but ELF requires
448  // movz/movk sequences, which FastISel doesn't handle yet.
449  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450  return 0;
451 
452  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455  if (!DestEVT.isSimple())
456  return 0;
457 
458  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459  unsigned ResultReg;
460 
461  if (OpFlags & AArch64II::MO_GOT) {
462  // ADRP + LDRX
463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
464  ADRPReg)
465  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467  unsigned LdrOpc;
468  if (Subtarget->isTargetILP32()) {
469  ResultReg = createResultReg(&AArch64::GPR32RegClass);
470  LdrOpc = AArch64::LDRWui;
471  } else {
472  ResultReg = createResultReg(&AArch64::GPR64RegClass);
473  LdrOpc = AArch64::LDRXui;
474  }
475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
476  ResultReg)
477  .addReg(ADRPReg)
478  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479  AArch64II::MO_NC | OpFlags);
480  if (!Subtarget->isTargetILP32())
481  return ResultReg;
482 
483  // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
484  // so we must extend the result on ILP32.
485  Register Result64 = createResultReg(&AArch64::GPR64RegClass);
486  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
487  TII.get(TargetOpcode::SUBREG_TO_REG))
488  .addDef(Result64)
489  .addImm(0)
490  .addReg(ResultReg, RegState::Kill)
491  .addImm(AArch64::sub_32);
492  return Result64;
493  } else {
494  // ADRP + ADDX
495  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
496  ADRPReg)
497  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499  if (OpFlags & AArch64II::MO_TAGGED) {
500  // MO_TAGGED on the page indicates a tagged address. Set the tag now.
501  // We do so by creating a MOVK that sets bits 48-63 of the register to
502  // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
503  // the small code model so we can assume a binary size of <= 4GB, which
504  // makes the untagged PC relative offset positive. The binary must also be
505  // loaded into address range [0, 2^48). Both of these properties need to
506  // be ensured at runtime when using tagged addresses.
507  //
508  // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
509  // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
510  // are not exactly 1:1 with FastISel so we cannot easily abstract this
511  // out. At some point, it would be nice to find a way to not have this
512 // duplicate code.
513  unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
514  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
515  DstReg)
516  .addReg(ADRPReg)
517  .addGlobalAddress(GV, /*Offset=*/0x100000000,
518  AArch64II::MO_PREL | AArch64II::MO_G3)
519  .addImm(48);
520  ADRPReg = DstReg;
521  }
522 
523  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
524  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
525  ResultReg)
526  .addReg(ADRPReg)
527  .addGlobalAddress(GV, 0,
528  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
529  .addImm(0);
530  }
531  return ResultReg;
532 }
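// Sketch of the two access sequences this emits (ELF syntax, illustrative
// symbol/register names):
//   GOT access:    adrp x0, :got:var
//                  ldr  x0, [x0, :got_lo12:var]
//   direct access: adrp x0, var
//                  add  x0, x0, :lo12:var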
533 
534 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
535  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
536 
537  // Only handle simple types.
538  if (!CEVT.isSimple())
539  return 0;
540  MVT VT = CEVT.getSimpleVT();
541  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
542  // 'null' pointers need to have a somewhat special treatment.
543  if (isa<ConstantPointerNull>(C)) {
544  assert(VT == MVT::i64 && "Expected 64-bit pointers");
545  return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
546  }
547 
548  if (const auto *CI = dyn_cast<ConstantInt>(C))
549  return materializeInt(CI, VT);
550  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
551  return materializeFP(CFP, VT);
552  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
553  return materializeGV(GV);
554 
555  return 0;
556 }
557 
558 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
559  assert(CFP->isNullValue() &&
560  "Floating-point constant is not a positive zero.");
561  MVT VT;
562  if (!isTypeLegal(CFP->getType(), VT))
563  return 0;
564 
565  if (VT != MVT::f32 && VT != MVT::f64)
566  return 0;
567 
568  bool Is64Bit = (VT == MVT::f64);
569  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
570  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
571  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
572 }
573 
574 /// Check if the multiply is by a power-of-2 constant.
575 static bool isMulPowOf2(const Value *I) {
576  if (const auto *MI = dyn_cast<MulOperator>(I)) {
577  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
578  if (C->getValue().isPowerOf2())
579  return true;
580  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
581  if (C->getValue().isPowerOf2())
582  return true;
583  }
584  return false;
585 }
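// Example: "mul i64 %x, 8" is a power-of-2 multiply (8 == 1 << 3), so callers
// can fold it as a left shift by 3; "mul i64 %x, 6" is not and is rejected.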
586 
587 // Computes the address to get to an object.
588 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
589 {
590  const User *U = nullptr;
591  unsigned Opcode = Instruction::UserOp1;
592  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
593  // Don't walk into other basic blocks unless the object is an alloca from
594  // another block, otherwise it may not have a virtual register assigned.
595  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
596  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
597  Opcode = I->getOpcode();
598  U = I;
599  }
600  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
601  Opcode = C->getOpcode();
602  U = C;
603  }
604 
605  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
606  if (Ty->getAddressSpace() > 255)
607  // Fast instruction selection doesn't support the special
608  // address spaces.
609  return false;
610 
611  switch (Opcode) {
612  default:
613  break;
614  case Instruction::BitCast:
615  // Look through bitcasts.
616  return computeAddress(U->getOperand(0), Addr, Ty);
617 
618  case Instruction::IntToPtr:
619  // Look past no-op inttoptrs.
620  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
621  TLI.getPointerTy(DL))
622  return computeAddress(U->getOperand(0), Addr, Ty);
623  break;
624 
625  case Instruction::PtrToInt:
626  // Look past no-op ptrtoints.
627  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
628  return computeAddress(U->getOperand(0), Addr, Ty);
629  break;
630 
631  case Instruction::GetElementPtr: {
632  Address SavedAddr = Addr;
633  uint64_t TmpOffset = Addr.getOffset();
634 
635  // Iterate through the GEP folding the constants into offsets where
636  // we can.
637  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
638  GTI != E; ++GTI) {
639  const Value *Op = GTI.getOperand();
640  if (StructType *STy = GTI.getStructTypeOrNull()) {
641  const StructLayout *SL = DL.getStructLayout(STy);
642  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
643  TmpOffset += SL->getElementOffset(Idx);
644  } else {
645  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
646  while (true) {
647  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
648  // Constant-offset addressing.
649  TmpOffset += CI->getSExtValue() * S;
650  break;
651  }
652  if (canFoldAddIntoGEP(U, Op)) {
653  // A compatible add with a constant operand. Fold the constant.
654  ConstantInt *CI =
655  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
656  TmpOffset += CI->getSExtValue() * S;
657  // Iterate on the other operand.
658  Op = cast<AddOperator>(Op)->getOperand(0);
659  continue;
660  }
661  // Unsupported
662  goto unsupported_gep;
663  }
664  }
665  }
666 
667  // Try to grab the base operand now.
668  Addr.setOffset(TmpOffset);
669  if (computeAddress(U->getOperand(0), Addr, Ty))
670  return true;
671 
672  // We failed, restore everything and try the other options.
673  Addr = SavedAddr;
674 
675  unsupported_gep:
676  break;
677  }
678  case Instruction::Alloca: {
679  const AllocaInst *AI = cast<AllocaInst>(Obj);
680  DenseMap<const AllocaInst *, int>::iterator SI =
681  FuncInfo.StaticAllocaMap.find(AI);
682  if (SI != FuncInfo.StaticAllocaMap.end()) {
683  Addr.setKind(Address::FrameIndexBase);
684  Addr.setFI(SI->second);
685  return true;
686  }
687  break;
688  }
689  case Instruction::Add: {
690  // Adds of constants are common and easy enough.
691  const Value *LHS = U->getOperand(0);
692  const Value *RHS = U->getOperand(1);
693 
694  if (isa<ConstantInt>(LHS))
695  std::swap(LHS, RHS);
696 
697  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
698  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
699  return computeAddress(LHS, Addr, Ty);
700  }
701 
702  Address Backup = Addr;
703  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
704  return true;
705  Addr = Backup;
706 
707  break;
708  }
709  case Instruction::Sub: {
710  // Subs of constants are common and easy enough.
711  const Value *LHS = U->getOperand(0);
712  const Value *RHS = U->getOperand(1);
713 
714  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
715  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
716  return computeAddress(LHS, Addr, Ty);
717  }
718  break;
719  }
720  case Instruction::Shl: {
721  if (Addr.getOffsetReg())
722  break;
723 
724  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
725  if (!CI)
726  break;
727 
728  unsigned Val = CI->getZExtValue();
729  if (Val < 1 || Val > 3)
730  break;
731 
732  uint64_t NumBytes = 0;
733  if (Ty && Ty->isSized()) {
734  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
735  NumBytes = NumBits / 8;
736  if (!isPowerOf2_64(NumBits))
737  NumBytes = 0;
738  }
739 
740  if (NumBytes != (1ULL << Val))
741  break;
742 
743  Addr.setShift(Val);
744  Addr.setExtendType(AArch64_AM::LSL);
745 
746  const Value *Src = U->getOperand(0);
747  if (const auto *I = dyn_cast<Instruction>(Src)) {
748  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
749  // Fold the zext or sext when it won't become a noop.
750  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
751  if (!isIntExtFree(ZE) &&
752  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
753  Addr.setExtendType(AArch64_AM::UXTW);
754  Src = ZE->getOperand(0);
755  }
756  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
757  if (!isIntExtFree(SE) &&
758  SE->getOperand(0)->getType()->isIntegerTy(32)) {
759  Addr.setExtendType(AArch64_AM::SXTW);
760  Src = SE->getOperand(0);
761  }
762  }
763  }
764  }
765 
766  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
767  if (AI->getOpcode() == Instruction::And) {
768  const Value *LHS = AI->getOperand(0);
769  const Value *RHS = AI->getOperand(1);
770 
771  if (const auto *C = dyn_cast<ConstantInt>(LHS))
772  if (C->getValue() == 0xffffffff)
773  std::swap(LHS, RHS);
774 
775  if (const auto *C = dyn_cast<ConstantInt>(RHS))
776  if (C->getValue() == 0xffffffff) {
777  Addr.setExtendType(AArch64_AM::UXTW);
778  Register Reg = getRegForValue(LHS);
779  if (!Reg)
780  return false;
781  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
782  Addr.setOffsetReg(Reg);
783  return true;
784  }
785  }
786 
787  Register Reg = getRegForValue(Src);
788  if (!Reg)
789  return false;
790  Addr.setOffsetReg(Reg);
791  return true;
792  }
793  case Instruction::Mul: {
794  if (Addr.getOffsetReg())
795  break;
796 
797  if (!isMulPowOf2(U))
798  break;
799 
800  const Value *LHS = U->getOperand(0);
801  const Value *RHS = U->getOperand(1);
802 
803  // Canonicalize power-of-2 value to the RHS.
804  if (const auto *C = dyn_cast<ConstantInt>(LHS))
805  if (C->getValue().isPowerOf2())
806  std::swap(LHS, RHS);
807 
808  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
809  const auto *C = cast<ConstantInt>(RHS);
810  unsigned Val = C->getValue().logBase2();
811  if (Val < 1 || Val > 3)
812  break;
813 
814  uint64_t NumBytes = 0;
815  if (Ty && Ty->isSized()) {
816  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
817  NumBytes = NumBits / 8;
818  if (!isPowerOf2_64(NumBits))
819  NumBytes = 0;
820  }
821 
822  if (NumBytes != (1ULL << Val))
823  break;
824 
825  Addr.setShift(Val);
826  Addr.setExtendType(AArch64_AM::LSL);
827 
828  const Value *Src = LHS;
829  if (const auto *I = dyn_cast<Instruction>(Src)) {
830  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
831  // Fold the zext or sext when it won't become a noop.
832  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
833  if (!isIntExtFree(ZE) &&
834  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
835  Addr.setExtendType(AArch64_AM::UXTW);
836  Src = ZE->getOperand(0);
837  }
838  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
839  if (!isIntExtFree(SE) &&
840  SE->getOperand(0)->getType()->isIntegerTy(32)) {
841  Addr.setExtendType(AArch64_AM::SXTW);
842  Src = SE->getOperand(0);
843  }
844  }
845  }
846  }
847 
848  Register Reg = getRegForValue(Src);
849  if (!Reg)
850  return false;
851  Addr.setOffsetReg(Reg);
852  return true;
853  }
854  case Instruction::And: {
855  if (Addr.getOffsetReg())
856  break;
857 
858  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
859  break;
860 
861  const Value *LHS = U->getOperand(0);
862  const Value *RHS = U->getOperand(1);
863 
864  if (const auto *C = dyn_cast<ConstantInt>(LHS))
865  if (C->getValue() == 0xffffffff)
866  std::swap(LHS, RHS);
867 
868  if (const auto *C = dyn_cast<ConstantInt>(RHS))
869  if (C->getValue() == 0xffffffff) {
870  Addr.setShift(0);
871  Addr.setExtendType(AArch64_AM::LSL);
872  Addr.setExtendType(AArch64_AM::UXTW);
873 
874  Register Reg = getRegForValue(LHS);
875  if (!Reg)
876  return false;
877  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
878  Addr.setOffsetReg(Reg);
879  return true;
880  }
881  break;
882  }
883  case Instruction::SExt:
884  case Instruction::ZExt: {
885  if (!Addr.getReg() || Addr.getOffsetReg())
886  break;
887 
888  const Value *Src = nullptr;
889  // Fold the zext or sext when it won't become a noop.
890  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
891  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
892  Addr.setExtendType(AArch64_AM::UXTW);
893  Src = ZE->getOperand(0);
894  }
895  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
896  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
897  Addr.setExtendType(AArch64_AM::SXTW);
898  Src = SE->getOperand(0);
899  }
900  }
901 
902  if (!Src)
903  break;
904 
905  Addr.setShift(0);
906  Register Reg = getRegForValue(Src);
907  if (!Reg)
908  return false;
909  Addr.setOffsetReg(Reg);
910  return true;
911  }
912  } // end switch
913 
914  if (Addr.isRegBase() && !Addr.getReg()) {
915  Register Reg = getRegForValue(Obj);
916  if (!Reg)
917  return false;
918  Addr.setReg(Reg);
919  return true;
920  }
921 
922  if (!Addr.getOffsetReg()) {
923  Register Reg = getRegForValue(Obj);
924  if (!Reg)
925  return false;
926  Addr.setOffsetReg(Reg);
927  return true;
928  }
929 
930  return false;
931 }
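// Worked example (illustrative names), assuming an i32 access type:
//   %ext  = zext i32 %idx to i64
//   %offs = shl i64 %ext, 2
//   %addr = add i64 %base, %offs
// folds into a single register-offset address: %base as the base register and
// %idx as the offset register with a UXTW extend and shift of 2, which later
// becomes something like "ldr w0, [xBase, wIdx, uxtw #2]".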
932 
933 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
934  const User *U = nullptr;
935  unsigned Opcode = Instruction::UserOp1;
936  bool InMBB = true;
937 
938  if (const auto *I = dyn_cast<Instruction>(V)) {
939  Opcode = I->getOpcode();
940  U = I;
941  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
942  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
943  Opcode = C->getOpcode();
944  U = C;
945  }
946 
947  switch (Opcode) {
948  default: break;
949  case Instruction::BitCast:
950  // Look past bitcasts if its operand is in the same BB.
951  if (InMBB)
952  return computeCallAddress(U->getOperand(0), Addr);
953  break;
954  case Instruction::IntToPtr:
955  // Look past no-op inttoptrs if its operand is in the same BB.
956  if (InMBB &&
957  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
958  TLI.getPointerTy(DL))
959  return computeCallAddress(U->getOperand(0), Addr);
960  break;
961  case Instruction::PtrToInt:
962  // Look past no-op ptrtoints if its operand is in the same BB.
963  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
964  return computeCallAddress(U->getOperand(0), Addr);
965  break;
966  }
967 
968  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
969  Addr.setGlobalValue(GV);
970  return true;
971  }
972 
973  // If all else fails, try to materialize the value in a register.
974  if (!Addr.getGlobalValue()) {
975  Addr.setReg(getRegForValue(V));
976  return Addr.getReg() != 0;
977  }
978 
979  return false;
980 }
981 
982 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
983  EVT evt = TLI.getValueType(DL, Ty, true);
984 
985  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
986  return false;
987 
988  // Only handle simple types.
989  if (evt == MVT::Other || !evt.isSimple())
990  return false;
991  VT = evt.getSimpleVT();
992 
993  // This is a legal type, but it's not something we handle in fast-isel.
994  if (VT == MVT::f128)
995  return false;
996 
997  // Handle all other legal types, i.e. a register that will directly hold this
998  // value.
999  return TLI.isTypeLegal(VT);
1000 }
1001 
1002 /// Determine if the value type is supported by FastISel.
1003 ///
1004 /// FastISel for AArch64 can handle more value types than are legal. This adds
1005 /// simple value types such as i1, i8, and i16.
1006 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1007  if (Ty->isVectorTy() && !IsVectorAllowed)
1008  return false;
1009 
1010  if (isTypeLegal(Ty, VT))
1011  return true;
1012 
1013  // If this is a type that can be sign- or zero-extended to a basic operation
1014  // go ahead and accept it now.
1015  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1016  return true;
1017 
1018  return false;
1019 }
1020 
1021 bool AArch64FastISel::isValueAvailable(const Value *V) const {
1022  if (!isa<Instruction>(V))
1023  return true;
1024 
1025  const auto *I = cast<Instruction>(V);
1026  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1027 }
1028 
1029 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1030  if (Subtarget->isTargetILP32())
1031  return false;
1032 
1033  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1034  if (!ScaleFactor)
1035  return false;
1036 
1037  bool ImmediateOffsetNeedsLowering = false;
1038  bool RegisterOffsetNeedsLowering = false;
1039  int64_t Offset = Addr.getOffset();
1040  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1041  ImmediateOffsetNeedsLowering = true;
1042  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1043  !isUInt<12>(Offset / ScaleFactor))
1044  ImmediateOffsetNeedsLowering = true;
1045 
1046  // Cannot encode an offset register and an immediate offset in the same
1047  // instruction. Fold the immediate offset into the load/store instruction and
1048  // emit an additional add to take care of the offset register.
1049  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1050  RegisterOffsetNeedsLowering = true;
1051 
1052  // Cannot encode zero register as base.
1053  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1054  RegisterOffsetNeedsLowering = true;
1055 
1056  // If this is a stack pointer and the offset needs to be simplified then put
1057  // the alloca address into a register, set the base type back to register and
1058  // continue. This should almost never happen.
1059  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1060  {
1061  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1062  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1063  ResultReg)
1064  .addFrameIndex(Addr.getFI())
1065  .addImm(0)
1066  .addImm(0);
1067  Addr.setKind(Address::RegBase);
1068  Addr.setReg(ResultReg);
1069  }
1070 
1071  if (RegisterOffsetNeedsLowering) {
1072  unsigned ResultReg = 0;
1073  if (Addr.getReg()) {
1074  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1075  Addr.getExtendType() == AArch64_AM::UXTW )
1076  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1077  Addr.getOffsetReg(), Addr.getExtendType(),
1078  Addr.getShift());
1079  else
1080  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1081  Addr.getOffsetReg(), AArch64_AM::LSL,
1082  Addr.getShift());
1083  } else {
1084  if (Addr.getExtendType() == AArch64_AM::UXTW)
1085  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1086  Addr.getShift(), /*IsZExt=*/true);
1087  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1088  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089  Addr.getShift(), /*IsZExt=*/false);
1090  else
1091  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1092  Addr.getShift());
1093  }
1094  if (!ResultReg)
1095  return false;
1096 
1097  Addr.setReg(ResultReg);
1098  Addr.setOffsetReg(0);
1099  Addr.setShift(0);
1100  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1101  }
1102 
1103  // Since the offset is too large for the load/store instruction get the
1104  // reg+offset into a register.
1105  if (ImmediateOffsetNeedsLowering) {
1106  unsigned ResultReg;
1107  if (Addr.getReg())
1108  // Try to fold the immediate into the add instruction.
1109  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1110  else
1111  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1112 
1113  if (!ResultReg)
1114  return false;
1115  Addr.setReg(ResultReg);
1116  Addr.setOffset(0);
1117  }
1118  return true;
1119 }
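// In short, for an i64 access (scale factor 8): offsets that are a multiple of
// 8 in [0, 32760], or any offset in [-256, 255], can be encoded directly.
// Anything else is folded into the base first, e.g. an offset of 65536 becomes
// "add x8, xBase, #16, lsl #12" followed by a load with offset 0 (illustrative
// register names).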
1120 
1121 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1122  const MachineInstrBuilder &MIB,
1123  MachineMemOperand::Flags Flags,
1124  unsigned ScaleFactor,
1125  MachineMemOperand *MMO) {
1126  int64_t Offset = Addr.getOffset() / ScaleFactor;
1127  // Frame base works a bit differently. Handle it separately.
1128  if (Addr.isFIBase()) {
1129  int FI = Addr.getFI();
1130  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1131  // and alignment should be based on the VT.
1132  MMO = FuncInfo.MF->getMachineMemOperand(
1133  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1134  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1135  // Now add the rest of the operands.
1136  MIB.addFrameIndex(FI).addImm(Offset);
1137  } else {
1138  assert(Addr.isRegBase() && "Unexpected address kind.");
1139  const MCInstrDesc &II = MIB->getDesc();
1140  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1141  Addr.setReg(
1142  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1143  Addr.setOffsetReg(
1144  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1145  if (Addr.getOffsetReg()) {
1146  assert(Addr.getOffset() == 0 && "Unexpected offset");
1147  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1148  Addr.getExtendType() == AArch64_AM::SXTX;
1149  MIB.addReg(Addr.getReg());
1150  MIB.addReg(Addr.getOffsetReg());
1151  MIB.addImm(IsSigned);
1152  MIB.addImm(Addr.getShift() != 0);
1153  } else
1154  MIB.addReg(Addr.getReg()).addImm(Offset);
1155  }
1156 
1157  if (MMO)
1158  MIB.addMemOperand(MMO);
1159 }
1160 
1161 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1162  const Value *RHS, bool SetFlags,
1163  bool WantResult, bool IsZExt) {
1164  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1165  bool NeedExtend = false;
1166  switch (RetVT.SimpleTy) {
1167  default:
1168  return 0;
1169  case MVT::i1:
1170  NeedExtend = true;
1171  break;
1172  case MVT::i8:
1173  NeedExtend = true;
1174  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1175  break;
1176  case MVT::i16:
1177  NeedExtend = true;
1178  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1179  break;
1180  case MVT::i32: // fall-through
1181  case MVT::i64:
1182  break;
1183  }
1184  MVT SrcVT = RetVT;
1185  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1186 
1187  // Canonicalize immediates to the RHS first.
1188  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1189  std::swap(LHS, RHS);
1190 
1191  // Canonicalize mul by power of 2 to the RHS.
1192  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1193  if (isMulPowOf2(LHS))
1194  std::swap(LHS, RHS);
1195 
1196  // Canonicalize shift immediate to the RHS.
1197  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1198  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1199  if (isa<ConstantInt>(SI->getOperand(1)))
1200  if (SI->getOpcode() == Instruction::Shl ||
1201  SI->getOpcode() == Instruction::LShr ||
1202  SI->getOpcode() == Instruction::AShr )
1203  std::swap(LHS, RHS);
1204 
1205  Register LHSReg = getRegForValue(LHS);
1206  if (!LHSReg)
1207  return 0;
1208 
1209  if (NeedExtend)
1210  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1211 
1212  unsigned ResultReg = 0;
1213  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1214  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1215  if (C->isNegative())
1216  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1217  WantResult);
1218  else
1219  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1220  WantResult);
1221  } else if (const auto *C = dyn_cast<Constant>(RHS))
1222  if (C->isNullValue())
1223  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1224 
1225  if (ResultReg)
1226  return ResultReg;
1227 
1228  // Only extend the RHS within the instruction if there is a valid extend type.
1229  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1230  isValueAvailable(RHS)) {
1231  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1232  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1233  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1234  Register RHSReg = getRegForValue(SI->getOperand(0));
1235  if (!RHSReg)
1236  return 0;
1237  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1238  C->getZExtValue(), SetFlags, WantResult);
1239  }
1240  Register RHSReg = getRegForValue(RHS);
1241  if (!RHSReg)
1242  return 0;
1243  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1244  SetFlags, WantResult);
1245  }
1246 
1247  // Check if the mul can be folded into the instruction.
1248  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1249  if (isMulPowOf2(RHS)) {
1250  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1251  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1252 
1253  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1254  if (C->getValue().isPowerOf2())
1255  std::swap(MulLHS, MulRHS);
1256 
1257  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1258  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1259  Register RHSReg = getRegForValue(MulLHS);
1260  if (!RHSReg)
1261  return 0;
1262  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1263  ShiftVal, SetFlags, WantResult);
1264  if (ResultReg)
1265  return ResultReg;
1266  }
1267  }
1268 
1269  // Check if the shift can be folded into the instruction.
1270  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1271  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1272  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1273  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1274  switch (SI->getOpcode()) {
1275  default: break;
1276  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1277  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1278  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1279  }
1280  uint64_t ShiftVal = C->getZExtValue();
1281  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1282  Register RHSReg = getRegForValue(SI->getOperand(0));
1283  if (!RHSReg)
1284  return 0;
1285  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1286  ShiftVal, SetFlags, WantResult);
1287  if (ResultReg)
1288  return ResultReg;
1289  }
1290  }
1291  }
1292  }
1293 
1294  Register RHSReg = getRegForValue(RHS);
1295  if (!RHSReg)
1296  return 0;
1297 
1298  if (NeedExtend)
1299  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1300 
1301  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1302 }
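// Example of the folding this enables (illustrative registers): when the
// multiply below has a single use in the same block,
//   %m = mul i64 %b, 8
//   %r = add i64 %a, %m
// is selected as a single "add x0, xA, xB, lsl #3" rather than a separate
// shift/multiply followed by an add.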
1303 
1304 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1305  unsigned RHSReg, bool SetFlags,
1306  bool WantResult) {
1307  assert(LHSReg && RHSReg && "Invalid register number.");
1308 
1309  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1310  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1311  return 0;
1312 
1313  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1314  return 0;
1315 
1316  static const unsigned OpcTable[2][2][2] = {
1317  { { AArch64::SUBWrr, AArch64::SUBXrr },
1318  { AArch64::ADDWrr, AArch64::ADDXrr } },
1319  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1320  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1321  };
1322  bool Is64Bit = RetVT == MVT::i64;
1323  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1324  const TargetRegisterClass *RC =
1325  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1326  unsigned ResultReg;
1327  if (WantResult)
1328  ResultReg = createResultReg(RC);
1329  else
1330  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1331 
1332  const MCInstrDesc &II = TII.get(Opc);
1333  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1334  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1335  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1336  .addReg(LHSReg)
1337  .addReg(RHSReg);
1338  return ResultReg;
1339 }
1340 
1341 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1342  uint64_t Imm, bool SetFlags,
1343  bool WantResult) {
1344  assert(LHSReg && "Invalid register number.");
1345 
1346  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1347  return 0;
1348 
1349  unsigned ShiftImm;
1350  if (isUInt<12>(Imm))
1351  ShiftImm = 0;
1352  else if ((Imm & 0xfff000) == Imm) {
1353  ShiftImm = 12;
1354  Imm >>= 12;
1355  } else
1356  return 0;
1357 
1358  static const unsigned OpcTable[2][2][2] = {
1359  { { AArch64::SUBWri, AArch64::SUBXri },
1360  { AArch64::ADDWri, AArch64::ADDXri } },
1361  { { AArch64::SUBSWri, AArch64::SUBSXri },
1362  { AArch64::ADDSWri, AArch64::ADDSXri } }
1363  };
1364  bool Is64Bit = RetVT == MVT::i64;
1365  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1366  const TargetRegisterClass *RC;
1367  if (SetFlags)
1368  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1369  else
1370  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1371  unsigned ResultReg;
1372  if (WantResult)
1373  ResultReg = createResultReg(RC);
1374  else
1375  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1376 
1377  const MCInstrDesc &II = TII.get(Opc);
1378  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1379  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1380  .addReg(LHSReg)
1381  .addImm(Imm)
1382  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1383  return ResultReg;
1384 }
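// Immediate-encoding examples: 4095 fits directly in the 12-bit field, 0x5000
// is encoded as 5 with "lsl #12", and a value such as 0x123456 fits neither
// form, so this returns 0 and the caller falls back to a register operand.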
1385 
1386 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1387  unsigned RHSReg,
1388  AArch64_AM::ShiftExtendType ShiftType,
1389  uint64_t ShiftImm, bool SetFlags,
1390  bool WantResult) {
1391  assert(LHSReg && RHSReg && "Invalid register number.");
1392  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1393  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1394 
1395  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1396  return 0;
1397 
1398  // Don't deal with undefined shifts.
1399  if (ShiftImm >= RetVT.getSizeInBits())
1400  return 0;
1401 
1402  static const unsigned OpcTable[2][2][2] = {
1403  { { AArch64::SUBWrs, AArch64::SUBXrs },
1404  { AArch64::ADDWrs, AArch64::ADDXrs } },
1405  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1406  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1407  };
1408  bool Is64Bit = RetVT == MVT::i64;
1409  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1410  const TargetRegisterClass *RC =
1411  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1412  unsigned ResultReg;
1413  if (WantResult)
1414  ResultReg = createResultReg(RC);
1415  else
1416  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1417 
1418  const MCInstrDesc &II = TII.get(Opc);
1419  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1420  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1421  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1422  .addReg(LHSReg)
1423  .addReg(RHSReg)
1424  .addImm(getShifterImm(ShiftType, ShiftImm));
1425  return ResultReg;
1426 }
1427 
1428 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1429  unsigned RHSReg,
1430  AArch64_AM::ShiftExtendType ExtType,
1431  uint64_t ShiftImm, bool SetFlags,
1432  bool WantResult) {
1433  assert(LHSReg && RHSReg && "Invalid register number.");
1434  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1435  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1436 
1437  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1438  return 0;
1439 
1440  if (ShiftImm >= 4)
1441  return 0;
1442 
1443  static const unsigned OpcTable[2][2][2] = {
1444  { { AArch64::SUBWrx, AArch64::SUBXrx },
1445  { AArch64::ADDWrx, AArch64::ADDXrx } },
1446  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1447  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1448  };
1449  bool Is64Bit = RetVT == MVT::i64;
1450  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1451  const TargetRegisterClass *RC = nullptr;
1452  if (SetFlags)
1453  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1454  else
1455  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1456  unsigned ResultReg;
1457  if (WantResult)
1458  ResultReg = createResultReg(RC);
1459  else
1460  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1461 
1462  const MCInstrDesc &II = TII.get(Opc);
1463  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1464  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1465  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1466  .addReg(LHSReg)
1467  .addReg(RHSReg)
1468  .addImm(getArithExtendImm(ExtType, ShiftImm));
1469  return ResultReg;
1470 }
1471 
1472 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1473  Type *Ty = LHS->getType();
1474  EVT EVT = TLI.getValueType(DL, Ty, true);
1475  if (!EVT.isSimple())
1476  return false;
1477  MVT VT = EVT.getSimpleVT();
1478 
1479  switch (VT.SimpleTy) {
1480  default:
1481  return false;
1482  case MVT::i1:
1483  case MVT::i8:
1484  case MVT::i16:
1485  case MVT::i32:
1486  case MVT::i64:
1487  return emitICmp(VT, LHS, RHS, IsZExt);
1488  case MVT::f32:
1489  case MVT::f64:
1490  return emitFCmp(VT, LHS, RHS);
1491  }
1492 }
1493 
1494 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1495  bool IsZExt) {
1496  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1497  IsZExt) != 0;
1498 }
1499 
1500 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1501  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1502  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1503 }
1504 
1505 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1506  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1507  return false;
1508 
1509  // Check to see if the 2nd operand is a constant that we can encode directly
1510  // in the compare.
1511  bool UseImm = false;
1512  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1513  if (CFP->isZero() && !CFP->isNegative())
1514  UseImm = true;
1515 
1516  Register LHSReg = getRegForValue(LHS);
1517  if (!LHSReg)
1518  return false;
1519 
1520  if (UseImm) {
1521  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1522  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1523  .addReg(LHSReg);
1524  return true;
1525  }
1526 
1527  Register RHSReg = getRegForValue(RHS);
1528  if (!RHSReg)
1529  return false;
1530 
1531  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1532  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1533  .addReg(LHSReg)
1534  .addReg(RHSReg);
1535  return true;
1536 }
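// Example: for "fcmp oeq double %x, 0.0" the +0.0 operand folds into the
// compare as "fcmp d0, #0.0"; any other RHS is materialized into an FPR first
// and compared with the register form "fcmp d0, d1" (illustrative registers).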
1537 
1538 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1539  bool SetFlags, bool WantResult, bool IsZExt) {
1540  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1541  IsZExt);
1542 }
1543 
1544 /// This method is a wrapper to simplify add emission.
1545 ///
1546 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1547 /// that fails, then try to materialize the immediate into a register and use
1548 /// emitAddSub_rr instead.
1549 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1550  unsigned ResultReg;
1551  if (Imm < 0)
1552  ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1553  else
1554  ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1555 
1556  if (ResultReg)
1557  return ResultReg;
1558 
1559  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1560  if (!CReg)
1561  return 0;
1562 
1563  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1564  return ResultReg;
1565 }
1566 
1567 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1568  bool SetFlags, bool WantResult, bool IsZExt) {
1569  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1570  IsZExt);
1571 }
1572 
1573 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1574  unsigned RHSReg, bool WantResult) {
1575  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1576  /*SetFlags=*/true, WantResult);
1577 }
1578 
1579 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1580  unsigned RHSReg,
1581  AArch64_AM::ShiftExtendType ShiftType,
1582  uint64_t ShiftImm, bool WantResult) {
1583  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1584  ShiftImm, /*SetFlags=*/true, WantResult);
1585 }
1586 
1587 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1588  const Value *LHS, const Value *RHS) {
1589  // Canonicalize immediates to the RHS first.
1590  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1591  std::swap(LHS, RHS);
1592 
1593  // Canonicalize mul by power-of-2 to the RHS.
1594  if (LHS->hasOneUse() && isValueAvailable(LHS))
1595  if (isMulPowOf2(LHS))
1596  std::swap(LHS, RHS);
1597 
1598  // Canonicalize shift immediate to the RHS.
1599  if (LHS->hasOneUse() && isValueAvailable(LHS))
1600  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1601  if (isa<ConstantInt>(SI->getOperand(1)))
1602  std::swap(LHS, RHS);
1603 
1604  Register LHSReg = getRegForValue(LHS);
1605  if (!LHSReg)
1606  return 0;
1607 
1608  unsigned ResultReg = 0;
1609  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1610  uint64_t Imm = C->getZExtValue();
1611  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1612  }
1613  if (ResultReg)
1614  return ResultReg;
1615 
1616  // Check if the mul can be folded into the instruction.
1617  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1618  if (isMulPowOf2(RHS)) {
1619  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1620  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1621 
1622  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1623  if (C->getValue().isPowerOf2())
1624  std::swap(MulLHS, MulRHS);
1625 
1626  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1627  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1628 
1629  Register RHSReg = getRegForValue(MulLHS);
1630  if (!RHSReg)
1631  return 0;
1632  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1633  if (ResultReg)
1634  return ResultReg;
1635  }
1636  }
1637 
1638  // Check if the shift can be folded into the instruction.
1639  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1640  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1641  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1642  uint64_t ShiftVal = C->getZExtValue();
1643  Register RHSReg = getRegForValue(SI->getOperand(0));
1644  if (!RHSReg)
1645  return 0;
1646  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1647  if (ResultReg)
1648  return ResultReg;
1649  }
1650  }
1651 
1652  Register RHSReg = getRegForValue(RHS);
1653  if (!RHSReg)
1654  return 0;
1655 
1656  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1657  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1658  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1659  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1660  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1661  }
1662  return ResultReg;
1663 }
1664 
1665 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1666  unsigned LHSReg, uint64_t Imm) {
1667  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1668  "ISD nodes are not consecutive!");
1669  static const unsigned OpcTable[3][2] = {
1670  { AArch64::ANDWri, AArch64::ANDXri },
1671  { AArch64::ORRWri, AArch64::ORRXri },
1672  { AArch64::EORWri, AArch64::EORXri }
1673  };
1674  const TargetRegisterClass *RC;
1675  unsigned Opc;
1676  unsigned RegSize;
1677  switch (RetVT.SimpleTy) {
1678  default:
1679  return 0;
1680  case MVT::i1:
1681  case MVT::i8:
1682  case MVT::i16:
1683  case MVT::i32: {
1684  unsigned Idx = ISDOpc - ISD::AND;
1685  Opc = OpcTable[Idx][0];
1686  RC = &AArch64::GPR32spRegClass;
1687  RegSize = 32;
1688  break;
1689  }
1690  case MVT::i64:
1691  Opc = OpcTable[ISDOpc - ISD::AND][1];
1692  RC = &AArch64::GPR64spRegClass;
1693  RegSize = 64;
1694  break;
1695  }
1696 
1697  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1698  return 0;
1699 
1700  Register ResultReg =
1701  fastEmitInst_ri(Opc, RC, LHSReg,
1702  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1703  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1704  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1705  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1706  }
1707  return ResultReg;
1708 }
1709 
1710 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1711  unsigned LHSReg, unsigned RHSReg,
1712  uint64_t ShiftImm) {
1713  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1714  "ISD nodes are not consecutive!");
1715  static const unsigned OpcTable[3][2] = {
1716  { AArch64::ANDWrs, AArch64::ANDXrs },
1717  { AArch64::ORRWrs, AArch64::ORRXrs },
1718  { AArch64::EORWrs, AArch64::EORXrs }
1719  };
1720 
1721  // Don't deal with undefined shifts.
1722  if (ShiftImm >= RetVT.getSizeInBits())
1723  return 0;
1724 
1725  const TargetRegisterClass *RC;
1726  unsigned Opc;
1727  switch (RetVT.SimpleTy) {
1728  default:
1729  return 0;
1730  case MVT::i1:
1731  case MVT::i8:
1732  case MVT::i16:
1733  case MVT::i32:
1734  Opc = OpcTable[ISDOpc - ISD::AND][0];
1735  RC = &AArch64::GPR32RegClass;
1736  break;
1737  case MVT::i64:
1738  Opc = OpcTable[ISDOpc - ISD::AND][1];
1739  RC = &AArch64::GPR64RegClass;
1740  break;
1741  }
1742  Register ResultReg =
1743  fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1744  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1745  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1746  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1747  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1748  }
1749  return ResultReg;
1750 }
1751 
1752 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1753  uint64_t Imm) {
1754  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1755 }
1756 
1757 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1758  bool WantZExt, MachineMemOperand *MMO) {
1759  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1760  return 0;
1761 
1762  // Simplify this down to something we can handle.
1763  if (!simplifyAddress(Addr, VT))
1764  return 0;
1765 
1766  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1767  if (!ScaleFactor)
1768  llvm_unreachable("Unexpected value type.");
1769 
1770  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1771  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1772  bool UseScaled = true;
1773  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1774  UseScaled = false;
1775  ScaleFactor = 1;
1776  }
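  // For example, an i32 load at offset 8 can use the scaled form (unsigned
  // 12-bit immediate, implicitly multiplied by the 4-byte access size), while
  // an offset of -4, or one that is not a multiple of 4, must use the unscaled
  // form with its signed 9-bit immediate (LDUR).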
1777 
1778  static const unsigned GPOpcTable[2][8][4] = {
1779  // Sign-extend.
1780  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1781  AArch64::LDURXi },
1782  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1783  AArch64::LDURXi },
1784  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1785  AArch64::LDRXui },
1786  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1787  AArch64::LDRXui },
1788  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1789  AArch64::LDRXroX },
1790  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1791  AArch64::LDRXroX },
1792  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1793  AArch64::LDRXroW },
1794  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1795  AArch64::LDRXroW }
1796  },
1797  // Zero-extend.
1798  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1799  AArch64::LDURXi },
1800  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1801  AArch64::LDURXi },
1802  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1803  AArch64::LDRXui },
1804  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1805  AArch64::LDRXui },
1806  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1807  AArch64::LDRXroX },
1808  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1809  AArch64::LDRXroX },
1810  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1811  AArch64::LDRXroW },
1812  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1813  AArch64::LDRXroW }
1814  }
1815  };
1816 
1817  static const unsigned FPOpcTable[4][2] = {
1818  { AArch64::LDURSi, AArch64::LDURDi },
1819  { AArch64::LDRSui, AArch64::LDRDui },
1820  { AArch64::LDRSroX, AArch64::LDRDroX },
1821  { AArch64::LDRSroW, AArch64::LDRDroW }
1822  };
1823 
1824  unsigned Opc;
1825  const TargetRegisterClass *RC;
1826  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1827  Addr.getOffsetReg();
1828  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1829  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1830  Addr.getExtendType() == AArch64_AM::SXTW)
1831  Idx++;
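  // Idx selects the addressing form: 0 unscaled immediate, 1 scaled immediate,
  // 2 register offset; the increment above switches to the extended
  // (UXTW/SXTW) W-register-offset variants. The row used below is
  // 2 * Idx + IsRet64Bit.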
1832 
1833  bool IsRet64Bit = RetVT == MVT::i64;
1834  switch (VT.SimpleTy) {
1835  default:
1836  llvm_unreachable("Unexpected value type.");
1837  case MVT::i1: // Intentional fall-through.
1838  case MVT::i8:
1839  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1840  RC = (IsRet64Bit && !WantZExt) ?
1841  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842  break;
1843  case MVT::i16:
1844  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1845  RC = (IsRet64Bit && !WantZExt) ?
1846  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847  break;
1848  case MVT::i32:
1849  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1850  RC = (IsRet64Bit && !WantZExt) ?
1851  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1852  break;
1853  case MVT::i64:
1854  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1855  RC = &AArch64::GPR64RegClass;
1856  break;
1857  case MVT::f32:
1858  Opc = FPOpcTable[Idx][0];
1859  RC = &AArch64::FPR32RegClass;
1860  break;
1861  case MVT::f64:
1862  Opc = FPOpcTable[Idx][1];
1863  RC = &AArch64::FPR64RegClass;
1864  break;
1865  }
1866 
1867  // Create the base instruction, then add the operands.
1868  Register ResultReg = createResultReg(RC);
1869  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1870  TII.get(Opc), ResultReg);
1871  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1872 
1873  // Loading an i1 requires special handling.
1874  if (VT == MVT::i1) {
1875  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1876  assert(ANDReg && "Unexpected AND instruction emission failure.");
1877  ResultReg = ANDReg;
1878  }
1879 
1880  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1881  // the 32bit reg to a 64bit reg.
1882  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1883  Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1884  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1885  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1886  .addImm(0)
1887  .addReg(ResultReg, getKillRegState(true))
1888  .addImm(AArch64::sub_32);
1889  ResultReg = Reg64;
1890  }
1891  return ResultReg;
1892 }
1893 
1894 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1895  MVT VT;
1896  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1897  return false;
1898 
1899  if (VT.isVector())
1900  return selectOperator(I, I->getOpcode());
1901 
1902  unsigned ResultReg;
1903  switch (I->getOpcode()) {
1904  default:
1905  llvm_unreachable("Unexpected instruction.");
1906  case Instruction::Add:
1907  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1908  break;
1909  case Instruction::Sub:
1910  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1911  break;
1912  }
1913  if (!ResultReg)
1914  return false;
1915 
1916  updateValueMap(I, ResultReg);
1917  return true;
1918 }
1919 
1920 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1921  MVT VT;
1922  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1923  return false;
1924 
1925  if (VT.isVector())
1926  return selectOperator(I, I->getOpcode());
1927 
1928  unsigned ResultReg;
1929  switch (I->getOpcode()) {
1930  default:
1931  llvm_unreachable("Unexpected instruction.");
1932  case Instruction::And:
1933  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1934  break;
1935  case Instruction::Or:
1936  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1937  break;
1938  case Instruction::Xor:
1939  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1940  break;
1941  }
1942  if (!ResultReg)
1943  return false;
1944 
1945  updateValueMap(I, ResultReg);
1946  return true;
1947 }
1948 
1949 bool AArch64FastISel::selectLoad(const Instruction *I) {
1950  MVT VT;
1951  // Verify we have a legal type before going any further. Currently, we handle
1952  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1953  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1954  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1955  cast<LoadInst>(I)->isAtomic())
1956  return false;
1957 
1958  const Value *SV = I->getOperand(0);
1959  if (TLI.supportSwiftError()) {
1960  // Swifterror values can come from either a function parameter with
1961  // swifterror attribute or an alloca with swifterror attribute.
1962  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1963  if (Arg->hasSwiftErrorAttr())
1964  return false;
1965  }
1966 
1967  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1968  if (Alloca->isSwiftError())
1969  return false;
1970  }
1971  }
1972 
1973  // See if we can handle this address.
1974  Address Addr;
1975  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1976  return false;
1977 
1978  // Fold the following sign-/zero-extend into the load instruction.
1979  bool WantZExt = true;
1980  MVT RetVT = VT;
1981  const Value *IntExtVal = nullptr;
1982  if (I->hasOneUse()) {
1983  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1984  if (isTypeSupported(ZE->getType(), RetVT))
1985  IntExtVal = ZE;
1986  else
1987  RetVT = VT;
1988  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1989  if (isTypeSupported(SE->getType(), RetVT))
1990  IntExtVal = SE;
1991  else
1992  RetVT = VT;
1993  WantZExt = false;
1994  }
1995  }
1996 
1997  unsigned ResultReg =
1998  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1999  if (!ResultReg)
2000  return false;
2001 
2002  // There are a few different cases we have to handle, because the load or the
2003  // sign-/zero-extend might not be selected by FastISel if we fall-back to
2004  // SelectionDAG. There is also an ordering issue when both instructions are in
2005  // different basic blocks.
2006  // 1.) The load instruction is selected by FastISel, but the integer extend
2007  // not. This usually happens when the integer extend is in a different
2008  // basic block and SelectionDAG took over for that basic block.
2009  // 2.) The load instruction is selected before the integer extend. This only
2010  // happens when the integer extend is in a different basic block.
2011  // 3.) The load instruction is selected by SelectionDAG and the integer extend
2012  // by FastISel. This happens if there are instructions between the load
2013  // and the integer extend that couldn't be selected by FastISel.
2014  if (IntExtVal) {
2015  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2016  // could select it. Emit a copy to subreg if necessary. FastISel will remove
2017  // it when it selects the integer extend.
2018  Register Reg = lookUpRegForValue(IntExtVal);
2019  auto *MI = MRI.getUniqueVRegDef(Reg);
2020  if (!MI) {
2021  if (RetVT == MVT::i64 && VT <= MVT::i32) {
2022  if (WantZExt) {
2023  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2024  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2025  ResultReg = std::prev(I)->getOperand(0).getReg();
2026  removeDeadCode(I, std::next(I));
2027  } else
2028  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2029  AArch64::sub_32);
2030  }
2031  updateValueMap(I, ResultReg);
2032  return true;
2033  }
2034 
2035  // The integer extend has already been emitted - delete all the instructions
2036  // that have been emitted by the integer extend lowering code and use the
2037  // result from the load instruction directly.
2038  while (MI) {
2039  Reg = 0;
2040  for (auto &Opnd : MI->uses()) {
2041  if (Opnd.isReg()) {
2042  Reg = Opnd.getReg();
2043  break;
2044  }
2045  }
2046  MachineBasicBlock::iterator I(MI);
2047  removeDeadCode(I, std::next(I));
2048  MI = nullptr;
2049  if (Reg)
2050  MI = MRI.getUniqueVRegDef(Reg);
2051  }
2052  updateValueMap(IntExtVal, ResultReg);
2053  return true;
2054  }
2055 
2056  updateValueMap(I, ResultReg);
2057  return true;
2058 }
2059 
2060 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2061  unsigned AddrReg,
2062  MachineMemOperand *MMO) {
2063  unsigned Opc;
2064  switch (VT.SimpleTy) {
2065  default: return false;
2066  case MVT::i8: Opc = AArch64::STLRB; break;
2067  case MVT::i16: Opc = AArch64::STLRH; break;
2068  case MVT::i32: Opc = AArch64::STLRW; break;
2069  case MVT::i64: Opc = AArch64::STLRX; break;
2070  }
2071 
2072  const MCInstrDesc &II = TII.get(Opc);
2073  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2074  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2075  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2076  .addReg(SrcReg)
2077  .addReg(AddrReg)
2078  .addMemOperand(MMO);
2079  return true;
2080 }
2081 
2082 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2083  MachineMemOperand *MMO) {
2084  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2085  return false;
2086 
2087  // Simplify this down to something we can handle.
2088  if (!simplifyAddress(Addr, VT))
2089  return false;
2090 
2091  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2092  if (!ScaleFactor)
2093  llvm_unreachable("Unexpected value type.");
2094 
2095  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2096  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2097  bool UseScaled = true;
2098  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2099  UseScaled = false;
2100  ScaleFactor = 1;
2101  }
2102 
2103  static const unsigned OpcTable[4][6] = {
2104  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2105  AArch64::STURSi, AArch64::STURDi },
2106  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2107  AArch64::STRSui, AArch64::STRDui },
2108  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2109  AArch64::STRSroX, AArch64::STRDroX },
2110  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2111  AArch64::STRSroW, AArch64::STRDroW }
2112  };
2113 
2114  unsigned Opc;
2115  bool VTIsi1 = false;
2116  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2117  Addr.getOffsetReg();
2118  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2119  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2120  Addr.getExtendType() == AArch64_AM::SXTW)
2121  Idx++;
2122 
2123  switch (VT.SimpleTy) {
2124  default: llvm_unreachable("Unexpected value type.");
2125  case MVT::i1: VTIsi1 = true; [[fallthrough]];
2126  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2127  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2128  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2129  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2130  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2131  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2132  }
2133 
2134  // Storing an i1 requires special handling.
2135  if (VTIsi1 && SrcReg != AArch64::WZR) {
2136  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2137  assert(ANDReg && "Unexpected AND instruction emission failure.");
2138  SrcReg = ANDReg;
2139  }
2140  // Create the base instruction, then add the operands.
2141  const MCInstrDesc &II = TII.get(Opc);
2142  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2143  MachineInstrBuilder MIB =
2144  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2145  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2146 
2147  return true;
2148 }
2149 
2150 bool AArch64FastISel::selectStore(const Instruction *I) {
2151  MVT VT;
2152  const Value *Op0 = I->getOperand(0);
2153  // Verify we have a legal type before going any further. Currently, we handle
2154  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2155  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2156  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2157  return false;
2158 
2159  const Value *PtrV = I->getOperand(1);
2160  if (TLI.supportSwiftError()) {
2161  // Swifterror values can come from either a function parameter with
2162  // swifterror attribute or an alloca with swifterror attribute.
2163  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2164  if (Arg->hasSwiftErrorAttr())
2165  return false;
2166  }
2167 
2168  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2169  if (Alloca->isSwiftError())
2170  return false;
2171  }
2172  }
2173 
2174  // Get the value to be stored into a register. Use the zero register directly
2175  // when possible to avoid an unnecessary copy and a wasted register.
2176  unsigned SrcReg = 0;
2177  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2178  if (CI->isZero())
2179  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2180  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2181  if (CF->isZero() && !CF->isNegative()) {
2182  VT = MVT::getIntegerVT(VT.getSizeInBits());
2183  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2184  }
2185  }
2186 
2187  if (!SrcReg)
2188  SrcReg = getRegForValue(Op0);
2189 
2190  if (!SrcReg)
2191  return false;
2192 
2193  auto *SI = cast<StoreInst>(I);
2194 
2195  // Try to emit a STLR for seq_cst/release.
2196  if (SI->isAtomic()) {
2197  AtomicOrdering Ord = SI->getOrdering();
2198  // The non-atomic instructions are sufficient for relaxed stores.
2199  if (isReleaseOrStronger(Ord)) {
2200  // The STLR addressing mode only supports a base reg; pass that directly.
2201  Register AddrReg = getRegForValue(PtrV);
2202  return emitStoreRelease(VT, SrcReg, AddrReg,
2203  createMachineMemOperandFor(I));
2204  }
2205  }
2206 
2207  // See if we can handle this address.
2208  Address Addr;
2209  if (!computeAddress(PtrV, Addr, Op0->getType()))
2210  return false;
2211 
2212  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2213  return false;
2214  return true;
2215 }
2216 
2217 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2218  switch (Pred) {
2219  case CmpInst::FCMP_ONE:
2220  case CmpInst::FCMP_UEQ:
2221  default:
2222  // AL is our "false" for now. The other two need more compares.
2223  return AArch64CC::AL;
2224  case CmpInst::ICMP_EQ:
2225  case CmpInst::FCMP_OEQ:
2226  return AArch64CC::EQ;
2227  case CmpInst::ICMP_SGT:
2228  case CmpInst::FCMP_OGT:
2229  return AArch64CC::GT;
2230  case CmpInst::ICMP_SGE:
2231  case CmpInst::FCMP_OGE:
2232  return AArch64CC::GE;
2233  case CmpInst::ICMP_UGT:
2234  case CmpInst::FCMP_UGT:
2235  return AArch64CC::HI;
2236  case CmpInst::FCMP_OLT:
2237  return AArch64CC::MI;
2238  case CmpInst::ICMP_ULE:
2239  case CmpInst::FCMP_OLE:
2240  return AArch64CC::LS;
2241  case CmpInst::FCMP_ORD:
2242  return AArch64CC::VC;
2243  case CmpInst::FCMP_UNO:
2244  return AArch64CC::VS;
2245  case CmpInst::FCMP_UGE:
2246  return AArch64CC::PL;
2247  case CmpInst::ICMP_SLT:
2248  case CmpInst::FCMP_ULT:
2249  return AArch64CC::LT;
2250  case CmpInst::ICMP_SLE:
2251  case CmpInst::FCMP_ULE:
2252  return AArch64CC::LE;
2253  case CmpInst::FCMP_UNE:
2254  case CmpInst::ICMP_NE:
2255  return AArch64CC::NE;
2256  case CmpInst::ICMP_UGE:
2257  return AArch64CC::HS;
2258  case CmpInst::ICMP_ULT:
2259  return AArch64CC::LO;
2260  }
2261 }
2262 
2263 /// Try to emit a combined compare-and-branch instruction.
2264 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2265  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2266  // will not be produced, as they are conditional branch instructions that do
2267  // not set flags.
2268  if (FuncInfo.MF->getFunction().hasFnAttribute(
2269  Attribute::SpeculativeLoadHardening))
2270  return false;
2271 
2272  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2273  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2274  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2275 
2276  const Value *LHS = CI->getOperand(0);
2277  const Value *RHS = CI->getOperand(1);
2278 
2279  MVT VT;
2280  if (!isTypeSupported(LHS->getType(), VT))
2281  return false;
2282 
2283  unsigned BW = VT.getSizeInBits();
2284  if (BW > 64)
2285  return false;
2286 
2287  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2288  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2289 
2290  // Try to take advantage of fallthrough opportunities.
2291  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2292  std::swap(TBB, FBB);
2293  Predicate = CmpInst::getInversePredicate(Predicate);
2294  }
2295 
2296  int TestBit = -1;
2297  bool IsCmpNE;
2298  switch (Predicate) {
2299  default:
2300  return false;
2301  case CmpInst::ICMP_EQ:
2302  case CmpInst::ICMP_NE:
2303  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2304  std::swap(LHS, RHS);
2305 
2306  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2307  return false;
2308 
2309  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2310  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2311  const Value *AndLHS = AI->getOperand(0);
2312  const Value *AndRHS = AI->getOperand(1);
2313 
2314  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2315  if (C->getValue().isPowerOf2())
2316  std::swap(AndLHS, AndRHS);
2317 
2318  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2319  if (C->getValue().isPowerOf2()) {
2320  TestBit = C->getValue().logBase2();
2321  LHS = AndLHS;
2322  }
2323  }
2324 
2325  if (VT == MVT::i1)
2326  TestBit = 0;
2327 
2328  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2329  break;
2330  case CmpInst::ICMP_SLT:
2331  case CmpInst::ICMP_SGE:
2332  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2333  return false;
2334 
2335  TestBit = BW - 1;
2336  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2337  break;
2338  case CmpInst::ICMP_SGT:
2339  case CmpInst::ICMP_SLE:
2340  if (!isa<ConstantInt>(RHS))
2341  return false;
2342 
2343  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2344  return false;
2345 
2346  TestBit = BW - 1;
2347  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2348  break;
2349  } // end switch
2350 
2351  static const unsigned OpcTable[2][2][2] = {
2352  { {AArch64::CBZW, AArch64::CBZX },
2353  {AArch64::CBNZW, AArch64::CBNZX} },
2354  { {AArch64::TBZW, AArch64::TBZX },
2355  {AArch64::TBNZW, AArch64::TBNZX} }
2356  };
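  // Indexed as OpcTable[IsBitTest][IsCmpNE][Is64Bit]: e.g. an i64 "x < 0"
  // becomes TBNZ Xn, #63, while an i32 equality-with-zero compare becomes
  // CBZ Wn.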
2357 
2358  bool IsBitTest = TestBit != -1;
2359  bool Is64Bit = BW == 64;
2360  if (TestBit < 32 && TestBit >= 0)
2361  Is64Bit = false;
2362 
2363  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2364  const MCInstrDesc &II = TII.get(Opc);
2365 
2366  Register SrcReg = getRegForValue(LHS);
2367  if (!SrcReg)
2368  return false;
2369 
2370  if (BW == 64 && !Is64Bit)
2371  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2372 
2373  if ((BW < 32) && !IsBitTest)
2374  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2375 
2376  // Emit the combined compare and branch instruction.
2377  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2378  MachineInstrBuilder MIB =
2379  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2380  .addReg(SrcReg);
2381  if (IsBitTest)
2382  MIB.addImm(TestBit);
2383  MIB.addMBB(TBB);
2384 
2385  finishCondBranch(BI->getParent(), TBB, FBB);
2386  return true;
2387 }
2388 
2389 bool AArch64FastISel::selectBranch(const Instruction *I) {
2390  const BranchInst *BI = cast<BranchInst>(I);
2391  if (BI->isUnconditional()) {
2392  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2393  fastEmitBranch(MSucc, BI->getDebugLoc());
2394  return true;
2395  }
2396 
2397  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2398  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2399 
2400  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2401  if (CI->hasOneUse() && isValueAvailable(CI)) {
2402  // Try to optimize or fold the cmp.
2403  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2404  switch (Predicate) {
2405  default:
2406  break;
2407  case CmpInst::FCMP_FALSE:
2408  fastEmitBranch(FBB, MIMD.getDL());
2409  return true;
2410  case CmpInst::FCMP_TRUE:
2411  fastEmitBranch(TBB, MIMD.getDL());
2412  return true;
2413  }
2414 
2415  // Try to emit a combined compare-and-branch first.
2416  if (emitCompareAndBranch(BI))
2417  return true;
2418 
2419  // Try to take advantage of fallthrough opportunities.
2420  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2421  std::swap(TBB, FBB);
2422  Predicate = CmpInst::getInversePredicate(Predicate);
2423  }
2424 
2425  // Emit the cmp.
2426  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2427  return false;
2428 
2429  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2430  // instruction.
2431  AArch64CC::CondCode CC = getCompareCC(Predicate);
2432  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2433  switch (Predicate) {
2434  default:
2435  break;
2436  case CmpInst::FCMP_UEQ:
2437  ExtraCC = AArch64CC::EQ;
2438  CC = AArch64CC::VS;
2439  break;
2440  case CmpInst::FCMP_ONE:
2441  ExtraCC = AArch64CC::MI;
2442  CC = AArch64CC::GT;
2443  break;
2444  }
2445  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2446 
2447  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2448  if (ExtraCC != AArch64CC::AL) {
2449  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450  .addImm(ExtraCC)
2451  .addMBB(TBB);
2452  }
2453 
2454  // Emit the branch.
2455  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2456  .addImm(CC)
2457  .addMBB(TBB);
2458 
2459  finishCondBranch(BI->getParent(), TBB, FBB);
2460  return true;
2461  }
2462  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2463  uint64_t Imm = CI->getZExtValue();
2464  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2465  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2466  .addMBB(Target);
2467 
2468  // Obtain the branch probability and add the target to the successor list.
2469  if (FuncInfo.BPI) {
2470  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2471  BI->getParent(), Target->getBasicBlock());
2472  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2473  } else
2474  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2475  return true;
2476  } else {
2477  AArch64CC::CondCode CC = AArch64CC::AL;
2478  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2479  // Fake request the condition, otherwise the intrinsic might be completely
2480  // optimized away.
2481  Register CondReg = getRegForValue(BI->getCondition());
2482  if (!CondReg)
2483  return false;
2484 
2485  // Emit the branch.
2486  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2487  .addImm(CC)
2488  .addMBB(TBB);
2489 
2490  finishCondBranch(BI->getParent(), TBB, FBB);
2491  return true;
2492  }
2493  }
2494 
2495  Register CondReg = getRegForValue(BI->getCondition());
2496  if (CondReg == 0)
2497  return false;
2498 
2499  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2500  unsigned Opcode = AArch64::TBNZW;
2501  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2502  std::swap(TBB, FBB);
2503  Opcode = AArch64::TBZW;
2504  }
2505 
2506  const MCInstrDesc &II = TII.get(Opcode);
2507  Register ConstrainedCondReg
2508  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2509  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2510  .addReg(ConstrainedCondReg)
2511  .addImm(0)
2512  .addMBB(TBB);
2513 
2514  finishCondBranch(BI->getParent(), TBB, FBB);
2515  return true;
2516 }
2517 
2518 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2519  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2520  Register AddrReg = getRegForValue(BI->getOperand(0));
2521  if (AddrReg == 0)
2522  return false;
2523 
2524  // Emit the indirect branch.
2525  const MCInstrDesc &II = TII.get(AArch64::BR);
2526  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2527  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2528 
2529  // Make sure the CFG is up-to-date.
2530  for (const auto *Succ : BI->successors())
2531  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2532 
2533  return true;
2534 }
2535 
2536 bool AArch64FastISel::selectCmp(const Instruction *I) {
2537  const CmpInst *CI = cast<CmpInst>(I);
2538 
2539  // Vectors of i1 are weird: bail out.
2540  if (CI->getType()->isVectorTy())
2541  return false;
2542 
2543  // Try to optimize or fold the cmp.
2544  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2545  unsigned ResultReg = 0;
2546  switch (Predicate) {
2547  default:
2548  break;
2549  case CmpInst::FCMP_FALSE:
2550  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2551  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2552  TII.get(TargetOpcode::COPY), ResultReg)
2553  .addReg(AArch64::WZR, getKillRegState(true));
2554  break;
2555  case CmpInst::FCMP_TRUE:
2556  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2557  break;
2558  }
2559 
2560  if (ResultReg) {
2561  updateValueMap(I, ResultReg);
2562  return true;
2563  }
2564 
2565  // Emit the cmp.
2566  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2567  return false;
2568 
2569  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2570 
2571  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2572  // condition codes are inverted, because they are used by CSINC.
2573  static unsigned CondCodeTable[2][2] = {
2574  { AArch64CC::NE, AArch64CC::VC },
2575  { AArch64CC::PL, AArch64CC::LE }
2576  };
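  // A CSINC with two WZR operands computes "cond ? 0 : 1", i.e. 1 when the
  // inverted condition fails. For FCMP_UEQ the first CSINC therefore yields
  // the EQ result, and the second keeps it when V is clear but forces 1 when
  // V is set (unordered), giving 1 exactly for "unordered or equal".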
2577  unsigned *CondCodes = nullptr;
2578  switch (Predicate) {
2579  default:
2580  break;
2581  case CmpInst::FCMP_UEQ:
2582  CondCodes = &CondCodeTable[0][0];
2583  break;
2584  case CmpInst::FCMP_ONE:
2585  CondCodes = &CondCodeTable[1][0];
2586  break;
2587  }
2588 
2589  if (CondCodes) {
2590  Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2591  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2592  TmpReg1)
2593  .addReg(AArch64::WZR, getKillRegState(true))
2594  .addReg(AArch64::WZR, getKillRegState(true))
2595  .addImm(CondCodes[0]);
2596  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2597  ResultReg)
2598  .addReg(TmpReg1, getKillRegState(true))
2599  .addReg(AArch64::WZR, getKillRegState(true))
2600  .addImm(CondCodes[1]);
2601 
2602  updateValueMap(I, ResultReg);
2603  return true;
2604  }
2605 
2606  // Now set a register based on the comparison.
2607  AArch64CC::CondCode CC = getCompareCC(Predicate);
2608  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2609  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2610  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2611  ResultReg)
2612  .addReg(AArch64::WZR, getKillRegState(true))
2613  .addReg(AArch64::WZR, getKillRegState(true))
2614  .addImm(invertedCC);
2615 
2616  updateValueMap(I, ResultReg);
2617  return true;
2618 }
2619 
2620 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2621 /// value.
2622 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2623  if (!SI->getType()->isIntegerTy(1))
2624  return false;
2625 
2626  const Value *Src1Val, *Src2Val;
2627  unsigned Opc = 0;
2628  bool NeedExtraOp = false;
2629  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2630  if (CI->isOne()) {
2631  Src1Val = SI->getCondition();
2632  Src2Val = SI->getFalseValue();
2633  Opc = AArch64::ORRWrr;
2634  } else {
2635  assert(CI->isZero());
2636  Src1Val = SI->getFalseValue();
2637  Src2Val = SI->getCondition();
2638  Opc = AArch64::BICWrr;
2639  }
2640  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2641  if (CI->isOne()) {
2642  Src1Val = SI->getCondition();
2643  Src2Val = SI->getTrueValue();
2644  Opc = AArch64::ORRWrr;
2645  NeedExtraOp = true;
2646  } else {
2647  assert(CI->isZero());
2648  Src1Val = SI->getCondition();
2649  Src2Val = SI->getTrueValue();
2650  Opc = AArch64::ANDWrr;
2651  }
2652  }
2653 
2654  if (!Opc)
2655  return false;
2656 
2657  Register Src1Reg = getRegForValue(Src1Val);
2658  if (!Src1Reg)
2659  return false;
2660 
2661  Register Src2Reg = getRegForValue(Src2Val);
2662  if (!Src2Reg)
2663  return false;
2664 
2665  if (NeedExtraOp)
2666  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2667 
2668  Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2669  Src2Reg);
2670  updateValueMap(SI, ResultReg);
2671  return true;
2672 }
2673 
2674 bool AArch64FastISel::selectSelect(const Instruction *I) {
2675  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2676  MVT VT;
2677  if (!isTypeSupported(I->getType(), VT))
2678  return false;
2679 
2680  unsigned Opc;
2681  const TargetRegisterClass *RC;
2682  switch (VT.SimpleTy) {
2683  default:
2684  return false;
2685  case MVT::i1:
2686  case MVT::i8:
2687  case MVT::i16:
2688  case MVT::i32:
2689  Opc = AArch64::CSELWr;
2690  RC = &AArch64::GPR32RegClass;
2691  break;
2692  case MVT::i64:
2693  Opc = AArch64::CSELXr;
2694  RC = &AArch64::GPR64RegClass;
2695  break;
2696  case MVT::f32:
2697  Opc = AArch64::FCSELSrrr;
2698  RC = &AArch64::FPR32RegClass;
2699  break;
2700  case MVT::f64:
2701  Opc = AArch64::FCSELDrrr;
2702  RC = &AArch64::FPR64RegClass;
2703  break;
2704  }
2705 
2706  const SelectInst *SI = cast<SelectInst>(I);
2707  const Value *Cond = SI->getCondition();
2708  AArch64CC::CondCode CC = AArch64CC::NE;
2709  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2710 
2711  if (optimizeSelect(SI))
2712  return true;
2713 
2714  // Try to pick up the flags, so we don't have to emit another compare.
2715  if (foldXALUIntrinsic(CC, I, Cond)) {
2716  // Fake request the condition to force emission of the XALU intrinsic.
2717  Register CondReg = getRegForValue(Cond);
2718  if (!CondReg)
2719  return false;
2720  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2721  isValueAvailable(Cond)) {
2722  const auto *Cmp = cast<CmpInst>(Cond);
2723  // Try to optimize or fold the cmp.
2724  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2725  const Value *FoldSelect = nullptr;
2726  switch (Predicate) {
2727  default:
2728  break;
2729  case CmpInst::FCMP_FALSE:
2730  FoldSelect = SI->getFalseValue();
2731  break;
2732  case CmpInst::FCMP_TRUE:
2733  FoldSelect = SI->getTrueValue();
2734  break;
2735  }
2736 
2737  if (FoldSelect) {
2738  Register SrcReg = getRegForValue(FoldSelect);
2739  if (!SrcReg)
2740  return false;
2741 
2742  updateValueMap(I, SrcReg);
2743  return true;
2744  }
2745 
2746  // Emit the cmp.
2747  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2748  return false;
2749 
2750  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2751  CC = getCompareCC(Predicate);
2752  switch (Predicate) {
2753  default:
2754  break;
2755  case CmpInst::FCMP_UEQ:
2756  ExtraCC = AArch64CC::EQ;
2757  CC = AArch64CC::VS;
2758  break;
2759  case CmpInst::FCMP_ONE:
2760  ExtraCC = AArch64CC::MI;
2761  CC = AArch64CC::GT;
2762  break;
2763  }
2764  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2765  } else {
2766  Register CondReg = getRegForValue(Cond);
2767  if (!CondReg)
2768  return false;
2769 
2770  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2771  CondReg = constrainOperandRegClass(II, CondReg, 1);
2772 
2773  // Emit a TST instruction (ANDS wzr, reg, #imm).
2774  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2775  AArch64::WZR)
2776  .addReg(CondReg)
2777  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2778  }
2779 
2780  Register Src1Reg = getRegForValue(SI->getTrueValue());
2781  Register Src2Reg = getRegForValue(SI->getFalseValue());
2782 
2783  if (!Src1Reg || !Src2Reg)
2784  return false;
2785 
2786  if (ExtraCC != AArch64CC::AL)
2787  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2788 
2789  Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2790  updateValueMap(I, ResultReg);
2791  return true;
2792 }
2793 
2794 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2795  Value *V = I->getOperand(0);
2796  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2797  return false;
2798 
2799  Register Op = getRegForValue(V);
2800  if (Op == 0)
2801  return false;
2802 
2803  Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2804  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2805  ResultReg).addReg(Op);
2806  updateValueMap(I, ResultReg);
2807  return true;
2808 }
2809 
2810 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2811  Value *V = I->getOperand(0);
2812  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2813  return false;
2814 
2815  Register Op = getRegForValue(V);
2816  if (Op == 0)
2817  return false;
2818 
2819  Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2820  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2821  ResultReg).addReg(Op);
2822  updateValueMap(I, ResultReg);
2823  return true;
2824 }
2825 
2826 // FPToUI and FPToSI
2827 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2828  MVT DestVT;
2829  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2830  return false;
2831 
2832  Register SrcReg = getRegForValue(I->getOperand(0));
2833  if (SrcReg == 0)
2834  return false;
2835 
2836  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2837  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2838  return false;
2839 
2840  unsigned Opc;
2841  if (SrcVT == MVT::f64) {
2842  if (Signed)
2843  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2844  else
2845  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2846  } else {
2847  if (Signed)
2848  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2849  else
2850  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2851  }
2852  Register ResultReg = createResultReg(
2853  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2854  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2855  .addReg(SrcReg);
2856  updateValueMap(I, ResultReg);
2857  return true;
2858 }
2859 
2860 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2861  MVT DestVT;
2862  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2863  return false;
2864  // Let regular ISEL handle FP16
2865  if (DestVT == MVT::f16)
2866  return false;
2867 
2868  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2869  "Unexpected value type.");
2870 
2871  Register SrcReg = getRegForValue(I->getOperand(0));
2872  if (!SrcReg)
2873  return false;
2874 
2875  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2876 
2877  // Handle sign-extension.
2878  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2879  SrcReg =
2880  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2881  if (!SrcReg)
2882  return false;
2883  }
2884 
2885  unsigned Opc;
2886  if (SrcVT == MVT::i64) {
2887  if (Signed)
2888  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2889  else
2890  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2891  } else {
2892  if (Signed)
2893  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2894  else
2895  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2896  }
2897 
2898  Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2899  updateValueMap(I, ResultReg);
2900  return true;
2901 }
2902 
2903 bool AArch64FastISel::fastLowerArguments() {
2904  if (!FuncInfo.CanLowerReturn)
2905  return false;
2906 
2907  const Function *F = FuncInfo.Fn;
2908  if (F->isVarArg())
2909  return false;
2910 
2911  CallingConv::ID CC = F->getCallingConv();
2912  if (CC != CallingConv::C && CC != CallingConv::Swift)
2913  return false;
2914 
2915  if (Subtarget->hasCustomCallingConv())
2916  return false;
2917 
2918  // Only handle simple cases of up to 8 GPR and FPR each.
2919  unsigned GPRCnt = 0;
2920  unsigned FPRCnt = 0;
2921  for (auto const &Arg : F->args()) {
2922  if (Arg.hasAttribute(Attribute::ByVal) ||
2923  Arg.hasAttribute(Attribute::InReg) ||
2924  Arg.hasAttribute(Attribute::StructRet) ||
2925  Arg.hasAttribute(Attribute::SwiftSelf) ||
2926  Arg.hasAttribute(Attribute::SwiftAsync) ||
2927  Arg.hasAttribute(Attribute::SwiftError) ||
2928  Arg.hasAttribute(Attribute::Nest))
2929  return false;
2930 
2931  Type *ArgTy = Arg.getType();
2932  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2933  return false;
2934 
2935  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2936  if (!ArgVT.isSimple())
2937  return false;
2938 
2939  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2940  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2941  return false;
2942 
2943  if (VT.isVector() &&
2944  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2945  return false;
2946 
2947  if (VT >= MVT::i1 && VT <= MVT::i64)
2948  ++GPRCnt;
2949  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2950  VT.is128BitVector())
2951  ++FPRCnt;
2952  else
2953  return false;
2954 
2955  if (GPRCnt > 8 || FPRCnt > 8)
2956  return false;
2957  }
2958 
2959  static const MCPhysReg Registers[6][8] = {
2960  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2961  AArch64::W5, AArch64::W6, AArch64::W7 },
2962  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2963  AArch64::X5, AArch64::X6, AArch64::X7 },
2964  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2965  AArch64::H5, AArch64::H6, AArch64::H7 },
2966  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2967  AArch64::S5, AArch64::S6, AArch64::S7 },
2968  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2969  AArch64::D5, AArch64::D6, AArch64::D7 },
2970  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2971  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2972  };
2973 
2974  unsigned GPRIdx = 0;
2975  unsigned FPRIdx = 0;
2976  for (auto const &Arg : F->args()) {
2977  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2978  unsigned SrcReg;
2979  const TargetRegisterClass *RC;
2980  if (VT >= MVT::i1 && VT <= MVT::i32) {
2981  SrcReg = Registers[0][GPRIdx++];
2982  RC = &AArch64::GPR32RegClass;
2983  VT = MVT::i32;
2984  } else if (VT == MVT::i64) {
2985  SrcReg = Registers[1][GPRIdx++];
2986  RC = &AArch64::GPR64RegClass;
2987  } else if (VT == MVT::f16) {
2988  SrcReg = Registers[2][FPRIdx++];
2989  RC = &AArch64::FPR16RegClass;
2990  } else if (VT == MVT::f32) {
2991  SrcReg = Registers[3][FPRIdx++];
2992  RC = &AArch64::FPR32RegClass;
2993  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2994  SrcReg = Registers[4][FPRIdx++];
2995  RC = &AArch64::FPR64RegClass;
2996  } else if (VT.is128BitVector()) {
2997  SrcReg = Registers[5][FPRIdx++];
2998  RC = &AArch64::FPR128RegClass;
2999  } else
3000  llvm_unreachable("Unexpected value type.");
3001 
3002  Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3003  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3004  // Without this, EmitLiveInCopies may eliminate the livein if its only
3005  // use is a bitcast (which isn't turned into an instruction).
3006  Register ResultReg = createResultReg(RC);
3007  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3008  TII.get(TargetOpcode::COPY), ResultReg)
3009  .addReg(DstReg, getKillRegState(true));
3010  updateValueMap(&Arg, ResultReg);
3011  }
3012  return true;
3013 }
3014 
3015 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3016  SmallVectorImpl<MVT> &OutVTs,
3017  unsigned &NumBytes) {
3018  CallingConv::ID CC = CLI.CallConv;
3019  SmallVector<CCValAssign, 16> ArgLocs;
3020  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3021  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3022 
3023  // Get a count of how many bytes are to be pushed on the stack.
3024  NumBytes = CCInfo.getNextStackOffset();
3025 
3026  // Issue CALLSEQ_START
3027  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3028  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3029  .addImm(NumBytes).addImm(0);
3030 
3031  // Process the args.
3032  for (CCValAssign &VA : ArgLocs) {
3033  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3034  MVT ArgVT = OutVTs[VA.getValNo()];
3035 
3036  Register ArgReg = getRegForValue(ArgVal);
3037  if (!ArgReg)
3038  return false;
3039 
3040  // Handle arg promotion: SExt, ZExt, AExt.
3041  switch (VA.getLocInfo()) {
3042  case CCValAssign::Full:
3043  break;
3044  case CCValAssign::SExt: {
3045  MVT DestVT = VA.getLocVT();
3046  MVT SrcVT = ArgVT;
3047  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3048  if (!ArgReg)
3049  return false;
3050  break;
3051  }
3052  case CCValAssign::AExt:
3053  // Intentional fall-through.
3054  case CCValAssign::ZExt: {
3055  MVT DestVT = VA.getLocVT();
3056  MVT SrcVT = ArgVT;
3057  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3058  if (!ArgReg)
3059  return false;
3060  break;
3061  }
3062  default:
3063  llvm_unreachable("Unknown arg promotion!");
3064  }
3065 
3066  // Now copy/store arg to correct locations.
3067  if (VA.isRegLoc() && !VA.needsCustom()) {
3068  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3069  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3070  CLI.OutRegs.push_back(VA.getLocReg());
3071  } else if (VA.needsCustom()) {
3072  // FIXME: Handle custom args.
3073  return false;
3074  } else {
3075  assert(VA.isMemLoc() && "Assuming store on stack.");
3076 
3077  // Don't emit stores for undef values.
3078  if (isa<UndefValue>(ArgVal))
3079  continue;
3080 
3081  // Need to store on the stack.
3082  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3083 
3084  unsigned BEAlign = 0;
3085  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3086  BEAlign = 8 - ArgSize;
3087 
3088  Address Addr;
3089  Addr.setKind(Address::RegBase);
3090  Addr.setReg(AArch64::SP);
3091  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3092 
3093  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3094  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3095  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3096  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3097 
3098  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3099  return false;
3100  }
3101  }
3102  return true;
3103 }
3104 
3105 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3106  unsigned NumBytes) {
3107  CallingConv::ID CC = CLI.CallConv;
3108 
3109  // Issue CALLSEQ_END
3110  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3111  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3112  .addImm(NumBytes).addImm(0);
3113 
3114  // Now the return value.
3115  if (RetVT != MVT::isVoid) {
3116  SmallVector<CCValAssign, 16> RVLocs;
3117  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3118  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3119 
3120  // Only handle a single return value.
3121  if (RVLocs.size() != 1)
3122  return false;
3123 
3124  // Copy all of the result registers out of their specified physreg.
3125  MVT CopyVT = RVLocs[0].getValVT();
3126 
3127  // TODO: Handle big-endian results
3128  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3129  return false;
3130 
3131  Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3132  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3133  TII.get(TargetOpcode::COPY), ResultReg)
3134  .addReg(RVLocs[0].getLocReg());
3135  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3136 
3137  CLI.ResultReg = ResultReg;
3138  CLI.NumResultRegs = 1;
3139  }
3140 
3141  return true;
3142 }
3143 
3144 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3145  CallingConv::ID CC = CLI.CallConv;
3146  bool IsTailCall = CLI.IsTailCall;
3147  bool IsVarArg = CLI.IsVarArg;
3148  const Value *Callee = CLI.Callee;
3149  MCSymbol *Symbol = CLI.Symbol;
3150 
3151  if (!Callee && !Symbol)
3152  return false;
3153 
3154  // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3155  // a bti instruction following the call.
3156  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3157  !Subtarget->noBTIAtReturnTwice() &&
3158  MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3159  return false;
3160 
3161  // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3162  if (CLI.CB && CLI.CB->isIndirectCall() &&
3163  CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3164  return false;
3165 
3166  // Allow SelectionDAG isel to handle tail calls.
3167  if (IsTailCall)
3168  return false;
3169 
3170  // FIXME: we could and should support this, but for now correctness at -O0 is
3171  // more important.
3172  if (Subtarget->isTargetILP32())
3173  return false;
3174 
3175  CodeModel::Model CM = TM.getCodeModel();
3176  // Only support the small-addressing and large code models.
3177  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3178  return false;
3179 
3180  // FIXME: Add large code model support for ELF.
3181  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3182  return false;
3183 
3184  // Let SDISel handle vararg functions.
3185  if (IsVarArg)
3186  return false;
3187 
3188  // FIXME: Only handle *simple* calls for now.
3189  MVT RetVT;
3190  if (CLI.RetTy->isVoidTy())
3191  RetVT = MVT::isVoid;
3192  else if (!isTypeLegal(CLI.RetTy, RetVT))
3193  return false;
3194 
3195  for (auto Flag : CLI.OutFlags)
3196  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3197  Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3198  return false;
3199 
3200  // Set up the argument vectors.
3201  SmallVector<MVT, 16> OutVTs;
3202  OutVTs.reserve(CLI.OutVals.size());
3203 
3204  for (auto *Val : CLI.OutVals) {
3205  MVT VT;
3206  if (!isTypeLegal(Val->getType(), VT) &&
3207  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3208  return false;
3209 
3210  // We don't handle vector parameters yet.
3211  if (VT.isVector() || VT.getSizeInBits() > 64)
3212  return false;
3213 
3214  OutVTs.push_back(VT);
3215  }
3216 
3217  Address Addr;
3218  if (Callee && !computeCallAddress(Callee, Addr))
3219  return false;
3220 
3221  // The weak function target may be zero; in that case we must use indirect
3222  // addressing via a stub on windows as it may be out of range for a
3223  // PC-relative jump.
3224  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3225  Addr.getGlobalValue()->hasExternalWeakLinkage())
3226  return false;
3227 
3228  // Handle the arguments now that we've gotten them.
3229  unsigned NumBytes;
3230  if (!processCallArgs(CLI, OutVTs, NumBytes))
3231  return false;
3232 
3233  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3234  if (RegInfo->isAnyArgRegReserved(*MF))
3235  RegInfo->emitReservedArgRegCallError(*MF);
3236 
3237  // Issue the call.
3238  MachineInstrBuilder MIB;
3239  if (Subtarget->useSmallAddressing()) {
3240  const MCInstrDesc &II =
3241  TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3242  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3243  if (Symbol)
3244  MIB.addSym(Symbol, 0);
3245  else if (Addr.getGlobalValue())
3246  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3247  else if (Addr.getReg()) {
3248  Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3249  MIB.addReg(Reg);
3250  } else
3251  return false;
3252  } else {
3253  unsigned CallReg = 0;
3254  if (Symbol) {
3255  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3256  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3257  ADRPReg)
3258  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3259 
3260  CallReg = createResultReg(&AArch64::GPR64RegClass);
3261  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3262  TII.get(AArch64::LDRXui), CallReg)
3263  .addReg(ADRPReg)
3264  .addSym(Symbol,
3265  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3266  } else if (Addr.getGlobalValue())
3267  CallReg = materializeGV(Addr.getGlobalValue());
3268  else if (Addr.getReg())
3269  CallReg = Addr.getReg();
3270 
3271  if (!CallReg)
3272  return false;
3273 
3274  const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3275  CallReg = constrainOperandRegClass(II, CallReg, 0);
3276  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3277  }
3278 
3279  // Add implicit physical register uses to the call.
3280  for (auto Reg : CLI.OutRegs)
3281  MIB.addReg(Reg, RegState::Implicit);
3282 
3283  // Add a register mask with the call-preserved registers.
3284  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3285  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3286 
3287  CLI.Call = MIB;
3288 
3289  // Finish off the call including any return values.
3290  return finishCall(CLI, RetVT, NumBytes);
3291 }
3292 
3293 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
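  // Heuristic: with a known alignment the copy is "small" when it takes at
  // most four alignment-sized chunks (e.g. 16 bytes at 4-byte alignment);
  // with unknown alignment anything under 32 bytes qualifies.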
3294  if (Alignment)
3295  return Len / Alignment->value() <= 4;
3296  else
3297  return Len < 32;
3298 }
3299 
3300 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3301  uint64_t Len, MaybeAlign Alignment) {
3302  // Make sure we don't bloat code by inlining very large memcpy's.
3303  if (!isMemCpySmall(Len, Alignment))
3304  return false;
3305 
3306  int64_t UnscaledOffset = 0;
3307  Address OrigDest = Dest;
3308  Address OrigSrc = Src;
3309 
3310  while (Len) {
3311  MVT VT;
3312  if (!Alignment || *Alignment >= 8) {
3313  if (Len >= 8)
3314  VT = MVT::i64;
3315  else if (Len >= 4)
3316  VT = MVT::i32;
3317  else if (Len >= 2)
3318  VT = MVT::i16;
3319  else {
3320  VT = MVT::i8;
3321  }
3322  } else {
3323  assert(Alignment && "Alignment is set in this branch");
3324  // Bound based on alignment.
3325  if (Len >= 4 && *Alignment == 4)
3326  VT = MVT::i32;
3327  else if (Len >= 2 && *Alignment == 2)
3328  VT = MVT::i16;
3329  else {
3330  VT = MVT::i8;
3331  }
3332  }
3333 
3334  unsigned ResultReg = emitLoad(VT, VT, Src);
3335  if (!ResultReg)
3336  return false;
3337 
3338  if (!emitStore(VT, ResultReg, Dest))
3339  return false;
3340 
3341  int64_t Size = VT.getSizeInBits() / 8;
3342  Len -= Size;
3343  UnscaledOffset += Size;
3344 
3345  // We need to recompute the unscaled offset for each iteration.
3346  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3347  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3348  }
3349 
3350  return true;
3351 }
3352 
3353 /// Check if it is possible to fold the condition from the XALU intrinsic
3354 /// into the user. The condition code will only be updated on success.
3355 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3356  const Instruction *I,
3357  const Value *Cond) {
3358  if (!isa<ExtractValueInst>(Cond))
3359  return false;
3360 
3361  const auto *EV = cast<ExtractValueInst>(Cond);
3362  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3363  return false;
3364 
3365  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3366  MVT RetVT;
3367  const Function *Callee = II->getCalledFunction();
3368  Type *RetTy =
3369  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3370  if (!isTypeLegal(RetTy, RetVT))
3371  return false;
3372 
3373  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3374  return false;
3375 
3376  const Value *LHS = II->getArgOperand(0);
3377  const Value *RHS = II->getArgOperand(1);
3378 
3379  // Canonicalize immediate to the RHS.
3380  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3381  std::swap(LHS, RHS);
3382 
3383  // Simplify multiplies.
3384  Intrinsic::ID IID = II->getIntrinsicID();
3385  switch (IID) {
3386  default:
3387  break;
3388  case Intrinsic::smul_with_overflow:
3389  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3390  if (C->getValue() == 2)
3391  IID = Intrinsic::sadd_with_overflow;
3392  break;
3393  case Intrinsic::umul_with_overflow:
3394  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3395  if (C->getValue() == 2)
3396  IID = Intrinsic::uadd_with_overflow;
3397  break;
3398  }
3399 
3400  AArch64CC::CondCode TmpCC;
3401  switch (IID) {
3402  default:
3403  return false;
3404  case Intrinsic::sadd_with_overflow:
3405  case Intrinsic::ssub_with_overflow:
3406  TmpCC = AArch64CC::VS;
3407  break;
3408  case Intrinsic::uadd_with_overflow:
3409  TmpCC = AArch64CC::HS;
3410  break;
3411  case Intrinsic::usub_with_overflow:
3412  TmpCC = AArch64CC::LO;
3413  break;
3414  case Intrinsic::smul_with_overflow:
3415  case Intrinsic::umul_with_overflow:
3416  TmpCC = AArch64CC::NE;
3417  break;
3418  }
3419 
3420  // Check if both instructions are in the same basic block.
3421  if (!isValueAvailable(II))
3422  return false;
3423 
3424  // Make sure nothing is in the way.
3425  BasicBlock::const_iterator Start(I);
3426  BasicBlock::const_iterator End(II);
3427  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3428  // We only expect extractvalue instructions between the intrinsic and the
3429  // instruction to be selected.
3430  if (!isa<ExtractValueInst>(Itr))
3431  return false;
3432 
3433  // Check that the extractvalue operand comes from the intrinsic.
3434  const auto *EVI = cast<ExtractValueInst>(Itr);
3435  if (EVI->getAggregateOperand() != II)
3436  return false;
3437  }
3438 
3439  CC = TmpCC;
3440  return true;
3441 }
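// The pattern this fold targets looks roughly like the following IR, with
// the overflow bit consumed in the same basic block:
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %val  = extractvalue { i32, i1 } %res, 0
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %cont
// Instead of materializing %obit, the user is selected against the
// condition (VS/HS/LO/NE) produced by the flag-setting add/sub/mul sequence.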
3442 
3443 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3444  // FIXME: Handle more intrinsics.
3445  switch (II->getIntrinsicID()) {
3446  default: return false;
3447  case Intrinsic::frameaddress: {
3448  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3449  MFI.setFrameAddressIsTaken(true);
3450 
3451  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3452  Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3453  Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3455  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3456  // Recursively load frame address
3457  // ldr x0, [fp]
3458  // ldr x0, [x0]
3459  // ldr x0, [x0]
3460  // ...
3461  unsigned DestReg;
3462  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3463  while (Depth--) {
3464  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3465  SrcReg, 0);
3466  assert(DestReg && "Unexpected LDR instruction emission failure.");
3467  SrcReg = DestReg;
3468  }
3469 
3470  updateValueMap(II, SrcReg);
3471  return true;
3472  }
3473  case Intrinsic::sponentry: {
3474  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3475 
3476  // SP = FP + Fixed Object + 16
3477  int FI = MFI.CreateFixedObject(4, 0, false);
3478  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3479  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3480  TII.get(AArch64::ADDXri), ResultReg)
3481  .addFrameIndex(FI)
3482  .addImm(0)
3483  .addImm(0);
3484 
3485  updateValueMap(II, ResultReg);
3486  return true;
3487  }
3488  case Intrinsic::memcpy:
3489  case Intrinsic::memmove: {
3490  const auto *MTI = cast<MemTransferInst>(II);
3491  // Don't handle volatile.
3492  if (MTI->isVolatile())
3493  return false;
3494 
3495  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3496  // we would emit dead code because we don't currently handle memmoves.
3497  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3498  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3499  // Small memcpy's are common enough that we want to do them without a call
3500  // if possible.
3501  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3502  MaybeAlign Alignment;
3503  if (MTI->getDestAlign() || MTI->getSourceAlign())
3504  Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3505  MTI->getSourceAlign().valueOrOne());
3506  if (isMemCpySmall(Len, Alignment)) {
3507  Address Dest, Src;
3508  if (!computeAddress(MTI->getRawDest(), Dest) ||
3509  !computeAddress(MTI->getRawSource(), Src))
3510  return false;
3511  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3512  return true;
3513  }
3514  }
3515 
3516  if (!MTI->getLength()->getType()->isIntegerTy(64))
3517  return false;
3518 
3519  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3520  // Fast instruction selection doesn't support the special
3521  // address spaces.
3522  return false;
3523 
3524  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3525  return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3526  }
3527  case Intrinsic::memset: {
3528  const MemSetInst *MSI = cast<MemSetInst>(II);
3529  // Don't handle volatile.
3530  if (MSI->isVolatile())
3531  return false;
3532 
3533  if (!MSI->getLength()->getType()->isIntegerTy(64))
3534  return false;
3535 
3536  if (MSI->getDestAddressSpace() > 255)
3537  // Fast instruction selection doesn't support the special
3538  // address spaces.
3539  return false;
3540 
3541  return lowerCallTo(II, "memset", II->arg_size() - 1);
3542  }
3543  case Intrinsic::sin:
3544  case Intrinsic::cos:
3545  case Intrinsic::pow: {
3546  MVT RetVT;
3547  if (!isTypeLegal(II->getType(), RetVT))
3548  return false;
3549 
3550  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3551  return false;
3552 
3553  static const RTLIB::Libcall LibCallTable[3][2] = {
3554  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3555  { RTLIB::COS_F32, RTLIB::COS_F64 },
3556  { RTLIB::POW_F32, RTLIB::POW_F64 }
3557  };
3558  RTLIB::Libcall LC;
3559  bool Is64Bit = RetVT == MVT::f64;
3560  switch (II->getIntrinsicID()) {
3561  default:
3562  llvm_unreachable("Unexpected intrinsic.");
3563  case Intrinsic::sin:
3564  LC = LibCallTable[0][Is64Bit];
3565  break;
3566  case Intrinsic::cos:
3567  LC = LibCallTable[1][Is64Bit];
3568  break;
3569  case Intrinsic::pow:
3570  LC = LibCallTable[2][Is64Bit];
3571  break;
3572  }
3573 
3574  ArgListTy Args;
3575  Args.reserve(II->arg_size());
3576 
3577  // Populate the argument list.
3578  for (auto &Arg : II->args()) {
3579  ArgListEntry Entry;
3580  Entry.Val = Arg;
3581  Entry.Ty = Arg->getType();
3582  Args.push_back(Entry);
3583  }
3584 
3585  CallLoweringInfo CLI;
3586  MCContext &Ctx = MF->getContext();
3587  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3588  TLI.getLibcallName(LC), std::move(Args));
3589  if (!lowerCallTo(CLI))
3590  return false;
3591  updateValueMap(II, CLI.ResultReg);
3592  return true;
3593  }
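// For instance, the libcall lowering above turns
//   %r = call float @llvm.sin.f32(float %x)
// into an ordinary call,
//   %r = call float @sinf(float %x)
// using the RTLIB name returned by TLI.getLibcallName(LC).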
3594  case Intrinsic::fabs: {
3595  MVT VT;
3596  if (!isTypeLegal(II->getType(), VT))
3597  return false;
3598 
3599  unsigned Opc;
3600  switch (VT.SimpleTy) {
3601  default:
3602  return false;
3603  case MVT::f32:
3604  Opc = AArch64::FABSSr;
3605  break;
3606  case MVT::f64:
3607  Opc = AArch64::FABSDr;
3608  break;
3609  }
3610  Register SrcReg = getRegForValue(II->getOperand(0));
3611  if (!SrcReg)
3612  return false;
3613  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3614  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3615  .addReg(SrcReg);
3616  updateValueMap(II, ResultReg);
3617  return true;
3618  }
3619  case Intrinsic::trap:
3620  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3621  .addImm(1);
3622  return true;
3623  case Intrinsic::debugtrap:
3624  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3625  .addImm(0xF000);
3626  return true;
3627 
3628  case Intrinsic::sqrt: {
3629  Type *RetTy = II->getCalledFunction()->getReturnType();
3630 
3631  MVT VT;
3632  if (!isTypeLegal(RetTy, VT))
3633  return false;
3634 
3635  Register Op0Reg = getRegForValue(II->getOperand(0));
3636  if (!Op0Reg)
3637  return false;
3638 
3639  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3640  if (!ResultReg)
3641  return false;
3642 
3643  updateValueMap(II, ResultReg);
3644  return true;
3645  }
3646  case Intrinsic::sadd_with_overflow:
3647  case Intrinsic::uadd_with_overflow:
3648  case Intrinsic::ssub_with_overflow:
3649  case Intrinsic::usub_with_overflow:
3650  case Intrinsic::smul_with_overflow:
3651  case Intrinsic::umul_with_overflow: {
3652  // This implements the basic lowering of the xalu with overflow intrinsics.
3653  const Function *Callee = II->getCalledFunction();
3654  auto *Ty = cast<StructType>(Callee->getReturnType());
3655  Type *RetTy = Ty->getTypeAtIndex(0U);
3656 
3657  MVT VT;
3658  if (!isTypeLegal(RetTy, VT))
3659  return false;
3660 
3661  if (VT != MVT::i32 && VT != MVT::i64)
3662  return false;
3663 
3664  const Value *LHS = II->getArgOperand(0);
3665  const Value *RHS = II->getArgOperand(1);
3666  // Canonicalize immediate to the RHS.
3667  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3668  std::swap(LHS, RHS);
3669 
3670  // Simplify multiplies.
3671  Intrinsic::ID IID = II->getIntrinsicID();
3672  switch (IID) {
3673  default:
3674  break;
3675  case Intrinsic::smul_with_overflow:
3676  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3677  if (C->getValue() == 2) {
3678  IID = Intrinsic::sadd_with_overflow;
3679  RHS = LHS;
3680  }
3681  break;
3682  case Intrinsic::umul_with_overflow:
3683  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3684  if (C->getValue() == 2) {
3685  IID = Intrinsic::uadd_with_overflow;
3686  RHS = LHS;
3687  }
3688  break;
3689  }
3690 
3691  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3692  AArch64CC::CondCode CC = AArch64CC::Invalid;
3693  switch (IID) {
3694  default: llvm_unreachable("Unexpected intrinsic!");
3695  case Intrinsic::sadd_with_overflow:
3696  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3697  CC = AArch64CC::VS;
3698  break;
3699  case Intrinsic::uadd_with_overflow:
3700  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3701  CC = AArch64CC::HS;
3702  break;
3703  case Intrinsic::ssub_with_overflow:
3704  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3705  CC = AArch64CC::VS;
3706  break;
3707  case Intrinsic::usub_with_overflow:
3708  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3709  CC = AArch64CC::LO;
3710  break;
3711  case Intrinsic::smul_with_overflow: {
3712  CC = AArch64CC::NE;
3713  Register LHSReg = getRegForValue(LHS);
3714  if (!LHSReg)
3715  return false;
3716 
3717  Register RHSReg = getRegForValue(RHS);
3718  if (!RHSReg)
3719  return false;
3720 
3721  if (VT == MVT::i32) {
3722  MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3723  Register MulSubReg =
3724  fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3725  // cmp xreg, wreg, sxtw
3726  emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3727  AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3728  /*WantResult=*/false);
3729  MulReg = MulSubReg;
3730  } else {
3731  assert(VT == MVT::i64 && "Unexpected value type.");
3732  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3733  // reused in the next instruction.
3734  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3735  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3736  emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3737  /*WantResult=*/false);
3738  }
3739  break;
3740  }
3741  case Intrinsic::umul_with_overflow: {
3742  CC = AArch64CC::NE;
3743  Register LHSReg = getRegForValue(LHS);
3744  if (!LHSReg)
3745  return false;
3746 
3747  Register RHSReg = getRegForValue(RHS);
3748  if (!RHSReg)
3749  return false;
3750 
3751  if (VT == MVT::i32) {
3752  MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3753  // tst xreg, #0xffffffff00000000
3754  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3755  TII.get(AArch64::ANDSXri), AArch64::XZR)
3756  .addReg(MulReg)
3757  .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3758  MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3759  } else {
3760  assert(VT == MVT::i64 && "Unexpected value type.");
3761  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3762  // reused in the next instruction.
3763  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3764  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3765  emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3766  }
3767  break;
3768  }
3769  }
3770 
3771  if (MulReg) {
3772  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3773  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3774  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3775  }
3776 
3777  if (!ResultReg1)
3778  return false;
3779 
3780  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3781  AArch64::WZR, AArch64::WZR,
3782  getInvertedCondCode(CC));
3783  (void)ResultReg2;
3784  assert((ResultReg1 + 1) == ResultReg2 &&
3785  "Nonconsecutive result registers.");
3786  updateValueMap(II, ResultReg1, 2);
3787  return true;
3788  }
3789  }
3790  return false;
3791 }
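// Rough shape of the code produced above for a 32-bit umul.with.overflow
// (register numbers are illustrative only):
//   umull x8, w0, w1                 // full 64-bit product
//   tst   x8, #0xffffffff00000000    // any bits set above bit 31?
//   cset  w9, ne                     // overflow bit
// The low 32 bits of the product are copied out as the first result.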
3792 
3793 bool AArch64FastISel::selectRet(const Instruction *I) {
3794  const ReturnInst *Ret = cast<ReturnInst>(I);
3795  const Function &F = *I->getParent()->getParent();
3796 
3797  if (!FuncInfo.CanLowerReturn)
3798  return false;
3799 
3800  if (F.isVarArg())
3801  return false;
3802 
3803  if (TLI.supportSwiftError() &&
3804  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3805  return false;
3806 
3807  if (TLI.supportSplitCSR(FuncInfo.MF))
3808  return false;
3809 
3810  // Build a list of return value registers.
3811  SmallVector<unsigned, 4> RetRegs;
3812 
3813  if (Ret->getNumOperands() > 0) {
3814  CallingConv::ID CC = F.getCallingConv();
3815  SmallVector<ISD::OutputArg, 4> Outs;
3816  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3817 
3818  // Analyze operands of the call, assigning locations to each operand.
3819  SmallVector<CCValAssign, 16> ValLocs;
3820  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3823  CCInfo.AnalyzeReturn(Outs, RetCC);
3824 
3825  // Only handle a single return value for now.
3826  if (ValLocs.size() != 1)
3827  return false;
3828 
3829  CCValAssign &VA = ValLocs[0];
3830  const Value *RV = Ret->getOperand(0);
3831 
3832  // Don't bother handling odd stuff for now.
3833  if ((VA.getLocInfo() != CCValAssign::Full) &&
3834  (VA.getLocInfo() != CCValAssign::BCvt))
3835  return false;
3836 
3837  // Only handle register returns for now.
3838  if (!VA.isRegLoc())
3839  return false;
3840 
3841  Register Reg = getRegForValue(RV);
3842  if (Reg == 0)
3843  return false;
3844 
3845  unsigned SrcReg = Reg + VA.getValNo();
3846  Register DestReg = VA.getLocReg();
3847  // Avoid a cross-class copy. This is very unlikely.
3848  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3849  return false;
3850 
3851  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3852  if (!RVEVT.isSimple())
3853  return false;
3854 
3855  // Vectors (of > 1 lane) in big endian need tricky handling.
3856  if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3857  !Subtarget->isLittleEndian())
3858  return false;
3859 
3860  MVT RVVT = RVEVT.getSimpleVT();
3861  if (RVVT == MVT::f128)
3862  return false;
3863 
3864  MVT DestVT = VA.getValVT();
3865  // Special handling for extended integers.
3866  if (RVVT != DestVT) {
3867  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3868  return false;
3869 
3870  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3871  return false;
3872 
3873  bool IsZExt = Outs[0].Flags.isZExt();
3874  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3875  if (SrcReg == 0)
3876  return false;
3877  }
3878 
3879  // "Callee" (i.e. value producer) zero extends pointers at function
3880  // boundary.
3881  if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3882  SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3883 
3884  // Make the copy.
3885  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3886  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3887 
3888  // Add register to return instruction.
3889  RetRegs.push_back(VA.getLocReg());
3890  }
3891 
3892  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3893  TII.get(AArch64::RET_ReallyLR));
3894  for (unsigned RetReg : RetRegs)
3895  MIB.addReg(RetReg, RegState::Implicit);
3896  return true;
3897 }
3898 
3899 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3900  Type *DestTy = I->getType();
3901  Value *Op = I->getOperand(0);
3902  Type *SrcTy = Op->getType();
3903 
3904  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3905  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3906  if (!SrcEVT.isSimple())
3907  return false;
3908  if (!DestEVT.isSimple())
3909  return false;
3910 
3911  MVT SrcVT = SrcEVT.getSimpleVT();
3912  MVT DestVT = DestEVT.getSimpleVT();
3913 
3914  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3915  SrcVT != MVT::i8)
3916  return false;
3917  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3918  DestVT != MVT::i1)
3919  return false;
3920 
3921  Register SrcReg = getRegForValue(Op);
3922  if (!SrcReg)
3923  return false;
3924 
3925  // If we're truncating from i64 to a smaller non-legal type then generate an
3926  // AND. Otherwise, we know the high bits are undefined and a truncate only
3927  // generates a COPY. We cannot also mark the source register as the result
3928  // register, because this can incorrectly transfer the kill flag onto the
3929  // source register.
3930  unsigned ResultReg;
3931  if (SrcVT == MVT::i64) {
3932  uint64_t Mask = 0;
3933  switch (DestVT.SimpleTy) {
3934  default:
3935  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3936  return false;
3937  case MVT::i1:
3938  Mask = 0x1;
3939  break;
3940  case MVT::i8:
3941  Mask = 0xff;
3942  break;
3943  case MVT::i16:
3944  Mask = 0xffff;
3945  break;
3946  }
3947  // Issue an extract_subreg to get the lower 32-bits.
3948  Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3949  AArch64::sub_32);
3950  // Create the AND instruction which performs the actual truncation.
3951  ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3952  assert(ResultReg && "Unexpected AND instruction emission failure.");
3953  } else {
3954  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3955  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3956  TII.get(TargetOpcode::COPY), ResultReg)
3957  .addReg(SrcReg);
3958  }
3959 
3960  updateValueMap(I, ResultReg);
3961  return true;
3962 }
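// Example for the i64 path above: 'trunc i64 %x to i8' is selected as an
// extract of the sub_32 half followed by a mask, roughly
//   and w8, w9, #0xff
// while truncations from i32 and smaller just copy the source register.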
3963 
3964 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3965  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3966  DestVT == MVT::i64) &&
3967  "Unexpected value type.");
3968  // Handle i8 and i16 as i32.
3969  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3970  DestVT = MVT::i32;
3971 
3972  if (IsZExt) {
3973  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3974  assert(ResultReg && "Unexpected AND instruction emission failure.");
3975  if (DestVT == MVT::i64) {
3976  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3977  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3978  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3979  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3980  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3981  .addImm(0)
3982  .addReg(ResultReg)
3983  .addImm(AArch64::sub_32);
3984  ResultReg = Reg64;
3985  }
3986  return ResultReg;
3987  } else {
3988  if (DestVT == MVT::i64) {
3989  // FIXME: We're SExt i1 to i64.
3990  return 0;
3991  }
3992  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3993  0, 0);
3994  }
3995 }
3996 
3997 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3998  unsigned Opc, ZReg;
3999  switch (RetVT.SimpleTy) {
4000  default: return 0;
4001  case MVT::i8:
4002  case MVT::i16:
4003  case MVT::i32:
4004  RetVT = MVT::i32;
4005  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4006  case MVT::i64:
4007  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4008  }
4009 
4010  const TargetRegisterClass *RC =
4011  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4012  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4013 }
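// Note that 'mul' is only an alias on AArch64: 'mul w0, w1, w2' is
// 'madd w0, w1, w2, wzr', which is why the zero register is passed as the
// accumulator operand above.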
4014 
4015 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4016  if (RetVT != MVT::i64)
4017  return 0;
4018 
4019  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4020  Op0, Op1, AArch64::XZR);
4021 }
4022 
4023 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4024  if (RetVT != MVT::i64)
4025  return 0;
4026 
4027  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028  Op0, Op1, AArch64::XZR);
4029 }
4030 
4031 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4032  unsigned Op1Reg) {
4033  unsigned Opc = 0;
4034  bool NeedTrunc = false;
4035  uint64_t Mask = 0;
4036  switch (RetVT.SimpleTy) {
4037  default: return 0;
4038  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4039  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4040  case MVT::i32: Opc = AArch64::LSLVWr; break;
4041  case MVT::i64: Opc = AArch64::LSLVXr; break;
4042  }
4043 
4044  const TargetRegisterClass *RC =
4045  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4046  if (NeedTrunc)
4047  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4048 
4049  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4050  if (NeedTrunc)
4051  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4052  return ResultReg;
4053 }
4054 
4055 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4056  uint64_t Shift, bool IsZExt) {
4057  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4058  "Unexpected source/return type pair.");
4059  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4060  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4061  "Unexpected source value type.");
4062  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4063  RetVT == MVT::i64) && "Unexpected return value type.");
4064 
4065  bool Is64Bit = (RetVT == MVT::i64);
4066  unsigned RegSize = Is64Bit ? 64 : 32;
4067  unsigned DstBits = RetVT.getSizeInBits();
4068  unsigned SrcBits = SrcVT.getSizeInBits();
4069  const TargetRegisterClass *RC =
4070  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4071 
4072  // Just emit a copy for "zero" shifts.
4073  if (Shift == 0) {
4074  if (RetVT == SrcVT) {
4075  Register ResultReg = createResultReg(RC);
4076  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4077  TII.get(TargetOpcode::COPY), ResultReg)
4078  .addReg(Op0);
4079  return ResultReg;
4080  } else
4081  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4082  }
4083 
4084  // Don't deal with undefined shifts.
4085  if (Shift >= DstBits)
4086  return 0;
4087 
4088  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4089  // {S|U}BFM Wd, Wn, #r, #s
4090  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4091 
4092  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4093  // %2 = shl i16 %1, 4
4094  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4095  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4096  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4097  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4098 
4099  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4100  // %2 = shl i16 %1, 8
4101  // Wd<32+7-24,32-24> = Wn<7:0>
4102  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4103  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4104  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4105 
4106  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4107  // %2 = shl i16 %1, 12
4108  // Wd<32+3-20,32-20> = Wn<3:0>
4109  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4110  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4111  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4112 
4113  unsigned ImmR = RegSize - Shift;
4114  // Limit the width to the length of the source type.
4115  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4116  static const unsigned OpcTable[2][2] = {
4117  {AArch64::SBFMWri, AArch64::SBFMXri},
4118  {AArch64::UBFMWri, AArch64::UBFMXri}
4119  };
4120  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4121  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4122  Register TmpReg = MRI.createVirtualRegister(RC);
4123  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4124  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4125  .addImm(0)
4126  .addReg(Op0)
4127  .addImm(AArch64::sub_32);
4128  Op0 = TmpReg;
4129  }
4130  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4131 }
4132 
4133 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4134  unsigned Op1Reg) {
4135  unsigned Opc = 0;
4136  bool NeedTrunc = false;
4137  uint64_t Mask = 0;
4138  switch (RetVT.SimpleTy) {
4139  default: return 0;
4140  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4141  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4142  case MVT::i32: Opc = AArch64::LSRVWr; break;
4143  case MVT::i64: Opc = AArch64::LSRVXr; break;
4144  }
4145 
4146  const TargetRegisterClass *RC =
4147  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4148  if (NeedTrunc) {
4149  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4150  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4151  }
4152  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4153  if (NeedTrunc)
4154  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4155  return ResultReg;
4156 }
4157 
4158 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4159  uint64_t Shift, bool IsZExt) {
4160  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4161  "Unexpected source/return type pair.");
4162  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4163  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4164  "Unexpected source value type.");
4165  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4166  RetVT == MVT::i64) && "Unexpected return value type.");
4167 
4168  bool Is64Bit = (RetVT == MVT::i64);
4169  unsigned RegSize = Is64Bit ? 64 : 32;
4170  unsigned DstBits = RetVT.getSizeInBits();
4171  unsigned SrcBits = SrcVT.getSizeInBits();
4172  const TargetRegisterClass *RC =
4173  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4174 
4175  // Just emit a copy for "zero" shifts.
4176  if (Shift == 0) {
4177  if (RetVT == SrcVT) {
4178  Register ResultReg = createResultReg(RC);
4179  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4180  TII.get(TargetOpcode::COPY), ResultReg)
4181  .addReg(Op0);
4182  return ResultReg;
4183  } else
4184  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4185  }
4186 
4187  // Don't deal with undefined shifts.
4188  if (Shift >= DstBits)
4189  return 0;
4190 
4191  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4192  // {S|U}BFM Wd, Wn, #r, #s
4193  // Wd<s-r:0> = Wn<s:r> when r <= s
4194 
4195  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4196  // %2 = lshr i16 %1, 4
4197  // Wd<7-4:0> = Wn<7:4>
4198  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4199  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4200  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4201 
4202  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4203  // %2 = lshr i16 %1, 8
4204  // Wd<7-7,0> = Wn<7:7>
4205  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4206  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4207  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4208 
4209  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4210  // %2 = lshr i16 %1, 12
4211  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4212  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4213  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4214  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4215 
4216  if (Shift >= SrcBits && IsZExt)
4217  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4218 
4219  // It is not possible to fold a sign-extend into the LShr instruction. In this
4220  // case emit a sign-extend.
4221  if (!IsZExt) {
4222  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4223  if (!Op0)
4224  return 0;
4225  SrcVT = RetVT;
4226  SrcBits = SrcVT.getSizeInBits();
4227  IsZExt = true;
4228  }
4229 
4230  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4231  unsigned ImmS = SrcBits - 1;
4232  static const unsigned OpcTable[2][2] = {
4233  {AArch64::SBFMWri, AArch64::SBFMXri},
4234  {AArch64::UBFMWri, AArch64::UBFMXri}
4235  };
4236  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4237  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4238  Register TmpReg = MRI.createVirtualRegister(RC);
4239  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4240  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4241  .addImm(0)
4242  .addReg(Op0)
4243  .addImm(AArch64::sub_32);
4244  Op0 = TmpReg;
4245  }
4246  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4247 }
4248 
4249 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4250  unsigned Op1Reg) {
4251  unsigned Opc = 0;
4252  bool NeedTrunc = false;
4253  uint64_t Mask = 0;
4254  switch (RetVT.SimpleTy) {
4255  default: return 0;
4256  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4257  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4258  case MVT::i32: Opc = AArch64::ASRVWr; break;
4259  case MVT::i64: Opc = AArch64::ASRVXr; break;
4260  }
4261 
4262  const TargetRegisterClass *RC =
4263  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4264  if (NeedTrunc) {
4265  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4266  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4267  }
4268  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4269  if (NeedTrunc)
4270  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4271  return ResultReg;
4272 }
4273 
4274 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4275  uint64_t Shift, bool IsZExt) {
4276  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4277  "Unexpected source/return type pair.");
4278  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4279  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4280  "Unexpected source value type.");
4281  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4282  RetVT == MVT::i64) && "Unexpected return value type.");
4283 
4284  bool Is64Bit = (RetVT == MVT::i64);
4285  unsigned RegSize = Is64Bit ? 64 : 32;
4286  unsigned DstBits = RetVT.getSizeInBits();
4287  unsigned SrcBits = SrcVT.getSizeInBits();
4288  const TargetRegisterClass *RC =
4289  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4290 
4291  // Just emit a copy for "zero" shifts.
4292  if (Shift == 0) {
4293  if (RetVT == SrcVT) {
4294  Register ResultReg = createResultReg(RC);
4295  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4296  TII.get(TargetOpcode::COPY), ResultReg)
4297  .addReg(Op0);
4298  return ResultReg;
4299  } else
4300  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4301  }
4302 
4303  // Don't deal with undefined shifts.
4304  if (Shift >= DstBits)
4305  return 0;
4306 
4307  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4308  // {S|U}BFM Wd, Wn, #r, #s
4309  // Wd<s-r:0> = Wn<s:r> when r <= s
4310 
4311  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4312  // %2 = ashr i16 %1, 4
4313  // Wd<7-4:0> = Wn<7:4>
4314  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4315  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4316  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4317 
4318  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4319  // %2 = ashr i16 %1, 8
4320  // Wd<7-7,0> = Wn<7:7>
4321  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4322  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4323  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4324 
4325  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4326  // %2 = ashr i16 %1, 12
4327  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4328  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4329  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4330  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4331 
4332  if (Shift >= SrcBits && IsZExt)
4333  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4334 
4335  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4336  unsigned ImmS = SrcBits - 1;
4337  static const unsigned OpcTable[2][2] = {
4338  {AArch64::SBFMWri, AArch64::SBFMXri},
4339  {AArch64::UBFMWri, AArch64::UBFMXri}
4340  };
4341  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4342  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4343  Register TmpReg = MRI.createVirtualRegister(RC);
4344  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4345  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4346  .addImm(0)
4347  .addReg(Op0)
4348  .addImm(AArch64::sub_32);
4349  Op0 = TmpReg;
4350  }
4351  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4352 }
4353 
4354 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4355  bool IsZExt) {
4356  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4357 
4358  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4359  // DestVT are odd things, so test to make sure that they are both types we can
4360  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4361  // bail out to SelectionDAG.
4362  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4363  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4364  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4365  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4366  return 0;
4367 
4368  unsigned Opc;
4369  unsigned Imm = 0;
4370 
4371  switch (SrcVT.SimpleTy) {
4372  default:
4373  return 0;
4374  case MVT::i1:
4375  return emiti1Ext(SrcReg, DestVT, IsZExt);
4376  case MVT::i8:
4377  if (DestVT == MVT::i64)
4378  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4379  else
4380  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4381  Imm = 7;
4382  break;
4383  case MVT::i16:
4384  if (DestVT == MVT::i64)
4385  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4386  else
4387  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4388  Imm = 15;
4389  break;
4390  case MVT::i32:
4391  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4392  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393  Imm = 31;
4394  break;
4395  }
4396 
4397  // Handle i8 and i16 as i32.
4398  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4399  DestVT = MVT::i32;
4400  else if (DestVT == MVT::i64) {
4401  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4402  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4403  TII.get(AArch64::SUBREG_TO_REG), Src64)
4404  .addImm(0)
4405  .addReg(SrcReg)
4406  .addImm(AArch64::sub_32);
4407  SrcReg = Src64;
4408  }
4409 
4410  const TargetRegisterClass *RC =
4411  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4412  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4413 }
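// The bitfield-move immediates above correspond to the usual aliases, e.g.
// (register numbers are illustrative only):
//   zext i8  to i32:  ubfm w0, w0, #0, #7     (uxtb w0, w0)
//   sext i16 to i64:  sbfm x0, x0, #0, #15    (sxth x0, w0)
//   sext i32 to i64:  sbfm x0, x0, #0, #31    (sxtw x0, w0)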
4414 
4415 static bool isZExtLoad(const MachineInstr *LI) {
4416  switch (LI->getOpcode()) {
4417  default:
4418  return false;
4419  case AArch64::LDURBBi:
4420  case AArch64::LDURHHi:
4421  case AArch64::LDURWi:
4422  case AArch64::LDRBBui:
4423  case AArch64::LDRHHui:
4424  case AArch64::LDRWui:
4425  case AArch64::LDRBBroX:
4426  case AArch64::LDRHHroX:
4427  case AArch64::LDRWroX:
4428  case AArch64::LDRBBroW:
4429  case AArch64::LDRHHroW:
4430  case AArch64::LDRWroW:
4431  return true;
4432  }
4433 }
4434 
4435 static bool isSExtLoad(const MachineInstr *LI) {
4436  switch (LI->getOpcode()) {
4437  default:
4438  return false;
4439  case AArch64::LDURSBWi:
4440  case AArch64::LDURSHWi:
4441  case AArch64::LDURSBXi:
4442  case AArch64::LDURSHXi:
4443  case AArch64::LDURSWi:
4444  case AArch64::LDRSBWui:
4445  case AArch64::LDRSHWui:
4446  case AArch64::LDRSBXui:
4447  case AArch64::LDRSHXui:
4448  case AArch64::LDRSWui:
4449  case AArch64::LDRSBWroX:
4450  case AArch64::LDRSHWroX:
4451  case AArch64::LDRSBXroX:
4452  case AArch64::LDRSHXroX:
4453  case AArch64::LDRSWroX:
4454  case AArch64::LDRSBWroW:
4455  case AArch64::LDRSHWroW:
4456  case AArch64::LDRSBXroW:
4457  case AArch64::LDRSHXroW:
4458  case AArch64::LDRSWroW:
4459  return true;
4460  }
4461 }
4462 
4463 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4464  MVT SrcVT) {
4465  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4466  if (!LI || !LI->hasOneUse())
4467  return false;
4468 
4469  // Check if the load instruction has already been selected.
4470  Register Reg = lookUpRegForValue(LI);
4471  if (!Reg)
4472  return false;
4473 
4474  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4475  if (!MI)
4476  return false;
4477 
4478  // Check if the correct load instruction has been emitted - SelectionDAG might
4479  // have emitted a zero-extending load, but we need a sign-extending load.
4480  bool IsZExt = isa<ZExtInst>(I);
4481  const auto *LoadMI = MI;
4482  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4483  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4484  Register LoadReg = MI->getOperand(1).getReg();
4485  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4486  assert(LoadMI && "Expected valid instruction");
4487  }
4488  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4489  return false;
4490 
4491  // Nothing to be done.
4492  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4493  updateValueMap(I, Reg);
4494  return true;
4495  }
4496 
4497  if (IsZExt) {
4498  Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4499  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4500  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4501  .addImm(0)
4502  .addReg(Reg, getKillRegState(true))
4503  .addImm(AArch64::sub_32);
4504  Reg = Reg64;
4505  } else {
4506  assert((MI->getOpcode() == TargetOpcode::COPY &&
4507  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4508  "Expected copy instruction");
4509  Reg = MI->getOperand(1).getReg();
4510  MachineBasicBlock::iterator I(MI);
4511  removeDeadCode(I, std::next(I));
4512  }
4513  updateValueMap(I, Reg);
4514  return true;
4515 }
4516 
4517 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4518  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4519  "Unexpected integer extend instruction.");
4520  MVT RetVT;
4521  MVT SrcVT;
4522  if (!isTypeSupported(I->getType(), RetVT))
4523  return false;
4524 
4525  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4526  return false;
4527 
4528  // Try to optimize already sign-/zero-extended values from load instructions.
4529  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4530  return true;
4531 
4532  Register SrcReg = getRegForValue(I->getOperand(0));
4533  if (!SrcReg)
4534  return false;
4535 
4536  // Try to optimize already sign-/zero-extended values from function arguments.
4537  bool IsZExt = isa<ZExtInst>(I);
4538  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4539  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4540  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4541  Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4542  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4543  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4544  .addImm(0)
4545  .addReg(SrcReg)
4546  .addImm(AArch64::sub_32);
4547  SrcReg = ResultReg;
4548  }
4549 
4550  updateValueMap(I, SrcReg);
4551  return true;
4552  }
4553  }
4554 
4555  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4556  if (!ResultReg)
4557  return false;
4558 
4559  updateValueMap(I, ResultReg);
4560  return true;
4561 }
4562 
4563 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4564  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4565  if (!DestEVT.isSimple())
4566  return false;
4567 
4568  MVT DestVT = DestEVT.getSimpleVT();
4569  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4570  return false;
4571 
4572  unsigned DivOpc;
4573  bool Is64bit = (DestVT == MVT::i64);
4574  switch (ISDOpcode) {
4575  default:
4576  return false;
4577  case ISD::SREM:
4578  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4579  break;
4580  case ISD::UREM:
4581  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4582  break;
4583  }
4584  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4585  Register Src0Reg = getRegForValue(I->getOperand(0));
4586  if (!Src0Reg)
4587  return false;
4588 
4589  Register Src1Reg = getRegForValue(I->getOperand(1));
4590  if (!Src1Reg)
4591  return false;
4592 
4593  const TargetRegisterClass *RC =
4594  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4595  Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4596  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4597  // The remainder is computed as numerator - (quotient * denominator) using the
4598  // MSUB instruction.
4599  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4600  updateValueMap(I, ResultReg);
4601  return true;
4602 }
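// For a 32-bit srem the sequence emitted above is therefore (illustrative
// registers):
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0    // w0 - (w8 * w1)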
4603 
4604 bool AArch64FastISel::selectMul(const Instruction *I) {
4605  MVT VT;
4606  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4607  return false;
4608 
4609  if (VT.isVector())
4610  return selectBinaryOp(I, ISD::MUL);
4611 
4612  const Value *Src0 = I->getOperand(0);
4613  const Value *Src1 = I->getOperand(1);
4614  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4615  if (C->getValue().isPowerOf2())
4616  std::swap(Src0, Src1);
4617 
4618  // Try to simplify to a shift instruction.
4619  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4620  if (C->getValue().isPowerOf2()) {
4621  uint64_t ShiftVal = C->getValue().logBase2();
4622  MVT SrcVT = VT;
4623  bool IsZExt = true;
4624  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4625  if (!isIntExtFree(ZExt)) {
4626  MVT VT;
4627  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4628  SrcVT = VT;
4629  IsZExt = true;
4630  Src0 = ZExt->getOperand(0);
4631  }
4632  }
4633  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4634  if (!isIntExtFree(SExt)) {
4635  MVT VT;
4636  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4637  SrcVT = VT;
4638  IsZExt = false;
4639  Src0 = SExt->getOperand(0);
4640  }
4641  }
4642  }
4643 
4644  Register Src0Reg = getRegForValue(Src0);
4645  if (!Src0Reg)
4646  return false;
4647 
4648  unsigned ResultReg =
4649  emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4650 
4651  if (ResultReg) {
4652  updateValueMap(I, ResultReg);
4653  return true;
4654  }
4655  }
4656 
4657  Register Src0Reg = getRegForValue(I->getOperand(0));
4658  if (!Src0Reg)
4659  return false;
4660 
4661  Register Src1Reg = getRegForValue(I->getOperand(1));
4662  if (!Src1Reg)
4663  return false;
4664 
4665  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4666 
4667  if (!ResultReg)
4668  return false;
4669 
4670  updateValueMap(I, ResultReg);
4671  return true;
4672 }
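// E.g. 'mul i64 %x, 8' is selected above as 'lsl x0, x0, #3', and a
// preceding free zext/sext of %x can be folded directly into the UBFM/SBFM
// that implements the shift.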
4673 
4674 bool AArch64FastISel::selectShift(const Instruction *I) {
4675  MVT RetVT;
4676  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4677  return false;
4678 
4679  if (RetVT.isVector())
4680  return selectOperator(I, I->getOpcode());
4681 
4682  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4683  unsigned ResultReg = 0;
4684  uint64_t ShiftVal = C->getZExtValue();
4685  MVT SrcVT = RetVT;
4686  bool IsZExt = I->getOpcode() != Instruction::AShr;
4687  const Value *Op0 = I->getOperand(0);
4688  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4689  if (!isIntExtFree(ZExt)) {
4690  MVT TmpVT;
4691  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4692  SrcVT = TmpVT;
4693  IsZExt = true;
4694  Op0 = ZExt->getOperand(0);
4695  }
4696  }
4697  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4698  if (!isIntExtFree(SExt)) {
4699  MVT TmpVT;
4700  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4701  SrcVT = TmpVT;
4702  IsZExt = false;
4703  Op0 = SExt->getOperand(0);
4704  }
4705  }
4706  }
4707 
4708  Register Op0Reg = getRegForValue(Op0);
4709  if (!Op0Reg)
4710  return false;
4711 
4712  switch (I->getOpcode()) {
4713  default: llvm_unreachable("Unexpected instruction.");
4714  case Instruction::Shl:
4715  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4716  break;
4717  case Instruction::AShr:
4718  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4719  break;
4720  case Instruction::LShr:
4721  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4722  break;
4723  }
4724  if (!ResultReg)
4725  return false;
4726 
4727  updateValueMap(I, ResultReg);
4728  return true;
4729  }
4730 
4731  Register Op0Reg = getRegForValue(I->getOperand(0));
4732  if (!Op0Reg)
4733  return false;
4734 
4735  Register Op1Reg = getRegForValue(I->getOperand(1));
4736  if (!Op1Reg)
4737  return false;
4738 
4739  unsigned ResultReg = 0;
4740  switch (I->getOpcode()) {
4741  default: llvm_unreachable("Unexpected instruction.");
4742  case Instruction::Shl:
4743  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4744  break;
4745  case Instruction::AShr:
4746  ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4747  break;
4748  case Instruction::LShr:
4749  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4750  break;
4751  }
4752 
4753  if (!ResultReg)
4754  return false;
4755 
4756  updateValueMap(I, ResultReg);
4757  return true;
4758 }
4759 
4760 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4761  MVT RetVT, SrcVT;
4762 
4763  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4764  return false;
4765  if (!isTypeLegal(I->getType(), RetVT))
4766  return false;
4767 
4768  unsigned Opc;
4769  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4770  Opc = AArch64::FMOVWSr;
4771  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4772  Opc = AArch64::FMOVXDr;
4773  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4774  Opc = AArch64::FMOVSWr;
4775  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4776  Opc = AArch64::FMOVDXr;
4777  else
4778  return false;
4779 
4780  const TargetRegisterClass *RC = nullptr;
4781  switch (RetVT.SimpleTy) {
4782  default: llvm_unreachable("Unexpected value type.");
4783  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4784  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4785  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4786  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4787  }
4788  Register Op0Reg = getRegForValue(I->getOperand(0));
4789  if (!Op0Reg)
4790  return false;
4791 
4792  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4793  if (!ResultReg)
4794  return false;
4795 
4796  updateValueMap(I, ResultReg);
4797  return true;
4798 }
4799 
4800 bool AArch64FastISel::selectFRem(const Instruction *I) {
4801  MVT RetVT;
4802  if (!isTypeLegal(I->getType(), RetVT))
4803  return false;
4804 
4805  RTLIB::Libcall LC;
4806  switch (RetVT.SimpleTy) {
4807  default:
4808  return false;
4809  case MVT::f32:
4810  LC = RTLIB::REM_F32;
4811  break;
4812  case MVT::f64:
4813  LC = RTLIB::REM_F64;
4814  break;
4815  }
4816 
4817  ArgListTy Args;
4818  Args.reserve(I->getNumOperands());
4819 
4820  // Populate the argument list.
4821  for (auto &Arg : I->operands()) {
4822  ArgListEntry Entry;
4823  Entry.Val = Arg;
4824  Entry.Ty = Arg->getType();
4825  Args.push_back(Entry);
4826  }
4827 
4828  CallLoweringInfo CLI;
4829  MCContext &Ctx = MF->getContext();
4830  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4831  TLI.getLibcallName(LC), std::move(Args));
4832  if (!lowerCallTo(CLI))
4833  return false;
4834  updateValueMap(I, CLI.ResultReg);
4835  return true;
4836 }
4837 
4838 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4839  MVT VT;
4840  if (!isTypeLegal(I->getType(), VT))
4841  return false;
4842 
4843  if (!isa<ConstantInt>(I->getOperand(1)))
4844  return selectBinaryOp(I, ISD::SDIV);
4845 
4846  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4847  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4848  !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4849  return selectBinaryOp(I, ISD::SDIV);
4850 
4851  unsigned Lg2 = C.countTrailingZeros();
4852  Register Src0Reg = getRegForValue(I->getOperand(0));
4853  if (!Src0Reg)
4854  return false;
4855 
4856  if (cast<BinaryOperator>(I)->isExact()) {
4857  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4858  if (!ResultReg)
4859  return false;
4860  updateValueMap(I, ResultReg);
4861  return true;
4862  }
4863 
4864  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4865  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4866  if (!AddReg)
4867  return false;
4868 
4869  // (Src0 < 0) ? Pow2 - 1 : 0;
4870  if (!emitICmp_ri(VT, Src0Reg, 0))
4871  return false;
4872 
4873  unsigned SelectOpc;
4874  const TargetRegisterClass *RC;
4875  if (VT == MVT::i64) {
4876  SelectOpc = AArch64::CSELXr;
4877  RC = &AArch64::GPR64RegClass;
4878  } else {
4879  SelectOpc = AArch64::CSELWr;
4880  RC = &AArch64::GPR32RegClass;
4881  }
4882  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4883  AArch64CC::LT);
4884  if (!SelectReg)
4885  return false;
4886 
4887  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4888  // negate the result.
4889  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4890  unsigned ResultReg;
4891  if (C.isNegative())
4892  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4893  AArch64_AM::ASR, Lg2);
4894  else
4895  ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4896 
4897  if (!ResultReg)
4898  return false;
4899 
4900  updateValueMap(I, ResultReg);
4901  return true;
4902 }
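// For a non-exact 'sdiv i32 %x, 8' the code above emits roughly
// (illustrative registers):
//   add  w8, w0, #7        // x + (2^3 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt    // use the biased value only for negative x
//   asr  w0, w8, #3
// For a negative power-of-two divisor the shift and negation are combined
// into a subtract from the zero register with an ASR-shifted operand.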
4903 
4904 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4905 /// have to duplicate it for AArch64, because otherwise we would fail during the
4906 /// sign-extend emission.
4907 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4908  Register IdxN = getRegForValue(Idx);
4909  if (IdxN == 0)
4910  // Unhandled operand. Halt "fast" selection and bail.
4911  return 0;
4912 
4913  // If the index is smaller or larger than intptr_t, truncate or extend it.
4914  MVT PtrVT = TLI.getPointerTy(DL);
4915  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4916  if (IdxVT.bitsLT(PtrVT)) {
4917  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4918  } else if (IdxVT.bitsGT(PtrVT))
4919  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4920  return IdxN;
4921 }
4922 
4923 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4924 /// duplicate it for AArch64, because otherwise we would bail out even for
4925 /// simple cases. This is because the standard fastEmit functions don't cover
4926 /// MUL at all and ADD is lowered very inefficiently.
4927 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4928  if (Subtarget->isTargetILP32())
4929  return false;
4930 
4931  Register N = getRegForValue(I->getOperand(0));
4932  if (!N)
4933  return false;
4934 
4935  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4936  // into a single N = N + TotalOffset.
4937  uint64_t TotalOffs = 0;
4938  MVT VT = TLI.getPointerTy(DL);
4940  GTI != E; ++GTI) {
4941  const Value *Idx = GTI.getOperand();
4942  if (auto *StTy = GTI.getStructTypeOrNull()) {
4943  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4944  // N = N + Offset
4945  if (Field)
4946  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4947  } else {
4948  Type *Ty = GTI.getIndexedType();
4949 
4950  // If this is a constant subscript, handle it quickly.
4951  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4952  if (CI->isZero())
4953  continue;
4954  // N = N + Offset
4955  TotalOffs +=
4956  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4957  continue;
4958  }
4959  if (TotalOffs) {
4960  N = emitAdd_ri_(VT, N, TotalOffs);
4961  if (!N)
4962  return false;
4963  TotalOffs = 0;
4964  }
4965 
4966  // N = N + Idx * ElementSize;
4967  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4968  unsigned IdxN = getRegForGEPIndex(Idx);
4969  if (!IdxN)
4970  return false;
4971 
4972  if (ElementSize != 1) {
4973  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4974  if (!C)
4975  return false;
4976  IdxN = emitMul_rr(VT, IdxN, C);
4977  if (!IdxN)
4978  return false;
4979  }
4980  N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4981  if (!N)
4982  return false;
4983  }
4984  }
4985  if (TotalOffs) {
4986  N = emitAdd_ri_(VT, N, TotalOffs);
4987  if (!N)
4988  return false;
4989  }
4990  updateValueMap(I, N);
4991  return true;
4992 }
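// E.g. 'getelementptr i32, ptr %p, i64 %i' becomes, in the general case,
// roughly (illustrative registers):
//   mov x9, #4             // sizeof(i32)
//   mul x9, x8, x9         // %i * 4
//   add x0, x0, x9
// while constant indices are accumulated into a single TotalOffs addition.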
4993 
4994 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4995  assert(TM.getOptLevel() == CodeGenOpt::None &&
4996  "cmpxchg survived AtomicExpand at optlevel > -O0");
4997 
4998  auto *RetPairTy = cast<StructType>(I->getType());
4999  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5000  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5001  "cmpxchg has a non-i1 status result");
5002 
5003  MVT VT;
5004  if (!isTypeLegal(RetTy, VT))
5005  return false;
5006 
5007  const TargetRegisterClass *ResRC;
5008  unsigned Opc, CmpOpc;
5009  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5010  // extractvalue selection doesn't support that.
5011  if (VT == MVT::i32) {
5012  Opc = AArch64::CMP_SWAP_32;
5013  CmpOpc = AArch64::SUBSWrs;
5014  ResRC = &AArch64::GPR32RegClass;
5015  } else if (VT == MVT::i64) {
5016  Opc = AArch64::CMP_SWAP_64;
5017  CmpOpc = AArch64::SUBSXrs;
5018  ResRC = &AArch64::GPR64RegClass;
5019  } else {
5020  return false;
5021  }
5022 
5023  const MCInstrDesc &II = TII.get(Opc);
5024 
5025  const Register AddrReg = constrainOperandRegClass(
5026  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5027  const Register DesiredReg = constrainOperandRegClass(
5028  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5029  const Register NewReg = constrainOperandRegClass(
5030  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5031 
5032  const Register ResultReg1 = createResultReg(ResRC);
5033  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5034  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5035 
5036  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5037  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5038  .addDef(ResultReg1)
5039  .addDef(ScratchReg)
5040  .addUse(AddrReg)
5041  .addUse(DesiredReg)
5042  .addUse(NewReg);
5043 
5044  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5045  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5046  .addUse(ResultReg1)
5047  .addUse(DesiredReg)
5048  .addImm(0);
5049 
5050  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5051  .addDef(ResultReg2)
5052  .addUse(AArch64::WZR)
5053  .addUse(AArch64::WZR)
5054  .addImm(AArch64CC::NE);
5055 
5056  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5057  updateValueMap(I, ResultReg1, 2);
5058  return true;
5059 }
5060 
5061 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5062  if (TLI.fallBackToDAGISel(*I))
5063  return false;
5064  switch (I->getOpcode()) {
5065  default:
5066  break;
5067  case Instruction::Add:
5068  case Instruction::Sub:
5069  return selectAddSub(I);
5070  case Instruction::Mul:
5071  return selectMul(I);
5072  case Instruction::SDiv:
5073  return selectSDiv(I);
5074  case Instruction::SRem:
5075  if (!selectBinaryOp(I, ISD::SREM))
5076  return selectRem(I, ISD::SREM);
5077  return true;
5078  case Instruction::URem:
5079  if (!selectBinaryOp(I, ISD::UREM))
5080  return selectRem(I, ISD::UREM);
5081  return true;
5082  case Instruction::Shl:
5083  case Instruction::LShr:
5084  case Instruction::AShr:
5085  return selectShift(I);
5086  case Instruction::And:
5087  case Instruction::Or:
5088  case Instruction::Xor:
5089  return selectLogicalOp(I);
5090  case Instruction::Br:
5091  return selectBranch(I);
5092  case Instruction::IndirectBr:
5093  return selectIndirectBr(I);
5094  case Instruction::BitCast:
5095  if (!FastISel::selectBitCast(I))
5096  return selectBitCast(I);
5097  return true;
5098  case Instruction::FPToSI:
5099  if (!selectCast(I, ISD::FP_TO_SINT))
5100  return selectFPToInt(I, /*Signed=*/true);
5101  return true;
5102  case Instruction::FPToUI:
5103  return selectFPToInt(I, /*Signed=*/false);
5104  case Instruction::ZExt:
5105  case Instruction::SExt:
5106  return selectIntExt(I);
5107  case Instruction::Trunc:
5108  if (!selectCast(I, ISD::TRUNCATE))
5109  return selectTrunc(I);
5110  return true;
5111  case Instruction::FPExt:
5112  return selectFPExt(I);
5113  case Instruction::FPTrunc:
5114  return selectFPTrunc(I);
5115  case Instruction::SIToFP:
5116  if (!selectCast(I, ISD::SINT_TO_FP))
5117  return selectIntToFP(I, /*Signed=*/true);
5118  return true;
5119  case Instruction::UIToFP:
5120  return selectIntToFP(I, /*Signed=*/false);
5121  case Instruction::Load:
5122  return selectLoad(I);
5123  case Instruction::Store:
5124  return selectStore(I);
5125  case Instruction::FCmp:
5126  case Instruction::ICmp:
5127  return selectCmp(I);
5128  case Instruction::Select:
5129  return selectSelect(I);
5130  case Instruction::Ret:
5131  return selectRet(I);
5132  case Instruction::FRem:
5133  return selectFRem(I);
5134  case Instruction::GetElementPtr:
5135  return selectGetElementPtr(I);
5136  case Instruction::AtomicCmpXchg:
5137  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5138  }
5139 
5140  // fall-back to target-independent instruction selection.
5141  return selectOperator(I, I->getOpcode());
5142 }
5143 
5144 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5145  const TargetLibraryInfo *LibInfo) {
5146 
5147  SMEAttrs CallerAttrs(*FuncInfo.Fn);
5148  if (CallerAttrs.hasZAState() ||
5149  (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
5150  return nullptr;
5151  return new AArch64FastISel(FuncInfo, LibInfo);
5152 }
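// Illustrative sketch (editorial addition, not part of the upstream file):
// the factory above declines to create a FastISel instance for SME callers
// that keep ZA state or are locally streaming (a streaming body behind a
// non-streaming interface); those functions fall back to SelectionDAG. The
// helper below merely restates that predicate with an invented name, using
// only the SMEAttrs API already referenced in this file.
static bool canUseAArch64FastISel(const Function &F) {
  SMEAttrs Attrs(F);
  if (Attrs.hasZAState())
    return false; // ZA state is not modelled by FastISel.
  if (!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody())
    return false; // Locally streaming functions need the SDAG lowering.
  return true;
}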