1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/CodeGen/FastISel.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
80  class Address {
81  public:
82  using BaseKind = enum {
83  RegBase,
84  FrameIndexBase
85  };
86 
87  private:
88  BaseKind Kind = RegBase;
89  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90  union {
91  unsigned Reg;
92  int FI;
93  } Base;
94  unsigned OffsetReg = 0;
95  unsigned Shift = 0;
96  int64_t Offset = 0;
97  const GlobalValue *GV = nullptr;
98 
99  public:
100  Address() { Base.Reg = 0; }
101 
102  void setKind(BaseKind K) { Kind = K; }
103  BaseKind getKind() const { return Kind; }
104  void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105  AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106  bool isRegBase() const { return Kind == RegBase; }
107  bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109  void setReg(unsigned Reg) {
110  assert(isRegBase() && "Invalid base register access!");
111  Base.Reg = Reg;
112  }
113 
114  unsigned getReg() const {
115  assert(isRegBase() && "Invalid base register access!");
116  return Base.Reg;
117  }
118 
119  void setOffsetReg(unsigned Reg) {
120  OffsetReg = Reg;
121  }
122 
123  unsigned getOffsetReg() const {
124  return OffsetReg;
125  }
126 
127  void setFI(unsigned FI) {
128  assert(isFIBase() && "Invalid base frame index access!");
129  Base.FI = FI;
130  }
131 
132  unsigned getFI() const {
133  assert(isFIBase() && "Invalid base frame index access!");
134  return Base.FI;
135  }
136 
137  void setOffset(int64_t O) { Offset = O; }
138  int64_t getOffset() { return Offset; }
139  void setShift(unsigned S) { Shift = S; }
140  unsigned getShift() { return Shift; }
141 
142  void setGlobalValue(const GlobalValue *G) { GV = G; }
143  const GlobalValue *getGlobalValue() { return GV; }
144  };
145 
146  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147  /// make the right decision when generating code for different targets.
148  const AArch64Subtarget *Subtarget;
149  LLVMContext *Context;
150 
151  bool fastLowerArguments() override;
152  bool fastLowerCall(CallLoweringInfo &CLI) override;
153  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156  // Selection routines.
157  bool selectAddSub(const Instruction *I);
158  bool selectLogicalOp(const Instruction *I);
159  bool selectLoad(const Instruction *I);
160  bool selectStore(const Instruction *I);
161  bool selectBranch(const Instruction *I);
162  bool selectIndirectBr(const Instruction *I);
163  bool selectCmp(const Instruction *I);
164  bool selectSelect(const Instruction *I);
165  bool selectFPExt(const Instruction *I);
166  bool selectFPTrunc(const Instruction *I);
167  bool selectFPToInt(const Instruction *I, bool Signed);
168  bool selectIntToFP(const Instruction *I, bool Signed);
169  bool selectRem(const Instruction *I, unsigned ISDOpcode);
170  bool selectRet(const Instruction *I);
171  bool selectTrunc(const Instruction *I);
172  bool selectIntExt(const Instruction *I);
173  bool selectMul(const Instruction *I);
174  bool selectShift(const Instruction *I);
175  bool selectBitCast(const Instruction *I);
176  bool selectFRem(const Instruction *I);
177  bool selectSDiv(const Instruction *I);
178  bool selectGetElementPtr(const Instruction *I);
179  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181  // Utility helper routines.
182  bool isTypeLegal(Type *Ty, MVT &VT);
183  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184  bool isValueAvailable(const Value *V) const;
185  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186  bool computeCallAddress(const Value *V, Address &Addr);
187  bool simplifyAddress(Address &Addr, MVT VT);
188  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189  MachineMemOperand::Flags Flags,
190  unsigned ScaleFactor, MachineMemOperand *MMO);
191  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193  unsigned Alignment);
194  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195  const Value *Cond);
196  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197  bool optimizeSelect(const SelectInst *SI);
198  unsigned getRegForGEPIndex(const Value *Idx);
199 
200  // Emit helper routines.
201  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202  const Value *RHS, bool SetFlags = false,
203  bool WantResult = true, bool IsZExt = false);
204  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205  unsigned RHSReg, bool SetFlags = false,
206  bool WantResult = true);
207  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208  uint64_t Imm, bool SetFlags = false,
209  bool WantResult = true);
210  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211  unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
212  uint64_t ShiftImm, bool SetFlags = false,
213  bool WantResult = true);
214  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
215  unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
216  uint64_t ShiftImm, bool SetFlags = false,
217  bool WantResult = true);
218 
219  // Emit functions.
220  bool emitCompareAndBranch(const BranchInst *BI);
221  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
222  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
223  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
224  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
225  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
226  MachineMemOperand *MMO = nullptr);
227  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
228  MachineMemOperand *MMO = nullptr);
229  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
230  MachineMemOperand *MMO = nullptr);
231  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
232  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
233  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
234  bool SetFlags = false, bool WantResult = true,
235  bool IsZExt = false);
236  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
237  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
238  bool SetFlags = false, bool WantResult = true,
239  bool IsZExt = false);
240  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
241  bool WantResult = true);
242  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243  AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
244  bool WantResult = true);
245  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
246  const Value *RHS);
247  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
248  uint64_t Imm);
249  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250  unsigned RHSReg, uint64_t ShiftImm);
251  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
252  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
253  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
256  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
257  bool IsZExt = true);
258  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260  bool IsZExt = true);
261  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263  bool IsZExt = false);
264 
265  unsigned materializeInt(const ConstantInt *CI, MVT VT);
266  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
267  unsigned materializeGV(const GlobalValue *GV);
268 
269  // Call handling routines.
270 private:
271  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
272  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
273  unsigned &NumBytes);
274  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
275 
276 public:
277  // Backend specific FastISel code.
278  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
279  unsigned fastMaterializeConstant(const Constant *C) override;
280  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
281 
282  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
283  const TargetLibraryInfo *LibInfo)
284  : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
285  Subtarget =
286  &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
287  Context = &FuncInfo.Fn->getContext();
288  }
289 
290  bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300  "Unexpected integer extend instruction.");
301  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302  "Unexpected value type.");
303  bool IsZExt = isa<ZExtInst>(I);
304 
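 // An integer extend is free when its operand is a load with a single use:
 // the load itself can be selected to produce the extended value.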
305  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306  if (LI->hasOneUse())
307  return true;
308 
309  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311  return true;
312 
313  return false;
314 }
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319  switch (VT.SimpleTy) {
320  default:
321  return 0; // invalid
322  case MVT::i1: // fall-through
323  case MVT::i8:
324  return 1;
325  case MVT::i16:
326  return 2;
327  case MVT::i32: // fall-through
328  case MVT::f32:
329  return 4;
330  case MVT::i64: // fall-through
331  case MVT::f64:
332  return 8;
333  }
334 }
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337  if (CC == CallingConv::WebKit_JS)
338  return CC_AArch64_WebKit_JS;
339  if (CC == CallingConv::GHC)
340  return CC_AArch64_GHC;
341  if (CC == CallingConv::CFGuard_Check)
342  return CC_AArch64_Win64_CFGuard_Check;
343  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348  "Alloca should always return a pointer.");
349 
350  // Don't handle dynamic allocas.
351  if (!FuncInfo.StaticAllocaMap.count(AI))
352  return 0;
353 
354  DenseMap<const AllocaInst *, int>::iterator SI =
355  FuncInfo.StaticAllocaMap.find(AI);
356 
357  if (SI != FuncInfo.StaticAllocaMap.end()) {
358  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
360  ResultReg)
361  .addFrameIndex(SI->second)
362  .addImm(0)
363  .addImm(0);
364  return ResultReg;
365  }
366 
367  return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371  if (VT > MVT::i64)
372  return 0;
373 
374  if (!CI->isZero())
375  return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377  // Create a copy from the zero register to materialize a "0" value.
378  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379  : &AArch64::GPR32RegClass;
380  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381  unsigned ResultReg = createResultReg(RC);
382  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
383  ResultReg).addReg(ZeroReg, getKillRegState(true));
384  return ResultReg;
385 }
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388  // Positive zero (+0.0) has to be materialized with a fmov from the zero
389  // register, because the immediate version of fmov cannot encode zero.
390  if (CFP->isNullValue())
391  return fastMaterializeFloatZero(CFP);
392 
393  if (VT != MVT::f32 && VT != MVT::f64)
394  return 0;
395 
396  const APFloat Val = CFP->getValueAPF();
397  bool Is64Bit = (VT == MVT::f64);
398  // This checks to see if we can use FMOV instructions to materialize
399  // a constant, otherwise we have to materialize via the constant pool.
400  int Imm =
401  Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402  if (Imm != -1) {
403  unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405  }
406 
407  // For the large code model materialize the FP constant in code.
408  if (TM.getCodeModel() == CodeModel::Large) {
409  unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410  const TargetRegisterClass *RC = Is64Bit ?
411  &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413  unsigned TmpReg = createResultReg(RC);
414  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
415  .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416 
417  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
418  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
419  TII.get(TargetOpcode::COPY), ResultReg)
420  .addReg(TmpReg, getKillRegState(true));
421 
422  return ResultReg;
423  }
424 
425  // Materialize via constant pool. MachineConstantPool wants an explicit
426  // alignment.
427  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
432  ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
437  .addReg(ADRPReg)
438  .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439  return ResultReg;
440 }
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443  // We can't handle thread-local variables quickly yet.
444  if (GV->isThreadLocal())
445  return 0;
446 
447  // MachO still uses GOT for large code-model accesses, but ELF requires
448  // movz/movk sequences, which FastISel doesn't handle yet.
449  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450  return 0;
451 
452  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455  if (!DestEVT.isSimple())
456  return 0;
457 
458  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459  unsigned ResultReg;
460 
461  if (OpFlags & AArch64II::MO_GOT) {
462  // ADRP + LDRX
463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
464  ADRPReg)
465  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467  unsigned LdrOpc;
468  if (Subtarget->isTargetILP32()) {
469  ResultReg = createResultReg(&AArch64::GPR32RegClass);
470  LdrOpc = AArch64::LDRWui;
471  } else {
472  ResultReg = createResultReg(&AArch64::GPR64RegClass);
473  LdrOpc = AArch64::LDRXui;
474  }
475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
476  ResultReg)
477  .addReg(ADRPReg)
478  .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479  AArch64II::MO_NC | OpFlags);
480  if (!Subtarget->isTargetILP32())
481  return ResultReg;
482 
483  // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
484  // so we must extend the result on ILP32.
485  unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
486  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
487  TII.get(TargetOpcode::SUBREG_TO_REG))
488  .addDef(Result64)
489  .addImm(0)
490  .addReg(ResultReg, RegState::Kill)
491  .addImm(AArch64::sub_32);
492  return Result64;
493  } else {
494  // ADRP + ADDX
495  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
496  ADRPReg)
497  .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499  ResultReg = createResultReg(&AArch64::GPR64spRegClass);
500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
501  ResultReg)
502  .addReg(ADRPReg)
503  .addGlobalAddress(GV, 0,
504  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
505  .addImm(0);
506  }
507  return ResultReg;
508 }
509 
510 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
511  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
512 
513  // Only handle simple types.
514  if (!CEVT.isSimple())
515  return 0;
516  MVT VT = CEVT.getSimpleVT();
517  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
518  // 'null' pointers need to have a somewhat special treatment.
519  if (isa<ConstantPointerNull>(C)) {
520  assert(VT == MVT::i64 && "Expected 64-bit pointers");
521  return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
522  }
523 
524  if (const auto *CI = dyn_cast<ConstantInt>(C))
525  return materializeInt(CI, VT);
526  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
527  return materializeFP(CFP, VT);
528  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
529  return materializeGV(GV);
530 
531  return 0;
532 }
533 
534 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
535  assert(CFP->isNullValue() &&
536  "Floating-point constant is not a positive zero.");
537  MVT VT;
538  if (!isTypeLegal(CFP->getType(), VT))
539  return 0;
540 
541  if (VT != MVT::f32 && VT != MVT::f64)
542  return 0;
543 
544  bool Is64Bit = (VT == MVT::f64);
545  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
546  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
547  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
548 }
549 
550 /// Check if the multiply is by a power-of-2 constant.
551 static bool isMulPowOf2(const Value *I) {
552  if (const auto *MI = dyn_cast<MulOperator>(I)) {
553  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
554  if (C->getValue().isPowerOf2())
555  return true;
556  if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
557  if (C->getValue().isPowerOf2())
558  return true;
559  }
560  return false;
561 }
562 
563 // Computes the address to get to an object.
564 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
565 {
566  const User *U = nullptr;
567  unsigned Opcode = Instruction::UserOp1;
568  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
569  // Don't walk into other basic blocks unless the object is an alloca from
570  // another block, otherwise it may not have a virtual register assigned.
571  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
572  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
573  Opcode = I->getOpcode();
574  U = I;
575  }
576  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
577  Opcode = C->getOpcode();
578  U = C;
579  }
580 
581  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
582  if (Ty->getAddressSpace() > 255)
583  // Fast instruction selection doesn't support the special
584  // address spaces.
585  return false;
586 
587  switch (Opcode) {
588  default:
589  break;
590  case Instruction::BitCast:
591  // Look through bitcasts.
592  return computeAddress(U->getOperand(0), Addr, Ty);
593 
594  case Instruction::IntToPtr:
595  // Look past no-op inttoptrs.
596  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
597  TLI.getPointerTy(DL))
598  return computeAddress(U->getOperand(0), Addr, Ty);
599  break;
600 
601  case Instruction::PtrToInt:
602  // Look past no-op ptrtoints.
603  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
604  return computeAddress(U->getOperand(0), Addr, Ty);
605  break;
606 
607  case Instruction::GetElementPtr: {
608  Address SavedAddr = Addr;
609  uint64_t TmpOffset = Addr.getOffset();
610 
611  // Iterate through the GEP folding the constants into offsets where
612  // we can.
613  for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
614  GTI != E; ++GTI) {
615  const Value *Op = GTI.getOperand();
616  if (StructType *STy = GTI.getStructTypeOrNull()) {
617  const StructLayout *SL = DL.getStructLayout(STy);
618  unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
619  TmpOffset += SL->getElementOffset(Idx);
620  } else {
621  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
622  while (true) {
623  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
624  // Constant-offset addressing.
625  TmpOffset += CI->getSExtValue() * S;
626  break;
627  }
628  if (canFoldAddIntoGEP(U, Op)) {
629  // A compatible add with a constant operand. Fold the constant.
630  ConstantInt *CI =
631  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
632  TmpOffset += CI->getSExtValue() * S;
633  // Iterate on the other operand.
634  Op = cast<AddOperator>(Op)->getOperand(0);
635  continue;
636  }
637  // Unsupported
638  goto unsupported_gep;
639  }
640  }
641  }
642 
643  // Try to grab the base operand now.
644  Addr.setOffset(TmpOffset);
645  if (computeAddress(U->getOperand(0), Addr, Ty))
646  return true;
647 
648  // We failed, restore everything and try the other options.
649  Addr = SavedAddr;
650 
651  unsupported_gep:
652  break;
653  }
654  case Instruction::Alloca: {
655  const AllocaInst *AI = cast<AllocaInst>(Obj);
656  DenseMap<const AllocaInst *, int>::iterator SI =
657  FuncInfo.StaticAllocaMap.find(AI);
658  if (SI != FuncInfo.StaticAllocaMap.end()) {
659  Addr.setKind(Address::FrameIndexBase);
660  Addr.setFI(SI->second);
661  return true;
662  }
663  break;
664  }
665  case Instruction::Add: {
666  // Adds of constants are common and easy enough.
667  const Value *LHS = U->getOperand(0);
668  const Value *RHS = U->getOperand(1);
669 
670  if (isa<ConstantInt>(LHS))
671  std::swap(LHS, RHS);
672 
673  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
674  Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
675  return computeAddress(LHS, Addr, Ty);
676  }
677 
678  Address Backup = Addr;
679  if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
680  return true;
681  Addr = Backup;
682 
683  break;
684  }
685  case Instruction::Sub: {
686  // Subs of constants are common and easy enough.
687  const Value *LHS = U->getOperand(0);
688  const Value *RHS = U->getOperand(1);
689 
690  if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
691  Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
692  return computeAddress(LHS, Addr, Ty);
693  }
694  break;
695  }
696  case Instruction::Shl: {
697  if (Addr.getOffsetReg())
698  break;
699 
700  const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
701  if (!CI)
702  break;
703 
704  unsigned Val = CI->getZExtValue();
705  if (Val < 1 || Val > 3)
706  break;
707 
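 // The shift is only foldable into the addressing mode when it matches the
 // width of the access; the type size is checked against 1 << Val below.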
708  uint64_t NumBytes = 0;
709  if (Ty && Ty->isSized()) {
710  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
711  NumBytes = NumBits / 8;
712  if (!isPowerOf2_64(NumBits))
713  NumBytes = 0;
714  }
715 
716  if (NumBytes != (1ULL << Val))
717  break;
718 
719  Addr.setShift(Val);
720  Addr.setExtendType(AArch64_AM::LSL);
721 
722  const Value *Src = U->getOperand(0);
723  if (const auto *I = dyn_cast<Instruction>(Src)) {
724  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
725  // Fold the zext or sext when it won't become a noop.
726  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
727  if (!isIntExtFree(ZE) &&
728  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
729  Addr.setExtendType(AArch64_AM::UXTW);
730  Src = ZE->getOperand(0);
731  }
732  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
733  if (!isIntExtFree(SE) &&
734  SE->getOperand(0)->getType()->isIntegerTy(32)) {
735  Addr.setExtendType(AArch64_AM::SXTW);
736  Src = SE->getOperand(0);
737  }
738  }
739  }
740  }
741 
742  if (const auto *AI = dyn_cast<BinaryOperator>(Src))
743  if (AI->getOpcode() == Instruction::And) {
744  const Value *LHS = AI->getOperand(0);
745  const Value *RHS = AI->getOperand(1);
746 
747  if (const auto *C = dyn_cast<ConstantInt>(LHS))
748  if (C->getValue() == 0xffffffff)
749  std::swap(LHS, RHS);
750 
751  if (const auto *C = dyn_cast<ConstantInt>(RHS))
752  if (C->getValue() == 0xffffffff) {
753  Addr.setExtendType(AArch64_AM::UXTW);
754  unsigned Reg = getRegForValue(LHS);
755  if (!Reg)
756  return false;
757  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
758  Addr.setOffsetReg(Reg);
759  return true;
760  }
761  }
762 
763  unsigned Reg = getRegForValue(Src);
764  if (!Reg)
765  return false;
766  Addr.setOffsetReg(Reg);
767  return true;
768  }
769  case Instruction::Mul: {
770  if (Addr.getOffsetReg())
771  break;
772 
773  if (!isMulPowOf2(U))
774  break;
775 
776  const Value *LHS = U->getOperand(0);
777  const Value *RHS = U->getOperand(1);
778 
779  // Canonicalize power-of-2 value to the RHS.
780  if (const auto *C = dyn_cast<ConstantInt>(LHS))
781  if (C->getValue().isPowerOf2())
782  std::swap(LHS, RHS);
783 
784  assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
785  const auto *C = cast<ConstantInt>(RHS);
786  unsigned Val = C->getValue().logBase2();
787  if (Val < 1 || Val > 3)
788  break;
789 
790  uint64_t NumBytes = 0;
791  if (Ty && Ty->isSized()) {
792  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
793  NumBytes = NumBits / 8;
794  if (!isPowerOf2_64(NumBits))
795  NumBytes = 0;
796  }
797 
798  if (NumBytes != (1ULL << Val))
799  break;
800 
801  Addr.setShift(Val);
802  Addr.setExtendType(AArch64_AM::LSL);
803 
804  const Value *Src = LHS;
805  if (const auto *I = dyn_cast<Instruction>(Src)) {
806  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
807  // Fold the zext or sext when it won't become a noop.
808  if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
809  if (!isIntExtFree(ZE) &&
810  ZE->getOperand(0)->getType()->isIntegerTy(32)) {
811  Addr.setExtendType(AArch64_AM::UXTW);
812  Src = ZE->getOperand(0);
813  }
814  } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
815  if (!isIntExtFree(SE) &&
816  SE->getOperand(0)->getType()->isIntegerTy(32)) {
817  Addr.setExtendType(AArch64_AM::SXTW);
818  Src = SE->getOperand(0);
819  }
820  }
821  }
822  }
823 
824  unsigned Reg = getRegForValue(Src);
825  if (!Reg)
826  return false;
827  Addr.setOffsetReg(Reg);
828  return true;
829  }
830  case Instruction::And: {
831  if (Addr.getOffsetReg())
832  break;
833 
834  if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
835  break;
836 
837  const Value *LHS = U->getOperand(0);
838  const Value *RHS = U->getOperand(1);
839 
840  if (const auto *C = dyn_cast<ConstantInt>(LHS))
841  if (C->getValue() == 0xffffffff)
842  std::swap(LHS, RHS);
843 
844  if (const auto *C = dyn_cast<ConstantInt>(RHS))
845  if (C->getValue() == 0xffffffff) {
846  Addr.setShift(0);
847  Addr.setExtendType(AArch64_AM::LSL);
848  Addr.setExtendType(AArch64_AM::UXTW);
849 
850  unsigned Reg = getRegForValue(LHS);
851  if (!Reg)
852  return false;
853  Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
854  Addr.setOffsetReg(Reg);
855  return true;
856  }
857  break;
858  }
859  case Instruction::SExt:
860  case Instruction::ZExt: {
861  if (!Addr.getReg() || Addr.getOffsetReg())
862  break;
863 
864  const Value *Src = nullptr;
865  // Fold the zext or sext when it won't become a noop.
866  if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
867  if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
868  Addr.setExtendType(AArch64_AM::UXTW);
869  Src = ZE->getOperand(0);
870  }
871  } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
872  if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
873  Addr.setExtendType(AArch64_AM::SXTW);
874  Src = SE->getOperand(0);
875  }
876  }
877 
878  if (!Src)
879  break;
880 
881  Addr.setShift(0);
882  unsigned Reg = getRegForValue(Src);
883  if (!Reg)
884  return false;
885  Addr.setOffsetReg(Reg);
886  return true;
887  }
888  } // end switch
889 
890  if (Addr.isRegBase() && !Addr.getReg()) {
891  unsigned Reg = getRegForValue(Obj);
892  if (!Reg)
893  return false;
894  Addr.setReg(Reg);
895  return true;
896  }
897 
898  if (!Addr.getOffsetReg()) {
899  unsigned Reg = getRegForValue(Obj);
900  if (!Reg)
901  return false;
902  Addr.setOffsetReg(Reg);
903  return true;
904  }
905 
906  return false;
907 }
908 
909 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
910  const User *U = nullptr;
911  unsigned Opcode = Instruction::UserOp1;
912  bool InMBB = true;
913 
914  if (const auto *I = dyn_cast<Instruction>(V)) {
915  Opcode = I->getOpcode();
916  U = I;
917  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
918  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
919  Opcode = C->getOpcode();
920  U = C;
921  }
922 
923  switch (Opcode) {
924  default: break;
925  case Instruction::BitCast:
926  // Look past bitcasts if its operand is in the same BB.
927  if (InMBB)
928  return computeCallAddress(U->getOperand(0), Addr);
929  break;
930  case Instruction::IntToPtr:
931  // Look past no-op inttoptrs if its operand is in the same BB.
932  if (InMBB &&
933  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
934  TLI.getPointerTy(DL))
935  return computeCallAddress(U->getOperand(0), Addr);
936  break;
937  case Instruction::PtrToInt:
938  // Look past no-op ptrtoints if its operand is in the same BB.
939  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
940  return computeCallAddress(U->getOperand(0), Addr);
941  break;
942  }
943 
944  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
945  Addr.setGlobalValue(GV);
946  return true;
947  }
948 
949  // If all else fails, try to materialize the value in a register.
950  if (!Addr.getGlobalValue()) {
951  Addr.setReg(getRegForValue(V));
952  return Addr.getReg() != 0;
953  }
954 
955  return false;
956 }
957 
958 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
959  EVT evt = TLI.getValueType(DL, Ty, true);
960 
961  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
962  return false;
963 
964  // Only handle simple types.
965  if (evt == MVT::Other || !evt.isSimple())
966  return false;
967  VT = evt.getSimpleVT();
968 
969  // This is a legal type, but it's not something we handle in fast-isel.
970  if (VT == MVT::f128)
971  return false;
972 
973  // Handle all other legal types, i.e. a register that will directly hold this
974  // value.
975  return TLI.isTypeLegal(VT);
976 }
977 
978 /// Determine if the value type is supported by FastISel.
979 ///
980 /// FastISel for AArch64 can handle more value types than are legal. This adds
981 /// simple value types such as i1, i8, and i16.
982 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
983  if (Ty->isVectorTy() && !IsVectorAllowed)
984  return false;
985 
986  if (isTypeLegal(Ty, VT))
987  return true;
988 
989  // If this is a type that can be sign- or zero-extended to a basic operation,
990  // go ahead and accept it now.
991  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
992  return true;
993 
994  return false;
995 }
996 
997 bool AArch64FastISel::isValueAvailable(const Value *V) const {
998  if (!isa<Instruction>(V))
999  return true;
1000 
1001  const auto *I = cast<Instruction>(V);
1002  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1003 }
1004 
1005 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1006  if (Subtarget->isTargetILP32())
1007  return false;
1008 
1009  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1010  if (!ScaleFactor)
1011  return false;
1012 
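 // Unscaled load/store instructions accept a signed 9-bit byte offset, while
 // the scaled forms accept an unsigned 12-bit offset in units of the access size.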
1013  bool ImmediateOffsetNeedsLowering = false;
1014  bool RegisterOffsetNeedsLowering = false;
1015  int64_t Offset = Addr.getOffset();
1016  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1017  ImmediateOffsetNeedsLowering = true;
1018  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1019  !isUInt<12>(Offset / ScaleFactor))
1020  ImmediateOffsetNeedsLowering = true;
1021 
1022  // Cannot encode an offset register and an immediate offset in the same
1023  // instruction. Fold the immediate offset into the load/store instruction and
1024  // emit an additional add to take care of the offset register.
1025  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1026  RegisterOffsetNeedsLowering = true;
1027 
1028  // Cannot encode zero register as base.
1029  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1030  RegisterOffsetNeedsLowering = true;
1031 
1032  // If this is a stack pointer and the offset needs to be simplified then put
1033  // the alloca address into a register, set the base type back to register and
1034  // continue. This should almost never happen.
1035  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1036  {
1037  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1039  ResultReg)
1040  .addFrameIndex(Addr.getFI())
1041  .addImm(0)
1042  .addImm(0);
1043  Addr.setKind(Address::RegBase);
1044  Addr.setReg(ResultReg);
1045  }
1046 
1047  if (RegisterOffsetNeedsLowering) {
1048  unsigned ResultReg = 0;
1049  if (Addr.getReg()) {
1050  if (Addr.getExtendType() == AArch64_AM::SXTW ||
1051  Addr.getExtendType() == AArch64_AM::UXTW )
1052  ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1053  Addr.getOffsetReg(), Addr.getExtendType(),
1054  Addr.getShift());
1055  else
1056  ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1057  Addr.getOffsetReg(), AArch64_AM::LSL,
1058  Addr.getShift());
1059  } else {
1060  if (Addr.getExtendType() == AArch64_AM::UXTW)
1061  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1062  Addr.getShift(), /*IsZExt=*/true);
1063  else if (Addr.getExtendType() == AArch64_AM::SXTW)
1064  ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1065  Addr.getShift(), /*IsZExt=*/false);
1066  else
1067  ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1068  Addr.getShift());
1069  }
1070  if (!ResultReg)
1071  return false;
1072 
1073  Addr.setReg(ResultReg);
1074  Addr.setOffsetReg(0);
1075  Addr.setShift(0);
1076  Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1077  }
1078 
1079  // Since the offset is too large for the load/store instruction get the
1080  // reg+offset into a register.
1081  if (ImmediateOffsetNeedsLowering) {
1082  unsigned ResultReg;
1083  if (Addr.getReg())
1084  // Try to fold the immediate into the add instruction.
1085  ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1086  else
1087  ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1088 
1089  if (!ResultReg)
1090  return false;
1091  Addr.setReg(ResultReg);
1092  Addr.setOffset(0);
1093  }
1094  return true;
1095 }
1096 
1097 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1098  const MachineInstrBuilder &MIB,
1099  MachineMemOperand::Flags Flags,
1100  unsigned ScaleFactor,
1101  MachineMemOperand *MMO) {
1102  int64_t Offset = Addr.getOffset() / ScaleFactor;
1103  // Frame base works a bit differently. Handle it separately.
1104  if (Addr.isFIBase()) {
1105  int FI = Addr.getFI();
1106  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1107  // and alignment should be based on the VT.
1108  MMO = FuncInfo.MF->getMachineMemOperand(
1109  MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1110  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1111  // Now add the rest of the operands.
1112  MIB.addFrameIndex(FI).addImm(Offset);
1113  } else {
1114  assert(Addr.isRegBase() && "Unexpected address kind.");
1115  const MCInstrDesc &II = MIB->getDesc();
1116  unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1117  Addr.setReg(
1118  constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1119  Addr.setOffsetReg(
1120  constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1121  if (Addr.getOffsetReg()) {
1122  assert(Addr.getOffset() == 0 && "Unexpected offset");
1123  bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1124  Addr.getExtendType() == AArch64_AM::SXTX;
1125  MIB.addReg(Addr.getReg());
1126  MIB.addReg(Addr.getOffsetReg());
1127  MIB.addImm(IsSigned);
1128  MIB.addImm(Addr.getShift() != 0);
1129  } else
1130  MIB.addReg(Addr.getReg()).addImm(Offset);
1131  }
1132 
1133  if (MMO)
1134  MIB.addMemOperand(MMO);
1135 }
1136 
1137 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1138  const Value *RHS, bool SetFlags,
1139  bool WantResult, bool IsZExt) {
1140  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1141  bool NeedExtend = false;
1142  switch (RetVT.SimpleTy) {
1143  default:
1144  return 0;
1145  case MVT::i1:
1146  NeedExtend = true;
1147  break;
1148  case MVT::i8:
1149  NeedExtend = true;
1150  ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1151  break;
1152  case MVT::i16:
1153  NeedExtend = true;
1154  ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1155  break;
1156  case MVT::i32: // fall-through
1157  case MVT::i64:
1158  break;
1159  }
1160  MVT SrcVT = RetVT;
1161  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1162 
1163  // Canonicalize immediates to the RHS first.
1164  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1165  std::swap(LHS, RHS);
1166 
1167  // Canonicalize mul by power of 2 to the RHS.
1168  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1169  if (isMulPowOf2(LHS))
1170  std::swap(LHS, RHS);
1171 
1172  // Canonicalize shift immediate to the RHS.
1173  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1174  if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1175  if (isa<ConstantInt>(SI->getOperand(1)))
1176  if (SI->getOpcode() == Instruction::Shl ||
1177  SI->getOpcode() == Instruction::LShr ||
1178  SI->getOpcode() == Instruction::AShr )
1179  std::swap(LHS, RHS);
1180 
1181  unsigned LHSReg = getRegForValue(LHS);
1182  if (!LHSReg)
1183  return 0;
1184 
1185  if (NeedExtend)
1186  LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1187 
1188  unsigned ResultReg = 0;
1189  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1190  uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1191  if (C->isNegative())
1192  ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1193  WantResult);
1194  else
1195  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1196  WantResult);
1197  } else if (const auto *C = dyn_cast<Constant>(RHS))
1198  if (C->isNullValue())
1199  ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1200 
1201  if (ResultReg)
1202  return ResultReg;
1203 
1204  // Only extend the RHS within the instruction if there is a valid extend type.
1205  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1206  isValueAvailable(RHS)) {
1207  if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1208  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1209  if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1210  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1211  if (!RHSReg)
1212  return 0;
1213  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1214  C->getZExtValue(), SetFlags, WantResult);
1215  }
1216  unsigned RHSReg = getRegForValue(RHS);
1217  if (!RHSReg)
1218  return 0;
1219  return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1220  SetFlags, WantResult);
1221  }
1222 
1223  // Check if the mul can be folded into the instruction.
1224  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1225  if (isMulPowOf2(RHS)) {
1226  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1227  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1228 
1229  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1230  if (C->getValue().isPowerOf2())
1231  std::swap(MulLHS, MulRHS);
1232 
1233  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1234  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1235  unsigned RHSReg = getRegForValue(MulLHS);
1236  if (!RHSReg)
1237  return 0;
1238  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1239  ShiftVal, SetFlags, WantResult);
1240  if (ResultReg)
1241  return ResultReg;
1242  }
1243  }
1244 
1245  // Check if the shift can be folded into the instruction.
1246  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1247  if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1248  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1249  AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1250  switch (SI->getOpcode()) {
1251  default: break;
1252  case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1253  case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1254  case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1255  }
1256  uint64_t ShiftVal = C->getZExtValue();
1257  if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1258  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1259  if (!RHSReg)
1260  return 0;
1261  ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1262  ShiftVal, SetFlags, WantResult);
1263  if (ResultReg)
1264  return ResultReg;
1265  }
1266  }
1267  }
1268  }
1269 
1270  unsigned RHSReg = getRegForValue(RHS);
1271  if (!RHSReg)
1272  return 0;
1273 
1274  if (NeedExtend)
1275  RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1278 }
1279 
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281  unsigned RHSReg, bool SetFlags,
1282  bool WantResult) {
1283  assert(LHSReg && RHSReg && "Invalid register number.");
1284 
1285  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1286  RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1287  return 0;
1288 
1289  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1290  return 0;
1291 
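 // Opcode table indexed by [SetFlags][UseAdd][Is64Bit].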
1292  static const unsigned OpcTable[2][2][2] = {
1293  { { AArch64::SUBWrr, AArch64::SUBXrr },
1294  { AArch64::ADDWrr, AArch64::ADDXrr } },
1295  { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1296  { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1297  };
1298  bool Is64Bit = RetVT == MVT::i64;
1299  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1300  const TargetRegisterClass *RC =
1301  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1302  unsigned ResultReg;
1303  if (WantResult)
1304  ResultReg = createResultReg(RC);
1305  else
1306  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1307 
1308  const MCInstrDesc &II = TII.get(Opc);
1309  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1310  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1311  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1312  .addReg(LHSReg)
1313  .addReg(RHSReg);
1314  return ResultReg;
1315 }
1316 
1317 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1318  uint64_t Imm, bool SetFlags,
1319  bool WantResult) {
1320  assert(LHSReg && "Invalid register number.");
1321 
1322  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323  return 0;
1324 
1325  unsigned ShiftImm;
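 // The arithmetic immediate is 12 bits wide and may optionally be shifted
 // left by 12.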
1326  if (isUInt<12>(Imm))
1327  ShiftImm = 0;
1328  else if ((Imm & 0xfff000) == Imm) {
1329  ShiftImm = 12;
1330  Imm >>= 12;
1331  } else
1332  return 0;
1333 
1334  static const unsigned OpcTable[2][2][2] = {
1335  { { AArch64::SUBWri, AArch64::SUBXri },
1336  { AArch64::ADDWri, AArch64::ADDXri } },
1337  { { AArch64::SUBSWri, AArch64::SUBSXri },
1338  { AArch64::ADDSWri, AArch64::ADDSXri } }
1339  };
1340  bool Is64Bit = RetVT == MVT::i64;
1341  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1342  const TargetRegisterClass *RC;
1343  if (SetFlags)
1344  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1345  else
1346  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1347  unsigned ResultReg;
1348  if (WantResult)
1349  ResultReg = createResultReg(RC);
1350  else
1351  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352 
1353  const MCInstrDesc &II = TII.get(Opc);
1354  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1356  .addReg(LHSReg)
1357  .addImm(Imm)
1358  .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1359  return ResultReg;
1360 }
1361 
1362 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1363  unsigned RHSReg,
1364  AArch64_AM::ShiftExtendType ShiftType,
1365  uint64_t ShiftImm, bool SetFlags,
1366  bool WantResult) {
1367  assert(LHSReg && RHSReg && "Invalid register number.");
1368  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1369  RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1370 
1371  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1372  return 0;
1373 
1374  // Don't deal with undefined shifts.
1375  if (ShiftImm >= RetVT.getSizeInBits())
1376  return 0;
1377 
1378  static const unsigned OpcTable[2][2][2] = {
1379  { { AArch64::SUBWrs, AArch64::SUBXrs },
1380  { AArch64::ADDWrs, AArch64::ADDXrs } },
1381  { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1382  { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1383  };
1384  bool Is64Bit = RetVT == MVT::i64;
1385  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1386  const TargetRegisterClass *RC =
1387  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1388  unsigned ResultReg;
1389  if (WantResult)
1390  ResultReg = createResultReg(RC);
1391  else
1392  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1393 
1394  const MCInstrDesc &II = TII.get(Opc);
1395  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1396  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1397  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1398  .addReg(LHSReg)
1399  .addReg(RHSReg)
1400  .addImm(getShifterImm(ShiftType, ShiftImm));
1401  return ResultReg;
1402 }
1403 
1404 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1405  unsigned RHSReg,
1406  AArch64_AM::ShiftExtendType ExtType,
1407  uint64_t ShiftImm, bool SetFlags,
1408  bool WantResult) {
1409  assert(LHSReg && RHSReg && "Invalid register number.");
1410  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411  RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413  if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414  return 0;
1415 
1416  if (ShiftImm >= 4)
1417  return 0;
1418 
1419  static const unsigned OpcTable[2][2][2] = {
1420  { { AArch64::SUBWrx, AArch64::SUBXrx },
1421  { AArch64::ADDWrx, AArch64::ADDXrx } },
1422  { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423  { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1424  };
1425  bool Is64Bit = RetVT == MVT::i64;
1426  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427  const TargetRegisterClass *RC = nullptr;
1428  if (SetFlags)
1429  RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430  else
1431  RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432  unsigned ResultReg;
1433  if (WantResult)
1434  ResultReg = createResultReg(RC);
1435  else
1436  ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438  const MCInstrDesc &II = TII.get(Opc);
1439  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442  .addReg(LHSReg)
1443  .addReg(RHSReg)
1444  .addImm(getArithExtendImm(ExtType, ShiftImm));
1445  return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449  Type *Ty = LHS->getType();
1450  EVT EVT = TLI.getValueType(DL, Ty, true);
1451  if (!EVT.isSimple())
1452  return false;
1453  MVT VT = EVT.getSimpleVT();
1454 
1455  switch (VT.SimpleTy) {
1456  default:
1457  return false;
1458  case MVT::i1:
1459  case MVT::i8:
1460  case MVT::i16:
1461  case MVT::i32:
1462  case MVT::i64:
1463  return emitICmp(VT, LHS, RHS, IsZExt);
1464  case MVT::f32:
1465  case MVT::f64:
1466  return emitFCmp(VT, LHS, RHS);
1467  }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471  bool IsZExt) {
1472  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1477  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1478  /*SetFlags=*/true, /*WantResult=*/false) != 0;
1479 }
1480 
1481 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1482  if (RetVT != MVT::f32 && RetVT != MVT::f64)
1483  return false;
1484 
1485  // Check to see if the 2nd operand is a constant that we can encode directly
1486  // in the compare.
1487  bool UseImm = false;
1488  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1489  if (CFP->isZero() && !CFP->isNegative())
1490  UseImm = true;
1491 
1492  unsigned LHSReg = getRegForValue(LHS);
1493  if (!LHSReg)
1494  return false;
1495 
1496  if (UseImm) {
1497  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1498  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1499  .addReg(LHSReg);
1500  return true;
1501  }
1502 
1503  unsigned RHSReg = getRegForValue(RHS);
1504  if (!RHSReg)
1505  return false;
1506 
1507  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1508  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1509  .addReg(LHSReg)
1510  .addReg(RHSReg);
1511  return true;
1512 }
1513 
1514 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1515  bool SetFlags, bool WantResult, bool IsZExt) {
1516  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1517  IsZExt);
1518 }
1519 
1520 /// This method is a wrapper to simplify add emission.
1521 ///
1522 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1523 /// that fails, then try to materialize the immediate into a register and use
1524 /// emitAddSub_rr instead.
1525 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1526  unsigned ResultReg;
1527  if (Imm < 0)
1528  ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1529  else
1530  ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1531 
1532  if (ResultReg)
1533  return ResultReg;
1534 
1535  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1536  if (!CReg)
1537  return 0;
1538 
1539  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1540  return ResultReg;
1541 }
1542 
1543 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1544  bool SetFlags, bool WantResult, bool IsZExt) {
1545  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1546  IsZExt);
1547 }
1548 
1549 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1550  unsigned RHSReg, bool WantResult) {
1551  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1552  /*SetFlags=*/true, WantResult);
1553 }
1554 
1555 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1556  unsigned RHSReg,
1557  AArch64_AM::ShiftExtendType ShiftType,
1558  uint64_t ShiftImm, bool WantResult) {
1559  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1560  ShiftImm, /*SetFlags=*/true, WantResult);
1561 }
1562 
1563 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1564  const Value *LHS, const Value *RHS) {
1565  // Canonicalize immediates to the RHS first.
1566  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1567  std::swap(LHS, RHS);
1568 
1569  // Canonicalize mul by power-of-2 to the RHS.
1570  if (LHS->hasOneUse() && isValueAvailable(LHS))
1571  if (isMulPowOf2(LHS))
1572  std::swap(LHS, RHS);
1573 
1574  // Canonicalize shift immediate to the RHS.
1575  if (LHS->hasOneUse() && isValueAvailable(LHS))
1576  if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1577  if (isa<ConstantInt>(SI->getOperand(1)))
1578  std::swap(LHS, RHS);
1579 
1580  unsigned LHSReg = getRegForValue(LHS);
1581  if (!LHSReg)
1582  return 0;
1583 
1584  unsigned ResultReg = 0;
1585  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1586  uint64_t Imm = C->getZExtValue();
1587  ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1588  }
1589  if (ResultReg)
1590  return ResultReg;
1591 
1592  // Check if the mul can be folded into the instruction.
1593  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1594  if (isMulPowOf2(RHS)) {
1595  const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1596  const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1597 
1598  if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1599  if (C->getValue().isPowerOf2())
1600  std::swap(MulLHS, MulRHS);
1601 
1602  assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1603  uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1604 
1605  unsigned RHSReg = getRegForValue(MulLHS);
1606  if (!RHSReg)
1607  return 0;
1608  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1609  if (ResultReg)
1610  return ResultReg;
1611  }
1612  }
1613 
1614  // Check if the shift can be folded into the instruction.
1615  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1616  if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1617  if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1618  uint64_t ShiftVal = C->getZExtValue();
1619  unsigned RHSReg = getRegForValue(SI->getOperand(0));
1620  if (!RHSReg)
1621  return 0;
1622  ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623  if (ResultReg)
1624  return ResultReg;
1625  }
1626  }
1627 
1628  unsigned RHSReg = getRegForValue(RHS);
1629  if (!RHSReg)
1630  return 0;
1631 
1632  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1633  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1634  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1635  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1636  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1637  }
1638  return ResultReg;
1639 }
1640 
1641 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1642  unsigned LHSReg, uint64_t Imm) {
1643  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1644  "ISD nodes are not consecutive!");
1645  static const unsigned OpcTable[3][2] = {
1646  { AArch64::ANDWri, AArch64::ANDXri },
1647  { AArch64::ORRWri, AArch64::ORRXri },
1648  { AArch64::EORWri, AArch64::EORXri }
1649  };
1650  const TargetRegisterClass *RC;
1651  unsigned Opc;
1652  unsigned RegSize;
1653  switch (RetVT.SimpleTy) {
1654  default:
1655  return 0;
1656  case MVT::i1:
1657  case MVT::i8:
1658  case MVT::i16:
1659  case MVT::i32: {
1660  unsigned Idx = ISDOpc - ISD::AND;
1661  Opc = OpcTable[Idx][0];
1662  RC = &AArch64::GPR32spRegClass;
1663  RegSize = 32;
1664  break;
1665  }
1666  case MVT::i64:
1667  Opc = OpcTable[ISDOpc - ISD::AND][1];
1668  RC = &AArch64::GPR64spRegClass;
1669  RegSize = 64;
1670  break;
1671  }
1672 
1673  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1674  return 0;
1675 
1676  unsigned ResultReg =
1677  fastEmitInst_ri(Opc, RC, LHSReg,
1678  AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1679  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1680  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1681  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1682  }
1683  return ResultReg;
1684 }
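// Example (illustrative): emitLogicalOp_ri(ISD::AND, MVT::i32, Reg, 0xff)
// selects ANDWri because 0xff is a valid AArch64 bitmask immediate, while a
// value such as 0xab cannot be encoded, so the function returns 0 and
// emitLogicalOp falls back to a register-register form via fastEmit_rr.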
1685 
1686 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1687  unsigned LHSReg, unsigned RHSReg,
1688  uint64_t ShiftImm) {
1689  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1690  "ISD nodes are not consecutive!");
1691  static const unsigned OpcTable[3][2] = {
1692  { AArch64::ANDWrs, AArch64::ANDXrs },
1693  { AArch64::ORRWrs, AArch64::ORRXrs },
1694  { AArch64::EORWrs, AArch64::EORXrs }
1695  };
1696 
1697  // Don't deal with undefined shifts.
1698  if (ShiftImm >= RetVT.getSizeInBits())
1699  return 0;
1700 
1701  const TargetRegisterClass *RC;
1702  unsigned Opc;
1703  switch (RetVT.SimpleTy) {
1704  default:
1705  return 0;
1706  case MVT::i1:
1707  case MVT::i8:
1708  case MVT::i16:
1709  case MVT::i32:
1710  Opc = OpcTable[ISDOpc - ISD::AND][0];
1711  RC = &AArch64::GPR32RegClass;
1712  break;
1713  case MVT::i64:
1714  Opc = OpcTable[ISDOpc - ISD::AND][1];
1715  RC = &AArch64::GPR64RegClass;
1716  break;
1717  }
1718  unsigned ResultReg =
1719  fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1720  AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1721  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1722  uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1723  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1724  }
1725  return ResultReg;
1726 }
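// Example (illustrative): folding a left-shift of 3 into an AND yields a
// single "and w0, w1, w2, lsl #3" (ANDWrs) instead of a separate LSL + AND.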
1727 
1728 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1729  uint64_t Imm) {
1730  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1731 }
1732 
1733 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1734  bool WantZExt, MachineMemOperand *MMO) {
1735  if (!TLI.allowsMisalignedMemoryAccesses(VT))
1736  return 0;
1737 
1738  // Simplify this down to something we can handle.
1739  if (!simplifyAddress(Addr, VT))
1740  return 0;
1741 
1742  unsigned ScaleFactor = getImplicitScaleFactor(VT);
1743  if (!ScaleFactor)
1744  llvm_unreachable("Unexpected value type.");
1745 
1746  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1747  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1748  bool UseScaled = true;
1749  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1750  UseScaled = false;
1751  ScaleFactor = 1;
1752  }
1753 
1754  static const unsigned GPOpcTable[2][8][4] = {
1755  // Sign-extend.
1756  { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1757  AArch64::LDURXi },
1758  { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1759  AArch64::LDURXi },
1760  { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1761  AArch64::LDRXui },
1762  { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1763  AArch64::LDRXui },
1764  { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1765  AArch64::LDRXroX },
1766  { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1767  AArch64::LDRXroX },
1768  { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1769  AArch64::LDRXroW },
1770  { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1771  AArch64::LDRXroW }
1772  },
1773  // Zero-extend.
1774  { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1775  AArch64::LDURXi },
1776  { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1777  AArch64::LDURXi },
1778  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1779  AArch64::LDRXui },
1780  { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1781  AArch64::LDRXui },
1782  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1783  AArch64::LDRXroX },
1784  { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1785  AArch64::LDRXroX },
1786  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1787  AArch64::LDRXroW },
1788  { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1789  AArch64::LDRXroW }
1790  }
1791  };
1792 
1793  static const unsigned FPOpcTable[4][2] = {
1794  { AArch64::LDURSi, AArch64::LDURDi },
1795  { AArch64::LDRSui, AArch64::LDRDui },
1796  { AArch64::LDRSroX, AArch64::LDRDroX },
1797  { AArch64::LDRSroW, AArch64::LDRDroW }
1798  };
1799 
1800  unsigned Opc;
1801  const TargetRegisterClass *RC;
1802  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1803  Addr.getOffsetReg();
1804  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1805  if (Addr.getExtendType() == AArch64_AM::UXTW ||
1806  Addr.getExtendType() == AArch64_AM::SXTW)
1807  Idx++;
1808 
1809  bool IsRet64Bit = RetVT == MVT::i64;
1810  switch (VT.SimpleTy) {
1811  default:
1812  llvm_unreachable("Unexpected value type.");
1813  case MVT::i1: // Intentional fall-through.
1814  case MVT::i8:
1815  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1816  RC = (IsRet64Bit && !WantZExt) ?
1817  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1818  break;
1819  case MVT::i16:
1820  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1821  RC = (IsRet64Bit && !WantZExt) ?
1822  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1823  break;
1824  case MVT::i32:
1825  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1826  RC = (IsRet64Bit && !WantZExt) ?
1827  &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1828  break;
1829  case MVT::i64:
1830  Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1831  RC = &AArch64::GPR64RegClass;
1832  break;
1833  case MVT::f32:
1834  Opc = FPOpcTable[Idx][0];
1835  RC = &AArch64::FPR32RegClass;
1836  break;
1837  case MVT::f64:
1838  Opc = FPOpcTable[Idx][1];
1839  RC = &AArch64::FPR64RegClass;
1840  break;
1841  }
1842 
1843  // Create the base instruction, then add the operands.
1844  unsigned ResultReg = createResultReg(RC);
1845  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1846  TII.get(Opc), ResultReg);
1847  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1848 
1849  // Loading an i1 requires special handling.
1850  if (VT == MVT::i1) {
1851  unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1852  assert(ANDReg && "Unexpected AND instruction emission failure.");
1853  ResultReg = ANDReg;
1854  }
1855 
1856  // For zero-extending loads to 64bit we emit a 32bit load and then convert
1857  // the 32bit reg to a 64bit reg.
1858  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1859  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1860  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861  TII.get(AArch64::SUBREG_TO_REG), Reg64)
1862  .addImm(0)
1863  .addReg(ResultReg, getKillRegState(true))
1864  .addImm(AArch64::sub_32);
1865  ResultReg = Reg64;
1866  }
1867  return ResultReg;
1868 }
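// Example (illustrative): for an i32 load, a non-negative offset that is a
// multiple of 4 (e.g. 8) uses the scaled form LDRWui with imm 8/4 = 2, while
// a negative offset such as -4 falls back to the unscaled LDURWi form, whose
// immediate is a 9-bit signed byte offset.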
1869 
1870 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1871  MVT VT;
1872  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1873  return false;
1874 
1875  if (VT.isVector())
1876  return selectOperator(I, I->getOpcode());
1877 
1878  unsigned ResultReg;
1879  switch (I->getOpcode()) {
1880  default:
1881  llvm_unreachable("Unexpected instruction.");
1882  case Instruction::Add:
1883  ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1884  break;
1885  case Instruction::Sub:
1886  ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1887  break;
1888  }
1889  if (!ResultReg)
1890  return false;
1891 
1892  updateValueMap(I, ResultReg);
1893  return true;
1894 }
1895 
1896 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1897  MVT VT;
1898  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1899  return false;
1900 
1901  if (VT.isVector())
1902  return selectOperator(I, I->getOpcode());
1903 
1904  unsigned ResultReg;
1905  switch (I->getOpcode()) {
1906  default:
1907  llvm_unreachable("Unexpected instruction.");
1908  case Instruction::And:
1909  ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1910  break;
1911  case Instruction::Or:
1912  ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1913  break;
1914  case Instruction::Xor:
1915  ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1916  break;
1917  }
1918  if (!ResultReg)
1919  return false;
1920 
1921  updateValueMap(I, ResultReg);
1922  return true;
1923 }
1924 
1925 bool AArch64FastISel::selectLoad(const Instruction *I) {
1926  MVT VT;
1927  // Verify we have a legal type before going any further. Currently, we handle
1928  // simple types that will directly fit in a register (i32/f32/i64/f64) or
1929  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1930  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1931  cast<LoadInst>(I)->isAtomic())
1932  return false;
1933 
1934  const Value *SV = I->getOperand(0);
1935  if (TLI.supportSwiftError()) {
1936  // Swifterror values can come from either a function parameter with
1937  // swifterror attribute or an alloca with swifterror attribute.
1938  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1939  if (Arg->hasSwiftErrorAttr())
1940  return false;
1941  }
1942 
1943  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1944  if (Alloca->isSwiftError())
1945  return false;
1946  }
1947  }
1948 
1949  // See if we can handle this address.
1950  Address Addr;
1951  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1952  return false;
1953 
1954  // Fold the following sign-/zero-extend into the load instruction.
1955  bool WantZExt = true;
1956  MVT RetVT = VT;
1957  const Value *IntExtVal = nullptr;
1958  if (I->hasOneUse()) {
1959  if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1960  if (isTypeSupported(ZE->getType(), RetVT))
1961  IntExtVal = ZE;
1962  else
1963  RetVT = VT;
1964  } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1965  if (isTypeSupported(SE->getType(), RetVT))
1966  IntExtVal = SE;
1967  else
1968  RetVT = VT;
1969  WantZExt = false;
1970  }
1971  }
1972 
1973  unsigned ResultReg =
1974  emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1975  if (!ResultReg)
1976  return false;
1977 
1978  // There are a few different cases we have to handle, because the load or the
1979  // sign-/zero-extend might not be selected by FastISel if we fall-back to
1980  // SelectionDAG. There is also an ordering issue when both instructions are in
1981  // different basic blocks.
1982  // 1.) The load instruction is selected by FastISel, but the integer extend
1983  // not. This usually happens when the integer extend is in a different
1984  // basic block and SelectionDAG took over for that basic block.
1985  // 2.) The load instruction is selected before the integer extend. This only
1986  // happens when the integer extend is in a different basic block.
1987  // 3.) The load instruction is selected by SelectionDAG and the integer extend
1988  // by FastISel. This happens if there are instructions between the load
1989  // and the integer extend that couldn't be selected by FastISel.
1990  if (IntExtVal) {
1991  // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1992  // could select it. Emit a copy to subreg if necessary. FastISel will remove
1993  // it when it selects the integer extend.
1994  unsigned Reg = lookUpRegForValue(IntExtVal);
1995  auto *MI = MRI.getUniqueVRegDef(Reg);
1996  if (!MI) {
1997  if (RetVT == MVT::i64 && VT <= MVT::i32) {
1998  if (WantZExt) {
1999  // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2000  MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2001  ResultReg = std::prev(I)->getOperand(0).getReg();
2002  removeDeadCode(I, std::next(I));
2003  } else
2004  ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2005  AArch64::sub_32);
2006  }
2007  updateValueMap(I, ResultReg);
2008  return true;
2009  }
2010 
2011  // The integer extend has already been emitted - delete all the instructions
2012  // that have been emitted by the integer extend lowering code and use the
2013  // result from the load instruction directly.
2014  while (MI) {
2015  Reg = 0;
2016  for (auto &Opnd : MI->uses()) {
2017  if (Opnd.isReg()) {
2018  Reg = Opnd.getReg();
2019  break;
2020  }
2021  }
2022  MachineBasicBlock::iterator I(MI);
2023  removeDeadCode(I, std::next(I));
2024  MI = nullptr;
2025  if (Reg)
2026  MI = MRI.getUniqueVRegDef(Reg);
2027  }
2028  updateValueMap(IntExtVal, ResultReg);
2029  return true;
2030  }
2031 
2032  updateValueMap(I, ResultReg);
2033  return true;
2034 }
2035 
2036 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2037  unsigned AddrReg,
2038  MachineMemOperand *MMO) {
2039  unsigned Opc;
2040  switch (VT.SimpleTy) {
2041  default: return false;
2042  case MVT::i8: Opc = AArch64::STLRB; break;
2043  case MVT::i16: Opc = AArch64::STLRH; break;
2044  case MVT::i32: Opc = AArch64::STLRW; break;
2045  case MVT::i64: Opc = AArch64::STLRX; break;
2046  }
2047 
2048  const MCInstrDesc &II = TII.get(Opc);
2049  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2050  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2051  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2052  .addReg(SrcReg)
2053  .addReg(AddrReg)
2054  .addMemOperand(MMO);
2055  return true;
2056 }
2057 
2058 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2059  MachineMemOperand *MMO) {
2060  if (!TLI.allowsMisalignedMemoryAccesses(VT))
2061  return false;
2062 
2063  // Simplify this down to something we can handle.
2064  if (!simplifyAddress(Addr, VT))
2065  return false;
2066 
2067  unsigned ScaleFactor = getImplicitScaleFactor(VT);
2068  if (!ScaleFactor)
2069  llvm_unreachable("Unexpected value type.");
2070 
2071  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2072  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2073  bool UseScaled = true;
2074  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2075  UseScaled = false;
2076  ScaleFactor = 1;
2077  }
2078 
2079  static const unsigned OpcTable[4][6] = {
2080  { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2081  AArch64::STURSi, AArch64::STURDi },
2082  { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2083  AArch64::STRSui, AArch64::STRDui },
2084  { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2085  AArch64::STRSroX, AArch64::STRDroX },
2086  { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2087  AArch64::STRSroW, AArch64::STRDroW }
2088  };
2089 
2090  unsigned Opc;
2091  bool VTIsi1 = false;
2092  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2093  Addr.getOffsetReg();
2094  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2095  if (Addr.getExtendType() == AArch64_AM::UXTW ||
2096  Addr.getExtendType() == AArch64_AM::SXTW)
2097  Idx++;
2098 
2099  switch (VT.SimpleTy) {
2100  default: llvm_unreachable("Unexpected value type.");
2101  case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2102  case MVT::i8: Opc = OpcTable[Idx][0]; break;
2103  case MVT::i16: Opc = OpcTable[Idx][1]; break;
2104  case MVT::i32: Opc = OpcTable[Idx][2]; break;
2105  case MVT::i64: Opc = OpcTable[Idx][3]; break;
2106  case MVT::f32: Opc = OpcTable[Idx][4]; break;
2107  case MVT::f64: Opc = OpcTable[Idx][5]; break;
2108  }
2109 
2110  // Storing an i1 requires special handling.
2111  if (VTIsi1 && SrcReg != AArch64::WZR) {
2112  unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2113  assert(ANDReg && "Unexpected AND instruction emission failure.");
2114  SrcReg = ANDReg;
2115  }
2116  // Create the base instruction, then add the operands.
2117  const MCInstrDesc &II = TII.get(Opc);
2118  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2119  MachineInstrBuilder MIB =
2120  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2121  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2122 
2123  return true;
2124 }
2125 
2126 bool AArch64FastISel::selectStore(const Instruction *I) {
2127  MVT VT;
2128  const Value *Op0 = I->getOperand(0);
2129  // Verify we have a legal type before going any further. Currently, we handle
2130  // simple types that will directly fit in a register (i32/f32/i64/f64) or
2131  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2132  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2133  return false;
2134 
2135  const Value *PtrV = I->getOperand(1);
2136  if (TLI.supportSwiftError()) {
2137  // Swifterror values can come from either a function parameter with
2138  // swifterror attribute or an alloca with swifterror attribute.
2139  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2140  if (Arg->hasSwiftErrorAttr())
2141  return false;
2142  }
2143 
2144  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2145  if (Alloca->isSwiftError())
2146  return false;
2147  }
2148  }
2149 
2150  // Get the value to be stored into a register. Use the zero register directly
2151  // when possible to avoid an unnecessary copy and a wasted register.
2152  unsigned SrcReg = 0;
2153  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2154  if (CI->isZero())
2155  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2156  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2157  if (CF->isZero() && !CF->isNegative()) {
2158  VT = MVT::getIntegerVT(VT.getSizeInBits());
2159  SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2160  }
2161  }
2162 
2163  if (!SrcReg)
2164  SrcReg = getRegForValue(Op0);
2165 
2166  if (!SrcReg)
2167  return false;
2168 
2169  auto *SI = cast<StoreInst>(I);
2170 
2171  // Try to emit a STLR for seq_cst/release.
2172  if (SI->isAtomic()) {
2173  AtomicOrdering Ord = SI->getOrdering();
2174  // The non-atomic instructions are sufficient for relaxed stores.
2175  if (isReleaseOrStronger(Ord)) {
2176  // The STLR addressing mode only supports a base reg; pass that directly.
2177  unsigned AddrReg = getRegForValue(PtrV);
2178  return emitStoreRelease(VT, SrcReg, AddrReg,
2179  createMachineMemOperandFor(I));
2180  }
2181  }
2182 
2183  // See if we can handle this address.
2184  Address Addr;
2185  if (!computeAddress(PtrV, Addr, Op0->getType()))
2186  return false;
2187 
2188  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2189  return false;
2190  return true;
2191 }
2192 
2193 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2194  switch (Pred) {
2195  case CmpInst::FCMP_ONE:
2196  case CmpInst::FCMP_UEQ:
2197  default:
2198  // AL is our "false" for now. The other two need more compares.
2199  return AArch64CC::AL;
2200  case CmpInst::ICMP_EQ:
2201  case CmpInst::FCMP_OEQ:
2202  return AArch64CC::EQ;
2203  case CmpInst::ICMP_SGT:
2204  case CmpInst::FCMP_OGT:
2205  return AArch64CC::GT;
2206  case CmpInst::ICMP_SGE:
2207  case CmpInst::FCMP_OGE:
2208  return AArch64CC::GE;
2209  case CmpInst::ICMP_UGT:
2210  case CmpInst::FCMP_UGT:
2211  return AArch64CC::HI;
2212  case CmpInst::FCMP_OLT:
2213  return AArch64CC::MI;
2214  case CmpInst::ICMP_ULE:
2215  case CmpInst::FCMP_OLE:
2216  return AArch64CC::LS;
2217  case CmpInst::FCMP_ORD:
2218  return AArch64CC::VC;
2219  case CmpInst::FCMP_UNO:
2220  return AArch64CC::VS;
2221  case CmpInst::FCMP_UGE:
2222  return AArch64CC::PL;
2223  case CmpInst::ICMP_SLT:
2224  case CmpInst::FCMP_ULT:
2225  return AArch64CC::LT;
2226  case CmpInst::ICMP_SLE:
2227  case CmpInst::FCMP_ULE:
2228  return AArch64CC::LE;
2229  case CmpInst::FCMP_UNE:
2230  case CmpInst::ICMP_NE:
2231  return AArch64CC::NE;
2232  case CmpInst::ICMP_UGE:
2233  return AArch64CC::HS;
2234  case CmpInst::ICMP_ULT:
2235  return AArch64CC::LO;
2236  }
2237 }
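// Note: FCMP_UEQ (equal or unordered) and FCMP_ONE (ordered and not equal)
// cannot be expressed with a single AArch64 condition code, so they map to
// AL here; selectBranch and selectCmp special-case them by checking two
// condition codes (e.g. EQ plus VS for FCMP_UEQ).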
2238 
2239 /// Try to emit a combined compare-and-branch instruction.
2240 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2241  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2242  // will not be produced, as they are conditional branch instructions that do
2243  // not set flags.
2244  if (FuncInfo.MF->getFunction().hasFnAttribute(
2245  Attribute::SpeculativeLoadHardening))
2246  return false;
2247 
2248  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2249  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2250  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2251 
2252  const Value *LHS = CI->getOperand(0);
2253  const Value *RHS = CI->getOperand(1);
2254 
2255  MVT VT;
2256  if (!isTypeSupported(LHS->getType(), VT))
2257  return false;
2258 
2259  unsigned BW = VT.getSizeInBits();
2260  if (BW > 64)
2261  return false;
2262 
2263  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2264  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2265 
2266  // Try to take advantage of fallthrough opportunities.
2267  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2268  std::swap(TBB, FBB);
2269  Predicate = CmpInst::getInversePredicate(Predicate);
2270  }
2271 
2272  int TestBit = -1;
2273  bool IsCmpNE;
2274  switch (Predicate) {
2275  default:
2276  return false;
2277  case CmpInst::ICMP_EQ:
2278  case CmpInst::ICMP_NE:
2279  if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2280  std::swap(LHS, RHS);
2281 
2282  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2283  return false;
2284 
2285  if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2286  if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2287  const Value *AndLHS = AI->getOperand(0);
2288  const Value *AndRHS = AI->getOperand(1);
2289 
2290  if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2291  if (C->getValue().isPowerOf2())
2292  std::swap(AndLHS, AndRHS);
2293 
2294  if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2295  if (C->getValue().isPowerOf2()) {
2296  TestBit = C->getValue().logBase2();
2297  LHS = AndLHS;
2298  }
2299  }
2300 
2301  if (VT == MVT::i1)
2302  TestBit = 0;
2303 
2304  IsCmpNE = Predicate == CmpInst::ICMP_NE;
2305  break;
2306  case CmpInst::ICMP_SLT:
2307  case CmpInst::ICMP_SGE:
2308  if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2309  return false;
2310 
2311  TestBit = BW - 1;
2312  IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2313  break;
2314  case CmpInst::ICMP_SGT:
2315  case CmpInst::ICMP_SLE:
2316  if (!isa<ConstantInt>(RHS))
2317  return false;
2318 
2319  if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2320  return false;
2321 
2322  TestBit = BW - 1;
2323  IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2324  break;
2325  } // end switch
2326 
2327  static const unsigned OpcTable[2][2][2] = {
2328  { {AArch64::CBZW, AArch64::CBZX },
2329  {AArch64::CBNZW, AArch64::CBNZX} },
2330  { {AArch64::TBZW, AArch64::TBZX },
2331  {AArch64::TBNZW, AArch64::TBNZX} }
2332  };
2333 
2334  bool IsBitTest = TestBit != -1;
2335  bool Is64Bit = BW == 64;
2336  if (TestBit < 32 && TestBit >= 0)
2337  Is64Bit = false;
2338 
2339  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2340  const MCInstrDesc &II = TII.get(Opc);
2341 
2342  unsigned SrcReg = getRegForValue(LHS);
2343  if (!SrcReg)
2344  return false;
2345 
2346  if (BW == 64 && !Is64Bit)
2347  SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2348 
2349  if ((BW < 32) && !IsBitTest)
2350  SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2351 
2352  // Emit the combined compare and branch instruction.
2353  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2354  MachineInstrBuilder MIB =
2355  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2356  .addReg(SrcReg);
2357  if (IsBitTest)
2358  MIB.addImm(TestBit);
2359  MIB.addMBB(TBB);
2360 
2361  finishCondBranch(BI->getParent(), TBB, FBB);
2362  return true;
2363 }
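// Example (illustrative): "%c = icmp slt i32 %x, 0; br i1 %c, ..." only
// depends on the sign bit of %x, so it is emitted as a single
// "tbnz w0, #31, <bb>" instead of a flag-setting compare followed by a
// conditional branch.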
2364 
2365 bool AArch64FastISel::selectBranch(const Instruction *I) {
2366  const BranchInst *BI = cast<BranchInst>(I);
2367  if (BI->isUnconditional()) {
2368  MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2369  fastEmitBranch(MSucc, BI->getDebugLoc());
2370  return true;
2371  }
2372 
2373  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2374  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2375 
2376  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2377  if (CI->hasOneUse() && isValueAvailable(CI)) {
2378  // Try to optimize or fold the cmp.
2379  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2380  switch (Predicate) {
2381  default:
2382  break;
2383  case CmpInst::FCMP_FALSE:
2384  fastEmitBranch(FBB, DbgLoc);
2385  return true;
2386  case CmpInst::FCMP_TRUE:
2387  fastEmitBranch(TBB, DbgLoc);
2388  return true;
2389  }
2390 
2391  // Try to emit a combined compare-and-branch first.
2392  if (emitCompareAndBranch(BI))
2393  return true;
2394 
2395  // Try to take advantage of fallthrough opportunities.
2396  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2397  std::swap(TBB, FBB);
2398  Predicate = CmpInst::getInversePredicate(Predicate);
2399  }
2400 
2401  // Emit the cmp.
2402  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2403  return false;
2404 
2405  // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2406  // instruction.
2407  AArch64CC::CondCode CC = getCompareCC(Predicate);
2408  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2409  switch (Predicate) {
2410  default:
2411  break;
2412  case CmpInst::FCMP_UEQ:
2413  ExtraCC = AArch64CC::EQ;
2414  CC = AArch64CC::VS;
2415  break;
2416  case CmpInst::FCMP_ONE:
2417  ExtraCC = AArch64CC::MI;
2418  CC = AArch64CC::GT;
2419  break;
2420  }
2421  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2422 
2423  // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2424  if (ExtraCC != AArch64CC::AL) {
2425  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2426  .addImm(ExtraCC)
2427  .addMBB(TBB);
2428  }
2429 
2430  // Emit the branch.
2431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2432  .addImm(CC)
2433  .addMBB(TBB);
2434 
2435  finishCondBranch(BI->getParent(), TBB, FBB);
2436  return true;
2437  }
2438  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2439  uint64_t Imm = CI->getZExtValue();
2440  MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2442  .addMBB(Target);
2443 
2444  // Obtain the branch probability and add the target to the successor list.
2445  if (FuncInfo.BPI) {
2446  auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2447  BI->getParent(), Target->getBasicBlock());
2448  FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2449  } else
2450  FuncInfo.MBB->addSuccessorWithoutProb(Target);
2451  return true;
2452  } else {
2453  AArch64CC::CondCode CC = AArch64CC::AL;
2454  if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2455  // Fake request the condition, otherwise the intrinsic might be completely
2456  // optimized away.
2457  unsigned CondReg = getRegForValue(BI->getCondition());
2458  if (!CondReg)
2459  return false;
2460 
2461  // Emit the branch.
2462  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2463  .addImm(CC)
2464  .addMBB(TBB);
2465 
2466  finishCondBranch(BI->getParent(), TBB, FBB);
2467  return true;
2468  }
2469  }
2470 
2471  unsigned CondReg = getRegForValue(BI->getCondition());
2472  if (CondReg == 0)
2473  return false;
2474 
2475  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2476  unsigned Opcode = AArch64::TBNZW;
2477  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2478  std::swap(TBB, FBB);
2479  Opcode = AArch64::TBZW;
2480  }
2481 
2482  const MCInstrDesc &II = TII.get(Opcode);
2483  unsigned ConstrainedCondReg
2484  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2486  .addReg(ConstrainedCondReg)
2487  .addImm(0)
2488  .addMBB(TBB);
2489 
2490  finishCondBranch(BI->getParent(), TBB, FBB);
2491  return true;
2492 }
2493 
2494 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2495  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2496  unsigned AddrReg = getRegForValue(BI->getOperand(0));
2497  if (AddrReg == 0)
2498  return false;
2499 
2500  // Emit the indirect branch.
2501  const MCInstrDesc &II = TII.get(AArch64::BR);
2502  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2503  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2504 
2505  // Make sure the CFG is up-to-date.
2506  for (auto *Succ : BI->successors())
2507  FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2508 
2509  return true;
2510 }
2511 
2512 bool AArch64FastISel::selectCmp(const Instruction *I) {
2513  const CmpInst *CI = cast<CmpInst>(I);
2514 
2515  // Vectors of i1 are weird: bail out.
2516  if (CI->getType()->isVectorTy())
2517  return false;
2518 
2519  // Try to optimize or fold the cmp.
2520  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2521  unsigned ResultReg = 0;
2522  switch (Predicate) {
2523  default:
2524  break;
2525  case CmpInst::FCMP_FALSE:
2526  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2527  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2528  TII.get(TargetOpcode::COPY), ResultReg)
2529  .addReg(AArch64::WZR, getKillRegState(true));
2530  break;
2531  case CmpInst::FCMP_TRUE:
2532  ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2533  break;
2534  }
2535 
2536  if (ResultReg) {
2537  updateValueMap(I, ResultReg);
2538  return true;
2539  }
2540 
2541  // Emit the cmp.
2542  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2543  return false;
2544 
2545  ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 
2547  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2548  // condition codes are inverted, because they are used by CSINC.
2549  static unsigned CondCodeTable[2][2] = {
2550  { AArch64CC::NE, AArch64CC::VC },
2551  { AArch64CC::PL, AArch64CC::LE }
2552  };
2553  unsigned *CondCodes = nullptr;
2554  switch (Predicate) {
2555  default:
2556  break;
2557  case CmpInst::FCMP_UEQ:
2558  CondCodes = &CondCodeTable[0][0];
2559  break;
2560  case CmpInst::FCMP_ONE:
2561  CondCodes = &CondCodeTable[1][0];
2562  break;
2563  }
2564 
2565  if (CondCodes) {
2566  unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2567  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2568  TmpReg1)
2569  .addReg(AArch64::WZR, getKillRegState(true))
2570  .addReg(AArch64::WZR, getKillRegState(true))
2571  .addImm(CondCodes[0]);
2572  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2573  ResultReg)
2574  .addReg(TmpReg1, getKillRegState(true))
2575  .addReg(AArch64::WZR, getKillRegState(true))
2576  .addImm(CondCodes[1]);
2577 
2578  updateValueMap(I, ResultReg);
2579  return true;
2580  }
2581 
2582  // Now set a register based on the comparison.
2584  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2585  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587  ResultReg)
2588  .addReg(AArch64::WZR, getKillRegState(true))
2589  .addReg(AArch64::WZR, getKillRegState(true))
2590  .addImm(invertedCC);
2591 
2592  updateValueMap(I, ResultReg);
2593  return true;
2594 }
2595 
2596 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2597 /// value.
2598 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2599  if (!SI->getType()->isIntegerTy(1))
2600  return false;
2601 
2602  const Value *Src1Val, *Src2Val;
2603  unsigned Opc = 0;
2604  bool NeedExtraOp = false;
2605  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2606  if (CI->isOne()) {
2607  Src1Val = SI->getCondition();
2608  Src2Val = SI->getFalseValue();
2609  Opc = AArch64::ORRWrr;
2610  } else {
2611  assert(CI->isZero());
2612  Src1Val = SI->getFalseValue();
2613  Src2Val = SI->getCondition();
2614  Opc = AArch64::BICWrr;
2615  }
2616  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2617  if (CI->isOne()) {
2618  Src1Val = SI->getCondition();
2619  Src2Val = SI->getTrueValue();
2620  Opc = AArch64::ORRWrr;
2621  NeedExtraOp = true;
2622  } else {
2623  assert(CI->isZero());
2624  Src1Val = SI->getCondition();
2625  Src2Val = SI->getTrueValue();
2626  Opc = AArch64::ANDWrr;
2627  }
2628  }
2629 
2630  if (!Opc)
2631  return false;
2632 
2633  unsigned Src1Reg = getRegForValue(Src1Val);
2634  if (!Src1Reg)
2635  return false;
2636 
2637  unsigned Src2Reg = getRegForValue(Src2Val);
2638  if (!Src2Reg)
2639  return false;
2640 
2641  if (NeedExtraOp)
2642  Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2643 
2644  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2645  Src2Reg);
2646  updateValueMap(SI, ResultReg);
2647  return true;
2648 }
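// Example (illustrative): "select i1 %c, i1 true, i1 %b" is just %c | %b and
// becomes ORRWrr, while "select i1 %c, i1 %a, i1 false" is %c & %a and
// becomes ANDWrr, avoiding a compare + CSEL sequence.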
2649 
2650 bool AArch64FastISel::selectSelect(const Instruction *I) {
2651  assert(isa<SelectInst>(I) && "Expected a select instruction.");
2652  MVT VT;
2653  if (!isTypeSupported(I->getType(), VT))
2654  return false;
2655 
2656  unsigned Opc;
2657  const TargetRegisterClass *RC;
2658  switch (VT.SimpleTy) {
2659  default:
2660  return false;
2661  case MVT::i1:
2662  case MVT::i8:
2663  case MVT::i16:
2664  case MVT::i32:
2665  Opc = AArch64::CSELWr;
2666  RC = &AArch64::GPR32RegClass;
2667  break;
2668  case MVT::i64:
2669  Opc = AArch64::CSELXr;
2670  RC = &AArch64::GPR64RegClass;
2671  break;
2672  case MVT::f32:
2673  Opc = AArch64::FCSELSrrr;
2674  RC = &AArch64::FPR32RegClass;
2675  break;
2676  case MVT::f64:
2677  Opc = AArch64::FCSELDrrr;
2678  RC = &AArch64::FPR64RegClass;
2679  break;
2680  }
2681 
2682  const SelectInst *SI = cast<SelectInst>(I);
2683  const Value *Cond = SI->getCondition();
2684  AArch64CC::CondCode CC = AArch64CC::AL;
2685  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2686 
2687  if (optimizeSelect(SI))
2688  return true;
2689 
2690  // Try to pickup the flags, so we don't have to emit another compare.
2691  if (foldXALUIntrinsic(CC, I, Cond)) {
2692  // Fake request the condition to force emission of the XALU intrinsic.
2693  unsigned CondReg = getRegForValue(Cond);
2694  if (!CondReg)
2695  return false;
2696  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2697  isValueAvailable(Cond)) {
2698  const auto *Cmp = cast<CmpInst>(Cond);
2699  // Try to optimize or fold the cmp.
2700  CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2701  const Value *FoldSelect = nullptr;
2702  switch (Predicate) {
2703  default:
2704  break;
2705  case CmpInst::FCMP_FALSE:
2706  FoldSelect = SI->getFalseValue();
2707  break;
2708  case CmpInst::FCMP_TRUE:
2709  FoldSelect = SI->getTrueValue();
2710  break;
2711  }
2712 
2713  if (FoldSelect) {
2714  unsigned SrcReg = getRegForValue(FoldSelect);
2715  if (!SrcReg)
2716  return false;
2717 
2718  updateValueMap(I, SrcReg);
2719  return true;
2720  }
2721 
2722  // Emit the cmp.
2723  if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2724  return false;
2725 
2726  // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2727  CC = getCompareCC(Predicate);
2728  switch (Predicate) {
2729  default:
2730  break;
2731  case CmpInst::FCMP_UEQ:
2732  ExtraCC = AArch64CC::EQ;
2733  CC = AArch64CC::VS;
2734  break;
2735  case CmpInst::FCMP_ONE:
2736  ExtraCC = AArch64CC::MI;
2737  CC = AArch64CC::GT;
2738  break;
2739  }
2740  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2741  } else {
2742  unsigned CondReg = getRegForValue(Cond);
2743  if (!CondReg)
2744  return false;
2745 
2746  const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2747  CondReg = constrainOperandRegClass(II, CondReg, 1);
2748 
2749  // Emit a TST instruction (ANDS wzr, reg, #imm).
2750  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2751  AArch64::WZR)
2752  .addReg(CondReg)
2753  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2754  }
2755 
2756  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2757  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2758 
2759  if (!Src1Reg || !Src2Reg)
2760  return false;
2761 
2762  if (ExtraCC != AArch64CC::AL)
2763  Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2764 
2765  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2766  updateValueMap(I, ResultReg);
2767  return true;
2768 }
2769 
2770 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2771  Value *V = I->getOperand(0);
2772  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2773  return false;
2774 
2775  unsigned Op = getRegForValue(V);
2776  if (Op == 0)
2777  return false;
2778 
2779  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2781  ResultReg).addReg(Op);
2782  updateValueMap(I, ResultReg);
2783  return true;
2784 }
2785 
2786 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2787  Value *V = I->getOperand(0);
2788  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2789  return false;
2790 
2791  unsigned Op = getRegForValue(V);
2792  if (Op == 0)
2793  return false;
2794 
2795  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2796  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2797  ResultReg).addReg(Op);
2798  updateValueMap(I, ResultReg);
2799  return true;
2800 }
2801 
2802 // FPToUI and FPToSI
2803 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2804  MVT DestVT;
2805  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2806  return false;
2807 
2808  unsigned SrcReg = getRegForValue(I->getOperand(0));
2809  if (SrcReg == 0)
2810  return false;
2811 
2812  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2813  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2814  return false;
2815 
2816  unsigned Opc;
2817  if (SrcVT == MVT::f64) {
2818  if (Signed)
2819  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2820  else
2821  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2822  } else {
2823  if (Signed)
2824  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2825  else
2826  Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2827  }
2828  unsigned ResultReg = createResultReg(
2829  DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2831  .addReg(SrcReg);
2832  updateValueMap(I, ResultReg);
2833  return true;
2834 }
2835 
2836 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2837  MVT DestVT;
2838  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2839  return false;
2840  // Let regular ISEL handle FP16
2841  if (DestVT == MVT::f16)
2842  return false;
2843 
2844  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2845  "Unexpected value type.");
2846 
2847  unsigned SrcReg = getRegForValue(I->getOperand(0));
2848  if (!SrcReg)
2849  return false;
2850 
2851  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2852 
2853  // Handle sign-extension.
2854  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2855  SrcReg =
2856  emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2857  if (!SrcReg)
2858  return false;
2859  }
2860 
2861  unsigned Opc;
2862  if (SrcVT == MVT::i64) {
2863  if (Signed)
2864  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2865  else
2866  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2867  } else {
2868  if (Signed)
2869  Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2870  else
2871  Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2872  }
2873 
2874  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2875  updateValueMap(I, ResultReg);
2876  return true;
2877 }
2878 
2879 bool AArch64FastISel::fastLowerArguments() {
2880  if (!FuncInfo.CanLowerReturn)
2881  return false;
2882 
2883  const Function *F = FuncInfo.Fn;
2884  if (F->isVarArg())
2885  return false;
2886 
2887  CallingConv::ID CC = F->getCallingConv();
2888  if (CC != CallingConv::C && CC != CallingConv::Swift)
2889  return false;
2890 
2891  if (Subtarget->hasCustomCallingConv())
2892  return false;
2893 
2894  // Only handle simple cases of up to 8 GPR and FPR each.
2895  unsigned GPRCnt = 0;
2896  unsigned FPRCnt = 0;
2897  for (auto const &Arg : F->args()) {
2898  if (Arg.hasAttribute(Attribute::ByVal) ||
2899  Arg.hasAttribute(Attribute::InReg) ||
2900  Arg.hasAttribute(Attribute::StructRet) ||
2901  Arg.hasAttribute(Attribute::SwiftSelf) ||
2902  Arg.hasAttribute(Attribute::SwiftAsync) ||
2903  Arg.hasAttribute(Attribute::SwiftError) ||
2904  Arg.hasAttribute(Attribute::Nest))
2905  return false;
2906 
2907  Type *ArgTy = Arg.getType();
2908  if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2909  return false;
2910 
2911  EVT ArgVT = TLI.getValueType(DL, ArgTy);
2912  if (!ArgVT.isSimple())
2913  return false;
2914 
2915  MVT VT = ArgVT.getSimpleVT().SimpleTy;
2916  if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2917  return false;
2918 
2919  if (VT.isVector() &&
2920  (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2921  return false;
2922 
2923  if (VT >= MVT::i1 && VT <= MVT::i64)
2924  ++GPRCnt;
2925  else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2926  VT.is128BitVector())
2927  ++FPRCnt;
2928  else
2929  return false;
2930 
2931  if (GPRCnt > 8 || FPRCnt > 8)
2932  return false;
2933  }
2934 
2935  static const MCPhysReg Registers[6][8] = {
2936  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2937  AArch64::W5, AArch64::W6, AArch64::W7 },
2938  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2939  AArch64::X5, AArch64::X6, AArch64::X7 },
2940  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2941  AArch64::H5, AArch64::H6, AArch64::H7 },
2942  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2943  AArch64::S5, AArch64::S6, AArch64::S7 },
2944  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2945  AArch64::D5, AArch64::D6, AArch64::D7 },
2946  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2947  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2948  };
2949 
2950  unsigned GPRIdx = 0;
2951  unsigned FPRIdx = 0;
2952  for (auto const &Arg : F->args()) {
2953  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2954  unsigned SrcReg;
2955  const TargetRegisterClass *RC;
2956  if (VT >= MVT::i1 && VT <= MVT::i32) {
2957  SrcReg = Registers[0][GPRIdx++];
2958  RC = &AArch64::GPR32RegClass;
2959  VT = MVT::i32;
2960  } else if (VT == MVT::i64) {
2961  SrcReg = Registers[1][GPRIdx++];
2962  RC = &AArch64::GPR64RegClass;
2963  } else if (VT == MVT::f16) {
2964  SrcReg = Registers[2][FPRIdx++];
2965  RC = &AArch64::FPR16RegClass;
2966  } else if (VT == MVT::f32) {
2967  SrcReg = Registers[3][FPRIdx++];
2968  RC = &AArch64::FPR32RegClass;
2969  } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2970  SrcReg = Registers[4][FPRIdx++];
2971  RC = &AArch64::FPR64RegClass;
2972  } else if (VT.is128BitVector()) {
2973  SrcReg = Registers[5][FPRIdx++];
2974  RC = &AArch64::FPR128RegClass;
2975  } else
2976  llvm_unreachable("Unexpected value type.");
2977 
2978  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2979  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2980  // Without this, EmitLiveInCopies may eliminate the livein if its only
2981  // use is a bitcast (which isn't turned into an instruction).
2982  unsigned ResultReg = createResultReg(RC);
2983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2984  TII.get(TargetOpcode::COPY), ResultReg)
2985  .addReg(DstReg, getKillRegState(true));
2986  updateValueMap(&Arg, ResultReg);
2987  }
2988  return true;
2989 }
2990 
2991 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2992  SmallVectorImpl<MVT> &OutVTs,
2993  unsigned &NumBytes) {
2994  CallingConv::ID CC = CLI.CallConv;
2995  SmallVector<CCValAssign, 16> ArgLocs;
2996  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2997  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2998 
2999  // Get a count of how many bytes are to be pushed on the stack.
3000  NumBytes = CCInfo.getNextStackOffset();
3001 
3002  // Issue CALLSEQ_START
3003  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3004  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3005  .addImm(NumBytes).addImm(0);
3006 
3007  // Process the args.
3008  for (CCValAssign &VA : ArgLocs) {
3009  const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3010  MVT ArgVT = OutVTs[VA.getValNo()];
3011 
3012  unsigned ArgReg = getRegForValue(ArgVal);
3013  if (!ArgReg)
3014  return false;
3015 
3016  // Handle arg promotion: SExt, ZExt, AExt.
3017  switch (VA.getLocInfo()) {
3018  case CCValAssign::Full:
3019  break;
3020  case CCValAssign::SExt: {
3021  MVT DestVT = VA.getLocVT();
3022  MVT SrcVT = ArgVT;
3023  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3024  if (!ArgReg)
3025  return false;
3026  break;
3027  }
3028  case CCValAssign::AExt:
3029  // Intentional fall-through.
3030  case CCValAssign::ZExt: {
3031  MVT DestVT = VA.getLocVT();
3032  MVT SrcVT = ArgVT;
3033  ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3034  if (!ArgReg)
3035  return false;
3036  break;
3037  }
3038  default:
3039  llvm_unreachable("Unknown arg promotion!");
3040  }
3041 
3042  // Now copy/store arg to correct locations.
3043  if (VA.isRegLoc() && !VA.needsCustom()) {
3044  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3045  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3046  CLI.OutRegs.push_back(VA.getLocReg());
3047  } else if (VA.needsCustom()) {
3048  // FIXME: Handle custom args.
3049  return false;
3050  } else {
3051  assert(VA.isMemLoc() && "Assuming store on stack.");
3052 
3053  // Don't emit stores for undef values.
3054  if (isa<UndefValue>(ArgVal))
3055  continue;
3056 
3057  // Need to store on the stack.
3058  unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3059 
3060  unsigned BEAlign = 0;
3061  if (ArgSize < 8 && !Subtarget->isLittleEndian())
3062  BEAlign = 8 - ArgSize;
3063 
3064  Address Addr;
3065  Addr.setKind(Address::RegBase);
3066  Addr.setReg(AArch64::SP);
3067  Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3068 
3069  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3070  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3071  MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3072  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3073 
3074  if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3075  return false;
3076  }
3077  }
3078  return true;
3079 }
3080 
3081 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3082  unsigned NumBytes) {
3083  CallingConv::ID CC = CLI.CallConv;
3084 
3085  // Issue CALLSEQ_END
3086  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3087  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3088  .addImm(NumBytes).addImm(0);
3089 
3090  // Now the return value.
3091  if (RetVT != MVT::isVoid) {
3092  SmallVector<CCValAssign, 16> RVLocs;
3093  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3094  CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3095 
3096  // Only handle a single return value.
3097  if (RVLocs.size() != 1)
3098  return false;
3099 
3100  // Copy all of the result registers out of their specified physreg.
3101  MVT CopyVT = RVLocs[0].getValVT();
3102 
3103  // TODO: Handle big-endian results
3104  if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3105  return false;
3106 
3107  unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3108  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3109  TII.get(TargetOpcode::COPY), ResultReg)
3110  .addReg(RVLocs[0].getLocReg());
3111  CLI.InRegs.push_back(RVLocs[0].getLocReg());
3112 
3113  CLI.ResultReg = ResultReg;
3114  CLI.NumResultRegs = 1;
3115  }
3116 
3117  return true;
3118 }
3119 
3120 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3121  CallingConv::ID CC = CLI.CallConv;
3122  bool IsTailCall = CLI.IsTailCall;
3123  bool IsVarArg = CLI.IsVarArg;
3124  const Value *Callee = CLI.Callee;
3125  MCSymbol *Symbol = CLI.Symbol;
3126 
3127  if (!Callee && !Symbol)
3128  return false;
3129 
3130  // Allow SelectionDAG isel to handle tail calls.
3131  if (IsTailCall)
3132  return false;
3133 
3134  // FIXME: we could and should support this, but for now correctness at -O0 is
3135  // more important.
3136  if (Subtarget->isTargetILP32())
3137  return false;
3138 
3139  CodeModel::Model CM = TM.getCodeModel();
3140  // Only support the small-addressing and large code models.
3141  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3142  return false;
3143 
3144  // FIXME: Add large code model support for ELF.
3145  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3146  return false;
3147 
3148  // Let SDISel handle vararg functions.
3149  if (IsVarArg)
3150  return false;
3151 
3152  // FIXME: Only handle *simple* calls for now.
3153  MVT RetVT;
3154  if (CLI.RetTy->isVoidTy())
3155  RetVT = MVT::isVoid;
3156  else if (!isTypeLegal(CLI.RetTy, RetVT))
3157  return false;
3158 
3159  for (auto Flag : CLI.OutFlags)
3160  if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3161  Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3162  return false;
3163 
3164  // Set up the argument vectors.
3165  SmallVector<MVT, 16> OutVTs;
3166  OutVTs.reserve(CLI.OutVals.size());
3167 
3168  for (auto *Val : CLI.OutVals) {
3169  MVT VT;
3170  if (!isTypeLegal(Val->getType(), VT) &&
3171  !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3172  return false;
3173 
3174  // We don't handle vector parameters yet.
3175  if (VT.isVector() || VT.getSizeInBits() > 64)
3176  return false;
3177 
3178  OutVTs.push_back(VT);
3179  }
3180 
3181  Address Addr;
3182  if (Callee && !computeCallAddress(Callee, Addr))
3183  return false;
3184 
3185  // The weak function target may be zero; in that case we must use indirect
3186  // addressing via a stub on windows as it may be out of range for a
3187  // PC-relative jump.
3188  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3189  Addr.getGlobalValue()->hasExternalWeakLinkage())
3190  return false;
3191 
3192  // Handle the arguments now that we've gotten them.
3193  unsigned NumBytes;
3194  if (!processCallArgs(CLI, OutVTs, NumBytes))
3195  return false;
3196 
3197  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3198  if (RegInfo->isAnyArgRegReserved(*MF))
3199  RegInfo->emitReservedArgRegCallError(*MF);
3200 
3201  // Issue the call.
3202  MachineInstrBuilder MIB;
3203  if (Subtarget->useSmallAddressing()) {
3204  const MCInstrDesc &II =
3205  TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3206  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3207  if (Symbol)
3208  MIB.addSym(Symbol, 0);
3209  else if (Addr.getGlobalValue())
3210  MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3211  else if (Addr.getReg()) {
3212  unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3213  MIB.addReg(Reg);
3214  } else
3215  return false;
3216  } else {
3217  unsigned CallReg = 0;
3218  if (Symbol) {
3219  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3220  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3221  ADRPReg)
3222  .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3223 
3224  CallReg = createResultReg(&AArch64::GPR64RegClass);
3225  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3226  TII.get(AArch64::LDRXui), CallReg)
3227  .addReg(ADRPReg)
3228  .addSym(Symbol,
3229  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3230  } else if (Addr.getGlobalValue())
3231  CallReg = materializeGV(Addr.getGlobalValue());
3232  else if (Addr.getReg())
3233  CallReg = Addr.getReg();
3234 
3235  if (!CallReg)
3236  return false;
3237 
3238  const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3239  CallReg = constrainOperandRegClass(II, CallReg, 0);
3240  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3241  }
3242 
3243  // Add implicit physical register uses to the call.
3244  for (auto Reg : CLI.OutRegs)
3245  MIB.addReg(Reg, RegState::Implicit);
3246 
3247  // Add a register mask with the call-preserved registers.
3248  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3249  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3250 
3251  CLI.Call = MIB;
3252 
3253  // Finish off the call including any return values.
3254  return finishCall(CLI, RetVT, NumBytes);
3255 }
3256 
3257 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3258  if (Alignment)
3259  return Len / Alignment <= 4;
3260  else
3261  return Len < 32;
3262 }
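// Example (illustrative): a 16-byte memcpy with 8-byte alignment gives
// 16 / 8 = 2 <= 4, so it is considered small and tryEmitSmallMemCpy below
// expands it inline as two i64 load/store pairs instead of a libcall.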
3263 
3264 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3265  uint64_t Len, unsigned Alignment) {
3266  // Make sure we don't bloat code by inlining very large memcpy's.
3267  if (!isMemCpySmall(Len, Alignment))
3268  return false;
3269 
3270  int64_t UnscaledOffset = 0;
3271  Address OrigDest = Dest;
3272  Address OrigSrc = Src;
3273 
3274  while (Len) {
3275  MVT VT;
3276  if (!Alignment || Alignment >= 8) {
3277  if (Len >= 8)
3278  VT = MVT::i64;
3279  else if (Len >= 4)
3280  VT = MVT::i32;
3281  else if (Len >= 2)
3282  VT = MVT::i16;
3283  else {
3284  VT = MVT::i8;
3285  }
3286  } else {
3287  // Bound based on alignment.
3288  if (Len >= 4 && Alignment == 4)
3289  VT = MVT::i32;
3290  else if (Len >= 2 && Alignment == 2)
3291  VT = MVT::i16;
3292  else {
3293  VT = MVT::i8;
3294  }
3295  }
3296 
3297  unsigned ResultReg = emitLoad(VT, VT, Src);
3298  if (!ResultReg)
3299  return false;
3300 
3301  if (!emitStore(VT, ResultReg, Dest))
3302  return false;
3303 
3304  int64_t Size = VT.getSizeInBits() / 8;
3305  Len -= Size;
3306  UnscaledOffset += Size;
3307 
3308  // We need to recompute the unscaled offset for each iteration.
3309  Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3310  Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3311  }
3312 
3313  return true;
3314 }
3315 
3316 /// Check if it is possible to fold the condition from the XALU intrinsic
3317 /// into the user. The condition code will only be updated on success.
3318 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3319  const Instruction *I,
3320  const Value *Cond) {
3321  if (!isa<ExtractValueInst>(Cond))
3322  return false;
3323 
3324  const auto *EV = cast<ExtractValueInst>(Cond);
3325  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3326  return false;
3327 
3328  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3329  MVT RetVT;
3330  const Function *Callee = II->getCalledFunction();
3331  Type *RetTy =
3332  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3333  if (!isTypeLegal(RetTy, RetVT))
3334  return false;
3335 
3336  if (RetVT != MVT::i32 && RetVT != MVT::i64)
3337  return false;
3338 
3339  const Value *LHS = II->getArgOperand(0);
3340  const Value *RHS = II->getArgOperand(1);
3341 
3342  // Canonicalize immediate to the RHS.
3343  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3344  std::swap(LHS, RHS);
3345 
3346  // Simplify multiplies.
3347  Intrinsic::ID IID = II->getIntrinsicID();
3348  switch (IID) {
3349  default:
3350  break;
3351  case Intrinsic::smul_with_overflow:
3352  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3353  if (C->getValue() == 2)
3354  IID = Intrinsic::sadd_with_overflow;
3355  break;
3356  case Intrinsic::umul_with_overflow:
3357  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3358  if (C->getValue() == 2)
3359  IID = Intrinsic::uadd_with_overflow;
3360  break;
3361  }
3362 
3363  AArch64CC::CondCode TmpCC;
3364  switch (IID) {
3365  default:
3366  return false;
3367  case Intrinsic::sadd_with_overflow:
3368  case Intrinsic::ssub_with_overflow:
3369  TmpCC = AArch64CC::VS;
3370  break;
3371  case Intrinsic::uadd_with_overflow:
3372  TmpCC = AArch64CC::HS;
3373  break;
3374  case Intrinsic::usub_with_overflow:
3375  TmpCC = AArch64CC::LO;
3376  break;
3377  case Intrinsic::smul_with_overflow:
3378  case Intrinsic::umul_with_overflow:
3379  TmpCC = AArch64CC::NE;
3380  break;
3381  }
3382 
3383  // Check if both instructions are in the same basic block.
3384  if (!isValueAvailable(II))
3385  return false;
3386 
3387  // Make sure nothing is in the way
3388  BasicBlock::const_iterator Start(I);
3389  BasicBlock::const_iterator End(II);
3390  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3391  // We only expect extractvalue instructions between the intrinsic and the
3392  // instruction to be selected.
3393  if (!isa<ExtractValueInst>(Itr))
3394  return false;
3395 
3396  // Check that the extractvalue operand comes from the intrinsic.
3397  const auto *EVI = cast<ExtractValueInst>(Itr);
3398  if (EVI->getAggregateOperand() != II)
3399  return false;
3400  }
3401 
3402  CC = TmpCC;
3403  return true;
3404 }
3405 
3406 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3407  // FIXME: Handle more intrinsics.
3408  switch (II->getIntrinsicID()) {
3409  default: return false;
3410  case Intrinsic::frameaddress: {
3411  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3412  MFI.setFrameAddressIsTaken(true);
3413 
3414  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3415  Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3416  Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3417  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3418  TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3419  // Recursively load frame address
3420  // ldr x0, [fp]
3421  // ldr x0, [x0]
3422  // ldr x0, [x0]
3423  // ...
3424  unsigned DestReg;
3425  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3426  while (Depth--) {
3427  DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3428  SrcReg, 0);
3429  assert(DestReg && "Unexpected LDR instruction emission failure.");
3430  SrcReg = DestReg;
3431  }
3432 
3433  updateValueMap(II, SrcReg);
3434  return true;
3435  }
3436  case Intrinsic::sponentry: {
3437  MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3438 
3439  // SP = FP + Fixed Object + 16
3440  int FI = MFI.CreateFixedObject(4, 0, false);
3441  unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3442  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3443  TII.get(AArch64::ADDXri), ResultReg)
3444  .addFrameIndex(FI)
3445  .addImm(0)
3446  .addImm(0);
3447 
3448  updateValueMap(II, ResultReg);
3449  return true;
3450  }
3451  case Intrinsic::memcpy:
3452  case Intrinsic::memmove: {
3453  const auto *MTI = cast<MemTransferInst>(II);
3454  // Don't handle volatile.
3455  if (MTI->isVolatile())
3456  return false;
3457 
3458  // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3459  // we would emit dead code because we don't currently handle memmoves.
3460  bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3461  if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3462  // Small memcpy's are common enough that we want to do them without a call
3463  // if possible.
3464  uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3465  unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3466  MTI->getSourceAlignment());
3467  if (isMemCpySmall(Len, Alignment)) {
3468  Address Dest, Src;
3469  if (!computeAddress(MTI->getRawDest(), Dest) ||
3470  !computeAddress(MTI->getRawSource(), Src))
3471  return false;
3472  if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3473  return true;
3474  }
3475  }
3476 
3477  if (!MTI->getLength()->getType()->isIntegerTy(64))
3478  return false;
3479 
3480  if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3481  // Fast instruction selection doesn't support the special
3482  // address spaces.
3483  return false;
3484 
3485  const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3486  return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3487  }
3488  case Intrinsic::memset: {
3489  const MemSetInst *MSI = cast<MemSetInst>(II);
3490  // Don't handle volatile.
3491  if (MSI->isVolatile())
3492  return false;
3493 
3494  if (!MSI->getLength()->getType()->isIntegerTy(64))
3495  return false;
3496 
3497  if (MSI->getDestAddressSpace() > 255)
3498  // Fast instruction selection doesn't support the special
3499  // address spaces.
3500  return false;
3501 
3502  return lowerCallTo(II, "memset", II->arg_size() - 1);
3503  }
3504  case Intrinsic::sin:
3505  case Intrinsic::cos:
3506  case Intrinsic::pow: {
3507  MVT RetVT;
3508  if (!isTypeLegal(II->getType(), RetVT))
3509  return false;
3510 
3511  if (RetVT != MVT::f32 && RetVT != MVT::f64)
3512  return false;
3513 
3514  static const RTLIB::Libcall LibCallTable[3][2] = {
3515  { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3516  { RTLIB::COS_F32, RTLIB::COS_F64 },
3517  { RTLIB::POW_F32, RTLIB::POW_F64 }
3518  };
3519  RTLIB::Libcall LC;
3520  bool Is64Bit = RetVT == MVT::f64;
3521  switch (II->getIntrinsicID()) {
3522  default:
3523  llvm_unreachable("Unexpected intrinsic.");
3524  case Intrinsic::sin:
3525  LC = LibCallTable[0][Is64Bit];
3526  break;
3527  case Intrinsic::cos:
3528  LC = LibCallTable[1][Is64Bit];
3529  break;
3530  case Intrinsic::pow:
3531  LC = LibCallTable[2][Is64Bit];
3532  break;
3533  }
3534 
3535  ArgListTy Args;
3536  Args.reserve(II->arg_size());
3537 
3538  // Populate the argument list.
3539  for (auto &Arg : II->args()) {
3540  ArgListEntry Entry;
3541  Entry.Val = Arg;
3542  Entry.Ty = Arg->getType();
3543  Args.push_back(Entry);
3544  }
3545 
3546  CallLoweringInfo CLI;
3547  MCContext &Ctx = MF->getContext();
3548  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3549  TLI.getLibcallName(LC), std::move(Args));
3550  if (!lowerCallTo(CLI))
3551  return false;
3552  updateValueMap(II, CLI.ResultReg);
3553  return true;
3554  }
3555  case Intrinsic::fabs: {
3556  MVT VT;
3557  if (!isTypeLegal(II->getType(), VT))
3558  return false;
3559 
3560  unsigned Opc;
3561  switch (VT.SimpleTy) {
3562  default:
3563  return false;
3564  case MVT::f32:
3565  Opc = AArch64::FABSSr;
3566  break;
3567  case MVT::f64:
3568  Opc = AArch64::FABSDr;
3569  break;
3570  }
3571  unsigned SrcReg = getRegForValue(II->getOperand(0));
3572  if (!SrcReg)
3573  return false;
3574  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3575  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3576  .addReg(SrcReg);
3577  updateValueMap(II, ResultReg);
3578  return true;
3579  }
3580  case Intrinsic::trap:
3581  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3582  .addImm(1);
3583  return true;
3584  case Intrinsic::debugtrap:
3585  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3586  .addImm(0xF000);
3587  return true;
3588 
3589  case Intrinsic::sqrt: {
3590  Type *RetTy = II->getCalledFunction()->getReturnType();
3591 
3592  MVT VT;
3593  if (!isTypeLegal(RetTy, VT))
3594  return false;
3595 
3596  unsigned Op0Reg = getRegForValue(II->getOperand(0));
3597  if (!Op0Reg)
3598  return false;
3599 
3600  unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3601  if (!ResultReg)
3602  return false;
3603 
3604  updateValueMap(II, ResultReg);
3605  return true;
3606  }
3607  case Intrinsic::sadd_with_overflow:
3608  case Intrinsic::uadd_with_overflow:
3609  case Intrinsic::ssub_with_overflow:
3610  case Intrinsic::usub_with_overflow:
3611  case Intrinsic::smul_with_overflow:
3612  case Intrinsic::umul_with_overflow: {
3613  // This implements the basic lowering of the xalu with overflow intrinsics.
3614  const Function *Callee = II->getCalledFunction();
3615  auto *Ty = cast<StructType>(Callee->getReturnType());
3616  Type *RetTy = Ty->getTypeAtIndex(0U);
3617 
3618  MVT VT;
3619  if (!isTypeLegal(RetTy, VT))
3620  return false;
3621 
3622  if (VT != MVT::i32 && VT != MVT::i64)
3623  return false;
3624 
3625  const Value *LHS = II->getArgOperand(0);
3626  const Value *RHS = II->getArgOperand(1);
3627  // Canonicalize immediate to the RHS.
3628  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3629  std::swap(LHS, RHS);
3630 
3631  // Simplify multiplies.
3632  Intrinsic::ID IID = II->getIntrinsicID();
3633  switch (IID) {
3634  default:
3635  break;
3636  case Intrinsic::smul_with_overflow:
3637  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3638  if (C->getValue() == 2) {
3639  IID = Intrinsic::sadd_with_overflow;
3640  RHS = LHS;
3641  }
3642  break;
3643  case Intrinsic::umul_with_overflow:
3644  if (const auto *C = dyn_cast<ConstantInt>(RHS))
3645  if (C->getValue() == 2) {
3646  IID = Intrinsic::uadd_with_overflow;
3647  RHS = LHS;
3648  }
3649  break;
3650  }
3651 
3652  unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3653  AArch64CC::CondCode CC = AArch64CC::Invalid;
3654  switch (IID) {
3655  default: llvm_unreachable("Unexpected intrinsic!");
3656  case Intrinsic::sadd_with_overflow:
3657  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3658  CC = AArch64CC::VS;
3659  break;
3660  case Intrinsic::uadd_with_overflow:
3661  ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3662  CC = AArch64CC::HS;
3663  break;
3664  case Intrinsic::ssub_with_overflow:
3665  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3666  CC = AArch64CC::VS;
3667  break;
3668  case Intrinsic::usub_with_overflow:
3669  ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3670  CC = AArch64CC::LO;
3671  break;
3672  case Intrinsic::smul_with_overflow: {
3673  CC = AArch64CC::NE;
3674  unsigned LHSReg = getRegForValue(LHS);
3675  if (!LHSReg)
3676  return false;
3677 
3678  unsigned RHSReg = getRegForValue(RHS);
3679  if (!RHSReg)
3680  return false;
3681 
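  // Editor's note (illustrative sketch, not part of the original source):
  // for i32 the signed-overflow check below widens the product, roughly
  //   smull x8, w0, w1        ; full 64-bit product
  //   cmp   x8, w8, sxtw      ; compare against the sign-extended low half
  // so the multiply overflowed iff the comparison is not-equal (CC == NE).
  // Register names are illustrative; FastISel uses virtual registers.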
3682  if (VT == MVT::i32) {
3683  MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3684  unsigned MulSubReg =
3685  fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3686  // cmp xreg, wreg, sxtw
3687  emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3688  AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3689  /*WantResult=*/false);
3690  MulReg = MulSubReg;
3691  } else {
3692  assert(VT == MVT::i64 && "Unexpected value type.");
3693  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3694  // reused in the next instruction.
3695  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3696  unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3697  emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3698  /*WantResult=*/false);
3699  }
3700  break;
3701  }
3702  case Intrinsic::umul_with_overflow: {
3703  CC = AArch64CC::NE;
3704  unsigned LHSReg = getRegForValue(LHS);
3705  if (!LHSReg)
3706  return false;
3707 
3708  unsigned RHSReg = getRegForValue(RHS);
3709  if (!RHSReg)
3710  return false;
3711 
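  // Editor's note (illustrative sketch, not part of the original source):
  // for i32 the unsigned-overflow check below tests the high half of the
  // widened product, roughly
  //   umull x8, w0, w1
  //   tst   x8, #0xffffffff00000000   ; NE iff any high bit is set
  // and for i64 it compares UMULH of the operands against zero.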
3712  if (VT == MVT::i32) {
3713  MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3714  // tst xreg, #0xffffffff00000000
3715  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3716  TII.get(AArch64::ANDSXri), AArch64::XZR)
3717  .addReg(MulReg)
3718  .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3719  MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3720  } else {
3721  assert(VT == MVT::i64 && "Unexpected value type.");
3722  // LHSReg and RHSReg cannot be killed by this Mul, since they are
3723  // reused in the next instruction.
3724  MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3725  unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3726  emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3727  }
3728  break;
3729  }
3730  }
3731 
3732  if (MulReg) {
3733  ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3734  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3735  TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3736  }
3737 
3738  if (!ResultReg1)
3739  return false;
3740 
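  // Editor's note (not part of the original source): the CSINC below, with
  // both sources being WZR and the inverted condition, is the canonical
  // "cset w<n>, CC" idiom; it materializes the overflow flag as the second
  // result expected by the with.overflow struct return.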
3741  ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3742  AArch64::WZR, AArch64::WZR,
3743  getInvertedCondCode(CC));
3744  (void)ResultReg2;
3745  assert((ResultReg1 + 1) == ResultReg2 &&
3746  "Nonconsecutive result registers.");
3747  updateValueMap(II, ResultReg1, 2);
3748  return true;
3749  }
3750  }
3751  return false;
3752 }
3753 
3754 bool AArch64FastISel::selectRet(const Instruction *I) {
3755  const ReturnInst *Ret = cast<ReturnInst>(I);
3756  const Function &F = *I->getParent()->getParent();
3757 
3758  if (!FuncInfo.CanLowerReturn)
3759  return false;
3760 
3761  if (F.isVarArg())
3762  return false;
3763 
3764  if (TLI.supportSwiftError() &&
3765  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3766  return false;
3767 
3768  if (TLI.supportSplitCSR(FuncInfo.MF))
3769  return false;
3770 
3771  // Build a list of return value registers.
3772  SmallVector<unsigned, 4> RetRegs;
3773 
3774  if (Ret->getNumOperands() > 0) {
3775  CallingConv::ID CC = F.getCallingConv();
3776  SmallVector<ISD::OutputArg, 4> Outs;
3777  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3778 
3779  // Analyze operands of the call, assigning locations to each operand.
3780  SmallVector<CCValAssign, 16> ValLocs;
3781  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3782  CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3783  : RetCC_AArch64_AAPCS;
3784  CCInfo.AnalyzeReturn(Outs, RetCC);
3785 
3786  // Only handle a single return value for now.
3787  if (ValLocs.size() != 1)
3788  return false;
3789 
3790  CCValAssign &VA = ValLocs[0];
3791  const Value *RV = Ret->getOperand(0);
3792 
3793  // Don't bother handling odd stuff for now.
3794  if ((VA.getLocInfo() != CCValAssign::Full) &&
3795  (VA.getLocInfo() != CCValAssign::BCvt))
3796  return false;
3797 
3798  // Only handle register returns for now.
3799  if (!VA.isRegLoc())
3800  return false;
3801 
3802  unsigned Reg = getRegForValue(RV);
3803  if (Reg == 0)
3804  return false;
3805 
3806  unsigned SrcReg = Reg + VA.getValNo();
3807  Register DestReg = VA.getLocReg();
3808  // Avoid a cross-class copy. This is very unlikely.
3809  if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3810  return false;
3811 
3812  EVT RVEVT = TLI.getValueType(DL, RV->getType());
3813  if (!RVEVT.isSimple())
3814  return false;
3815 
3816  // Vectors (of > 1 lane) in big endian need tricky handling.
3817  if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3818  !Subtarget->isLittleEndian())
3819  return false;
3820 
3821  MVT RVVT = RVEVT.getSimpleVT();
3822  if (RVVT == MVT::f128)
3823  return false;
3824 
3825  MVT DestVT = VA.getValVT();
3826  // Special handling for extended integers.
3827  if (RVVT != DestVT) {
3828  if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3829  return false;
3830 
3831  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3832  return false;
3833 
3834  bool IsZExt = Outs[0].Flags.isZExt();
3835  SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3836  if (SrcReg == 0)
3837  return false;
3838  }
3839 
3840  // "Callee" (i.e. value producer) zero extends pointers at function
3841  // boundary.
3842  if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3843  SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3844 
3845  // Make the copy.
3846  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3847  TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3848 
3849  // Add register to return instruction.
3850  RetRegs.push_back(VA.getLocReg());
3851  }
3852 
3853  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3854  TII.get(AArch64::RET_ReallyLR));
3855  for (unsigned RetReg : RetRegs)
3856  MIB.addReg(RetReg, RegState::Implicit);
3857  return true;
3858 }
3859 
3860 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3861  Type *DestTy = I->getType();
3862  Value *Op = I->getOperand(0);
3863  Type *SrcTy = Op->getType();
3864 
3865  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3866  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3867  if (!SrcEVT.isSimple())
3868  return false;
3869  if (!DestEVT.isSimple())
3870  return false;
3871 
3872  MVT SrcVT = SrcEVT.getSimpleVT();
3873  MVT DestVT = DestEVT.getSimpleVT();
3874 
3875  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3876  SrcVT != MVT::i8)
3877  return false;
3878  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3879  DestVT != MVT::i1)
3880  return false;
3881 
3882  unsigned SrcReg = getRegForValue(Op);
3883  if (!SrcReg)
3884  return false;
3885 
3886  // If we're truncating from i64 to a smaller non-legal type then generate an
3887  // AND. Otherwise, we know the high bits are undefined and a truncate only
3888  // generates a COPY. We cannot mark the source register also as result
3889  // register, because this can incorrectly transfer the kill flag onto the
3890  // source register.
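  // Editor's note (illustrative, not part of the original source): e.g. a
  // "trunc i64 %x to i8" takes the i64 path below as an EXTRACT_SUBREG of
  // the low 32 bits followed by "and w<d>, w<s>, #0xff".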
3891  unsigned ResultReg;
3892  if (SrcVT == MVT::i64) {
3893  uint64_t Mask = 0;
3894  switch (DestVT.SimpleTy) {
3895  default:
3896  // Trunc i64 to i32 is handled by the target-independent fast-isel.
3897  return false;
3898  case MVT::i1:
3899  Mask = 0x1;
3900  break;
3901  case MVT::i8:
3902  Mask = 0xff;
3903  break;
3904  case MVT::i16:
3905  Mask = 0xffff;
3906  break;
3907  }
3908  // Issue an extract_subreg to get the lower 32-bits.
3909  unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3910  AArch64::sub_32);
3911  // Create the AND instruction which performs the actual truncation.
3912  ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3913  assert(ResultReg && "Unexpected AND instruction emission failure.");
3914  } else {
3915  ResultReg = createResultReg(&AArch64::GPR32RegClass);
3916  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3917  TII.get(TargetOpcode::COPY), ResultReg)
3918  .addReg(SrcReg);
3919  }
3920 
3921  updateValueMap(I, ResultReg);
3922  return true;
3923 }
3924 
3925 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3926  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3927  DestVT == MVT::i64) &&
3928  "Unexpected value type.");
3929  // Handle i8 and i16 as i32.
3930  if (DestVT == MVT::i8 || DestVT == MVT::i16)
3931  DestVT = MVT::i32;
3932 
3933  if (IsZExt) {
3934  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3935  assert(ResultReg && "Unexpected AND instruction emission failure.");
3936  if (DestVT == MVT::i64) {
3937  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3938  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3939  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3940  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3941  TII.get(AArch64::SUBREG_TO_REG), Reg64)
3942  .addImm(0)
3943  .addReg(ResultReg)
3944  .addImm(AArch64::sub_32);
3945  ResultReg = Reg64;
3946  }
3947  return ResultReg;
3948  } else {
3949  if (DestVT == MVT::i64) {
3950  // FIXME: We're SExt i1 to i64.
3951  return 0;
3952  }
3953  return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3954  0, 0);
3955  }
3956 }
3957 
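 // Editor's note (not part of the original source): the helpers below emit
 // multiplies as MADD/SMADDL/UMADDL with the zero register as the addend;
 // e.g. "madd w0, w1, w2, wzr" is the canonical encoding of "mul w0, w1, w2".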
3958 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3959  unsigned Opc, ZReg;
3960  switch (RetVT.SimpleTy) {
3961  default: return 0;
3962  case MVT::i8:
3963  case MVT::i16:
3964  case MVT::i32:
3965  RetVT = MVT::i32;
3966  Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3967  case MVT::i64:
3968  Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3969  }
3970 
3971  const TargetRegisterClass *RC =
3972  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3973  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3974 }
3975 
3976 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3977  if (RetVT != MVT::i64)
3978  return 0;
3979 
3980  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3981  Op0, Op1, AArch64::XZR);
3982 }
3983 
3984 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3985  if (RetVT != MVT::i64)
3986  return 0;
3987 
3988  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3989  Op0, Op1, AArch64::XZR);
3990 }
3991 
3992 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
3993  unsigned Op1Reg) {
3994  unsigned Opc = 0;
3995  bool NeedTrunc = false;
3996  uint64_t Mask = 0;
3997  switch (RetVT.SimpleTy) {
3998  default: return 0;
3999  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4000  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4001  case MVT::i32: Opc = AArch64::LSLVWr; break;
4002  case MVT::i64: Opc = AArch64::LSLVXr; break;
4003  }
4004 
4005  const TargetRegisterClass *RC =
4006  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4007  if (NeedTrunc)
4008  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4009 
4010  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4011  if (NeedTrunc)
4012  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4013  return ResultReg;
4014 }
4015 
4016 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4017  uint64_t Shift, bool IsZExt) {
4018  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4019  "Unexpected source/return type pair.");
4020  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4021  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4022  "Unexpected source value type.");
4023  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4024  RetVT == MVT::i64) && "Unexpected return value type.");
4025 
4026  bool Is64Bit = (RetVT == MVT::i64);
4027  unsigned RegSize = Is64Bit ? 64 : 32;
4028  unsigned DstBits = RetVT.getSizeInBits();
4029  unsigned SrcBits = SrcVT.getSizeInBits();
4030  const TargetRegisterClass *RC =
4031  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4032 
4033  // Just emit a copy for "zero" shifts.
4034  if (Shift == 0) {
4035  if (RetVT == SrcVT) {
4036  unsigned ResultReg = createResultReg(RC);
4037  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4038  TII.get(TargetOpcode::COPY), ResultReg)
4039  .addReg(Op0);
4040  return ResultReg;
4041  } else
4042  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4043  }
4044 
4045  // Don't deal with undefined shifts.
4046  if (Shift >= DstBits)
4047  return 0;
4048 
4049  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4050  // {S|U}BFM Wd, Wn, #r, #s
4051  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4052 
4053  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4054  // %2 = shl i16 %1, 4
4055  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4056  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4057  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4058  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4059 
4060  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4061  // %2 = shl i16 %1, 8
4062  // Wd<32+7-24,32-24> = Wn<7:0>
4063  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4064  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4065  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4066 
4067  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4068  // %2 = shl i16 %1, 12
4069  // Wd<32+3-20,32-20> = Wn<3:0>
4070  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4071  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4072  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4073 
4074  unsigned ImmR = RegSize - Shift;
4075  // Limit the width to the length of the source type.
4076  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4077  static const unsigned OpcTable[2][2] = {
4078  {AArch64::SBFMWri, AArch64::SBFMXri},
4079  {AArch64::UBFMWri, AArch64::UBFMXri}
4080  };
4081  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4082  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4083  Register TmpReg = MRI.createVirtualRegister(RC);
4084  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4085  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4086  .addImm(0)
4087  .addReg(Op0)
4088  .addImm(AArch64::sub_32);
4089  Op0 = TmpReg;
4090  }
4091  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4092 }
4093 
4094 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4095  unsigned Op1Reg) {
4096  unsigned Opc = 0;
4097  bool NeedTrunc = false;
4098  uint64_t Mask = 0;
4099  switch (RetVT.SimpleTy) {
4100  default: return 0;
4101  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4102  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4103  case MVT::i32: Opc = AArch64::LSRVWr; break;
4104  case MVT::i64: Opc = AArch64::LSRVXr; break;
4105  }
4106 
4107  const TargetRegisterClass *RC =
4108  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4109  if (NeedTrunc) {
4110  Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4111  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4112  }
4113  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4114  if (NeedTrunc)
4115  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4116  return ResultReg;
4117 }
4118 
4119 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4120  uint64_t Shift, bool IsZExt) {
4121  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4122  "Unexpected source/return type pair.");
4123  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4124  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4125  "Unexpected source value type.");
4126  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4127  RetVT == MVT::i64) && "Unexpected return value type.");
4128 
4129  bool Is64Bit = (RetVT == MVT::i64);
4130  unsigned RegSize = Is64Bit ? 64 : 32;
4131  unsigned DstBits = RetVT.getSizeInBits();
4132  unsigned SrcBits = SrcVT.getSizeInBits();
4133  const TargetRegisterClass *RC =
4134  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4135 
4136  // Just emit a copy for "zero" shifts.
4137  if (Shift == 0) {
4138  if (RetVT == SrcVT) {
4139  unsigned ResultReg = createResultReg(RC);
4140  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4141  TII.get(TargetOpcode::COPY), ResultReg)
4142  .addReg(Op0);
4143  return ResultReg;
4144  } else
4145  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4146  }
4147 
4148  // Don't deal with undefined shifts.
4149  if (Shift >= DstBits)
4150  return 0;
4151 
4152  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4153  // {S|U}BFM Wd, Wn, #r, #s
4154  // Wd<s-r:0> = Wn<s:r> when r <= s
4155 
4156  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4157  // %2 = lshr i16 %1, 4
4158  // Wd<7-4:0> = Wn<7:4>
4159  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4160  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4161  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4162 
4163  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4164  // %2 = lshr i16 %1, 8
4165  // Wd<7-7,0> = Wn<7:7>
4166  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4167  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4168  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4169 
4170  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4171  // %2 = lshr i16 %1, 12
4172  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4173  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4174  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4175  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4176 
4177  if (Shift >= SrcBits && IsZExt)
4178  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4179 
4180  // It is not possible to fold a sign-extend into the LShr instruction. In this
4181  // case emit a sign-extend.
4182  if (!IsZExt) {
4183  Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4184  if (!Op0)
4185  return 0;
4186  SrcVT = RetVT;
4187  SrcBits = SrcVT.getSizeInBits();
4188  IsZExt = true;
4189  }
4190 
4191  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4192  unsigned ImmS = SrcBits - 1;
4193  static const unsigned OpcTable[2][2] = {
4194  {AArch64::SBFMWri, AArch64::SBFMXri},
4195  {AArch64::UBFMWri, AArch64::UBFMXri}
4196  };
4197  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4198  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4199  Register TmpReg = MRI.createVirtualRegister(RC);
4200  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4201  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4202  .addImm(0)
4203  .addReg(Op0)
4204  .addImm(AArch64::sub_32);
4205  Op0 = TmpReg;
4206  }
4207  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4208 }
4209 
4210 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4211  unsigned Op1Reg) {
4212  unsigned Opc = 0;
4213  bool NeedTrunc = false;
4214  uint64_t Mask = 0;
4215  switch (RetVT.SimpleTy) {
4216  default: return 0;
4217  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4218  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4219  case MVT::i32: Opc = AArch64::ASRVWr; break;
4220  case MVT::i64: Opc = AArch64::ASRVXr; break;
4221  }
4222 
4223  const TargetRegisterClass *RC =
4224  (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4225  if (NeedTrunc) {
4226  Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4227  Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4228  }
4229  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4230  if (NeedTrunc)
4231  ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4232  return ResultReg;
4233 }
4234 
4235 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4236  uint64_t Shift, bool IsZExt) {
4237  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4238  "Unexpected source/return type pair.");
4239  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4240  SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4241  "Unexpected source value type.");
4242  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4243  RetVT == MVT::i64) && "Unexpected return value type.");
4244 
4245  bool Is64Bit = (RetVT == MVT::i64);
4246  unsigned RegSize = Is64Bit ? 64 : 32;
4247  unsigned DstBits = RetVT.getSizeInBits();
4248  unsigned SrcBits = SrcVT.getSizeInBits();
4249  const TargetRegisterClass *RC =
4250  Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4251 
4252  // Just emit a copy for "zero" shifts.
4253  if (Shift == 0) {
4254  if (RetVT == SrcVT) {
4255  unsigned ResultReg = createResultReg(RC);
4256  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4257  TII.get(TargetOpcode::COPY), ResultReg)
4258  .addReg(Op0);
4259  return ResultReg;
4260  } else
4261  return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4262  }
4263 
4264  // Don't deal with undefined shifts.
4265  if (Shift >= DstBits)
4266  return 0;
4267 
4268  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4269  // {S|U}BFM Wd, Wn, #r, #s
4270  // Wd<s-r:0> = Wn<s:r> when r <= s
4271 
4272  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4273  // %2 = ashr i16 %1, 4
4274  // Wd<7-4:0> = Wn<7:4>
4275  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4276  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4277  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4278 
4279  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4280  // %2 = ashr i16 %1, 8
4281  // Wd<7-7,0> = Wn<7:7>
4282  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4283  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4284  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4285 
4286  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4287  // %2 = ashr i16 %1, 12
4288  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4289  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4290  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4291  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4292 
4293  if (Shift >= SrcBits && IsZExt)
4294  return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4295 
4296  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4297  unsigned ImmS = SrcBits - 1;
4298  static const unsigned OpcTable[2][2] = {
4299  {AArch64::SBFMWri, AArch64::SBFMXri},
4300  {AArch64::UBFMWri, AArch64::UBFMXri}
4301  };
4302  unsigned Opc = OpcTable[IsZExt][Is64Bit];
4303  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4304  Register TmpReg = MRI.createVirtualRegister(RC);
4305  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4306  TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4307  .addImm(0)
4308  .addReg(Op0)
4309  .addImm(AArch64::sub_32);
4310  Op0 = TmpReg;
4311  }
4312  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4313 }
4314 
4315 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4316  bool IsZExt) {
4317  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4318 
4319  // FastISel does not have plumbing to deal with extensions where the SrcVT or
4320  // DestVT are odd things, so test to make sure that they are both types we can
4321  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4322  // bail out to SelectionDAG.
4323  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4324  (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4325  ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4326  (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4327  return 0;
4328 
4329  unsigned Opc;
4330  unsigned Imm = 0;
4331 
4332  switch (SrcVT.SimpleTy) {
4333  default:
4334  return 0;
4335  case MVT::i1:
4336  return emiti1Ext(SrcReg, DestVT, IsZExt);
4337  case MVT::i8:
4338  if (DestVT == MVT::i64)
4339  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4340  else
4341  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4342  Imm = 7;
4343  break;
4344  case MVT::i16:
4345  if (DestVT == MVT::i64)
4346  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4347  else
4348  Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4349  Imm = 15;
4350  break;
4351  case MVT::i32:
4352  assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4353  Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4354  Imm = 31;
4355  break;
4356  }
4357 
4358  // Handle i8 and i16 as i32.
4359  if (DestVT == MVT::i8 || DestVT == MVT::i16)
4360  DestVT = MVT::i32;
4361  else if (DestVT == MVT::i64) {
4362  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4363  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4364  TII.get(AArch64::SUBREG_TO_REG), Src64)
4365  .addImm(0)
4366  .addReg(SrcReg)
4367  .addImm(AArch64::sub_32);
4368  SrcReg = Src64;
4369  }
4370 
4371  const TargetRegisterClass *RC =
4372  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4373  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4374 }
4375 
4376 static bool isZExtLoad(const MachineInstr *LI) {
4377  switch (LI->getOpcode()) {
4378  default:
4379  return false;
4380  case AArch64::LDURBBi:
4381  case AArch64::LDURHHi:
4382  case AArch64::LDURWi:
4383  case AArch64::LDRBBui:
4384  case AArch64::LDRHHui:
4385  case AArch64::LDRWui:
4386  case AArch64::LDRBBroX:
4387  case AArch64::LDRHHroX:
4388  case AArch64::LDRWroX:
4389  case AArch64::LDRBBroW:
4390  case AArch64::LDRHHroW:
4391  case AArch64::LDRWroW:
4392  return true;
4393  }
4394 }
4395 
4396 static bool isSExtLoad(const MachineInstr *LI) {
4397  switch (LI->getOpcode()) {
4398  default:
4399  return false;
4400  case AArch64::LDURSBWi:
4401  case AArch64::LDURSHWi:
4402  case AArch64::LDURSBXi:
4403  case AArch64::LDURSHXi:
4404  case AArch64::LDURSWi:
4405  case AArch64::LDRSBWui:
4406  case AArch64::LDRSHWui:
4407  case AArch64::LDRSBXui:
4408  case AArch64::LDRSHXui:
4409  case AArch64::LDRSWui:
4410  case AArch64::LDRSBWroX:
4411  case AArch64::LDRSHWroX:
4412  case AArch64::LDRSBXroX:
4413  case AArch64::LDRSHXroX:
4414  case AArch64::LDRSWroX:
4415  case AArch64::LDRSBWroW:
4416  case AArch64::LDRSHWroW:
4417  case AArch64::LDRSBXroW:
4418  case AArch64::LDRSHXroW:
4419  case AArch64::LDRSWroW:
4420  return true;
4421  }
4422 }
4423 
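 // Editor's note (illustrative, not part of the original source): the routine
 // below handles patterns such as "%l = load i32, ...; %e = zext i32 %l to
 // i64" by reusing the already-selected extending load; for the zext-to-i64
 // case it only wraps the 32-bit result in a SUBREG_TO_REG rather than
 // emitting a separate UBFM.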
4424 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4425  MVT SrcVT) {
4426  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4427  if (!LI || !LI->hasOneUse())
4428  return false;
4429 
4430  // Check if the load instruction has already been selected.
4431  unsigned Reg = lookUpRegForValue(LI);
4432  if (!Reg)
4433  return false;
4434 
4435  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4436  if (!MI)
4437  return false;
4438 
4439  // Check if the correct load instruction has been emitted - SelectionDAG might
4440  // have emitted a zero-extending load, but we need a sign-extending load.
4441  bool IsZExt = isa<ZExtInst>(I);
4442  const auto *LoadMI = MI;
4443  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4444  LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4445  Register LoadReg = MI->getOperand(1).getReg();
4446  LoadMI = MRI.getUniqueVRegDef(LoadReg);
4447  assert(LoadMI && "Expected valid instruction");
4448  }
4449  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4450  return false;
4451 
4452  // Nothing to be done.
4453  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4454  updateValueMap(I, Reg);
4455  return true;
4456  }
4457 
4458  if (IsZExt) {
4459  unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4460  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4461  TII.get(AArch64::SUBREG_TO_REG), Reg64)
4462  .addImm(0)
4463  .addReg(Reg, getKillRegState(true))
4464  .addImm(AArch64::sub_32);
4465  Reg = Reg64;
4466  } else {
4467  assert((MI->getOpcode() == TargetOpcode::COPY &&
4468  MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4469  "Expected copy instruction");
4470  Reg = MI->getOperand(1).getReg();
4471  MachineBasicBlock::iterator I(MI);
4472  removeDeadCode(I, std::next(I));
4473  }
4474  updateValueMap(I, Reg);
4475  return true;
4476 }
4477 
4478 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4479  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4480  "Unexpected integer extend instruction.");
4481  MVT RetVT;
4482  MVT SrcVT;
4483  if (!isTypeSupported(I->getType(), RetVT))
4484  return false;
4485 
4486  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4487  return false;
4488 
4489  // Try to optimize already sign-/zero-extended values from load instructions.
4490  if (optimizeIntExtLoad(I, RetVT, SrcVT))
4491  return true;
4492 
4493  unsigned SrcReg = getRegForValue(I->getOperand(0));
4494  if (!SrcReg)
4495  return false;
4496 
4497  // Try to optimize already sign-/zero-extended values from function arguments.
4498  bool IsZExt = isa<ZExtInst>(I);
4499  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4500  if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4501  if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4502  unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4503  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4504  TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4505  .addImm(0)
4506  .addReg(SrcReg)
4507  .addImm(AArch64::sub_32);
4508  SrcReg = ResultReg;
4509  }
4510 
4511  updateValueMap(I, SrcReg);
4512  return true;
4513  }
4514  }
4515 
4516  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4517  if (!ResultReg)
4518  return false;
4519 
4520  updateValueMap(I, ResultReg);
4521  return true;
4522 }
4523 
4524 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4525  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4526  if (!DestEVT.isSimple())
4527  return false;
4528 
4529  MVT DestVT = DestEVT.getSimpleVT();
4530  if (DestVT != MVT::i64 && DestVT != MVT::i32)
4531  return false;
4532 
4533  unsigned DivOpc;
4534  bool Is64bit = (DestVT == MVT::i64);
4535  switch (ISDOpcode) {
4536  default:
4537  return false;
4538  case ISD::SREM:
4539  DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4540  break;
4541  case ISD::UREM:
4542  DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4543  break;
4544  }
4545  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4546  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4547  if (!Src0Reg)
4548  return false;
4549 
4550  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4551  if (!Src1Reg)
4552  return false;
4553 
4554  const TargetRegisterClass *RC =
4555  (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4556  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4557  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4558  // The remainder is computed as numerator - (quotient * denominator) using the
4559  // MSUB instruction.
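  // Editor's illustration (not part of the original source), for a 32-bit
  // srem:
  //   sdiv w8, w0, w1
  //   msub w0, w8, w1, w0    ; w0 - (w8 * w1)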
4560  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4561  updateValueMap(I, ResultReg);
4562  return true;
4563 }
4564 
4565 bool AArch64FastISel::selectMul(const Instruction *I) {
4566  MVT VT;
4567  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4568  return false;
4569 
4570  if (VT.isVector())
4571  return selectBinaryOp(I, ISD::MUL);
4572 
4573  const Value *Src0 = I->getOperand(0);
4574  const Value *Src1 = I->getOperand(1);
4575  if (const auto *C = dyn_cast<ConstantInt>(Src0))
4576  if (C->getValue().isPowerOf2())
4577  std::swap(Src0, Src1);
4578 
4579  // Try to simplify to a shift instruction.
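  // Editor's note (illustrative, not part of the original source): e.g.
  // "mul i32 %x, 16" becomes a single "lsl w<d>, w<s>, #4", and when %x is
  // itself a free zext/sext of a narrower value the extension is folded into
  // the UBFM/SBFM emitted by emitLSL_ri.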
4580  if (const auto *C = dyn_cast<ConstantInt>(Src1))
4581  if (C->getValue().isPowerOf2()) {
4582  uint64_t ShiftVal = C->getValue().logBase2();
4583  MVT SrcVT = VT;
4584  bool IsZExt = true;
4585  if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4586  if (!isIntExtFree(ZExt)) {
4587  MVT VT;
4588  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4589  SrcVT = VT;
4590  IsZExt = true;
4591  Src0 = ZExt->getOperand(0);
4592  }
4593  }
4594  } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4595  if (!isIntExtFree(SExt)) {
4596  MVT VT;
4597  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4598  SrcVT = VT;
4599  IsZExt = false;
4600  Src0 = SExt->getOperand(0);
4601  }
4602  }
4603  }
4604 
4605  unsigned Src0Reg = getRegForValue(Src0);
4606  if (!Src0Reg)
4607  return false;
4608 
4609  unsigned ResultReg =
4610  emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4611 
4612  if (ResultReg) {
4613  updateValueMap(I, ResultReg);
4614  return true;
4615  }
4616  }
4617 
4618  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4619  if (!Src0Reg)
4620  return false;
4621 
4622  unsigned Src1Reg = getRegForValue(I->getOperand(1));
4623  if (!Src1Reg)
4624  return false;
4625 
4626  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4627 
4628  if (!ResultReg)
4629  return false;
4630 
4631  updateValueMap(I, ResultReg);
4632  return true;
4633 }
4634 
4635 bool AArch64FastISel::selectShift(const Instruction *I) {
4636  MVT RetVT;
4637  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4638  return false;
4639 
4640  if (RetVT.isVector())
4641  return selectOperator(I, I->getOpcode());
4642 
4643  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4644  unsigned ResultReg = 0;
4645  uint64_t ShiftVal = C->getZExtValue();
4646  MVT SrcVT = RetVT;
4647  bool IsZExt = I->getOpcode() != Instruction::AShr;
4648  const Value *Op0 = I->getOperand(0);
4649  if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4650  if (!isIntExtFree(ZExt)) {
4651  MVT TmpVT;
4652  if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4653  SrcVT = TmpVT;
4654  IsZExt = true;
4655  Op0 = ZExt->getOperand(0);
4656  }
4657  }
4658  } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4659  if (!isIntExtFree(SExt)) {
4660  MVT TmpVT;
4661  if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4662  SrcVT = TmpVT;
4663  IsZExt = false;
4664  Op0 = SExt->getOperand(0);
4665  }
4666  }
4667  }
4668 
4669  unsigned Op0Reg = getRegForValue(Op0);
4670  if (!Op0Reg)
4671  return false;
4672 
4673  switch (I->getOpcode()) {
4674  default: llvm_unreachable("Unexpected instruction.");
4675  case Instruction::Shl:
4676  ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4677  break;
4678  case Instruction::AShr:
4679  ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4680  break;
4681  case Instruction::LShr:
4682  ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4683  break;
4684  }
4685  if (!ResultReg)
4686  return false;
4687 
4688  updateValueMap(I, ResultReg);
4689  return true;
4690  }
4691 
4692  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4693  if (!Op0Reg)
4694  return false;
4695 
4696  unsigned Op1Reg = getRegForValue(I->getOperand(1));
4697  if (!Op1Reg)
4698  return false;
4699 
4700  unsigned ResultReg = 0;
4701  switch (I->getOpcode()) {
4702  default: llvm_unreachable("Unexpected instruction.");
4703  case Instruction::Shl:
4704  ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4705  break;
4706  case Instruction::AShr:
4707  ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4708  break;
4709  case Instruction::LShr:
4710  ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4711  break;
4712  }
4713 
4714  if (!ResultReg)
4715  return false;
4716 
4717  updateValueMap(I, ResultReg);
4718  return true;
4719 }
4720 
4721 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4722  MVT RetVT, SrcVT;
4723 
4724  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4725  return false;
4726  if (!isTypeLegal(I->getType(), RetVT))
4727  return false;
4728 
4729  unsigned Opc;
4730  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4731  Opc = AArch64::FMOVWSr;
4732  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4733  Opc = AArch64::FMOVXDr;
4734  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4735  Opc = AArch64::FMOVSWr;
4736  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4737  Opc = AArch64::FMOVDXr;
4738  else
4739  return false;
4740 
4741  const TargetRegisterClass *RC = nullptr;
4742  switch (RetVT.SimpleTy) {
4743  default: llvm_unreachable("Unexpected value type.");
4744  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4745  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4746  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4747  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4748  }
4749  unsigned Op0Reg = getRegForValue(I->getOperand(0));
4750  if (!Op0Reg)
4751  return false;
4752 
4753  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4754  if (!ResultReg)
4755  return false;
4756 
4757  updateValueMap(I, ResultReg);
4758  return true;
4759 }
4760 
4761 bool AArch64FastISel::selectFRem(const Instruction *I) {
4762  MVT RetVT;
4763  if (!isTypeLegal(I->getType(), RetVT))
4764  return false;
4765 
4766  RTLIB::Libcall LC;
4767  switch (RetVT.SimpleTy) {
4768  default:
4769  return false;
4770  case MVT::f32:
4771  LC = RTLIB::REM_F32;
4772  break;
4773  case MVT::f64:
4774  LC = RTLIB::REM_F64;
4775  break;
4776  }
4777 
4778  ArgListTy Args;
4779  Args.reserve(I->getNumOperands());
4780 
4781  // Populate the argument list.
4782  for (auto &Arg : I->operands()) {
4783  ArgListEntry Entry;
4784  Entry.Val = Arg;
4785  Entry.Ty = Arg->getType();
4786  Args.push_back(Entry);
4787  }
4788 
4789  CallLoweringInfo CLI;
4790  MCContext &Ctx = MF->getContext();
4791  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4792  TLI.getLibcallName(LC), std::move(Args));
4793  if (!lowerCallTo(CLI))
4794  return false;
4795  updateValueMap(I, CLI.ResultReg);
4796  return true;
4797 }
4798 
4799 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4800  MVT VT;
4801  if (!isTypeLegal(I->getType(), VT))
4802  return false;
4803 
4804  if (!isa<ConstantInt>(I->getOperand(1)))
4805  return selectBinaryOp(I, ISD::SDIV);
4806 
4807  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4808  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4809  !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4810  return selectBinaryOp(I, ISD::SDIV);
4811 
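  // Editor's note (illustrative sketch, not part of the original source):
  // for a non-exact "sdiv i32 %x, 8" the sequence built below is roughly
  //   add  w8, w0, #7          ; bias by 2^3 - 1
  //   cmp  w0, #0
  //   csel w8, w8, w0, lt      ; use the biased value only for negative x
  //   asr  w0, w8, #3
  // and for a negative power-of-two divisor the final shift is folded into a
  // NEG (subtract from the zero register with an ASR shifted operand).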
4812  unsigned Lg2 = C.countTrailingZeros();
4813  unsigned Src0Reg = getRegForValue(I->getOperand(0));
4814  if (!Src0Reg)
4815  return false;
4816 
4817  if (cast<BinaryOperator>(I)->isExact()) {
4818  unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4819  if (!ResultReg)
4820  return false;
4821  updateValueMap(I, ResultReg);
4822  return true;
4823  }
4824 
4825  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4826  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4827  if (!AddReg)
4828  return false;
4829 
4830  // (Src0 < 0) ? Pow2 - 1 : 0;
4831  if (!emitICmp_ri(VT, Src0Reg, 0))
4832  return false;
4833 
4834  unsigned SelectOpc;
4835  const TargetRegisterClass *RC;
4836  if (VT == MVT::i64) {
4837  SelectOpc = AArch64::CSELXr;
4838  RC = &AArch64::GPR64RegClass;
4839  } else {
4840  SelectOpc = AArch64::CSELWr;
4841  RC = &AArch64::GPR32RegClass;
4842  }
4843  unsigned SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4844  AArch64CC::LT);
4845  if (!SelectReg)
4846  return false;
4847 
4848  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4849  // negate the result.
4850  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4851  unsigned ResultReg;
4852  if (C.isNegative())
4853  ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4854  AArch64_AM::ASR, Lg2);
4855  else
4856  ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4857 
4858  if (!ResultReg)
4859  return false;
4860 
4861  updateValueMap(I, ResultReg);
4862  return true;
4863 }
4864 
4865 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4866 /// have to duplicate it for AArch64, because otherwise we would fail during the
4867 /// sign-extend emission.
4868 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4869  unsigned IdxN = getRegForValue(Idx);
4870  if (IdxN == 0)
4871  // Unhandled operand. Halt "fast" selection and bail.
4872  return 0;
4873 
4874  // If the index is smaller or larger than intptr_t, truncate or extend it.
4875  MVT PtrVT = TLI.getPointerTy(DL);
4876  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4877  if (IdxVT.bitsLT(PtrVT)) {
4878  IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4879  } else if (IdxVT.bitsGT(PtrVT))
4880  llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4881  return IdxN;
4882 }
4883 
4884 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4885 /// duplicate it for AArch64, because otherwise we would bail out even for
4886 /// simple cases. This is because the standard fastEmit functions don't cover
4887  /// MUL at all and ADD is lowered very inefficiently.
4888 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4889  if (Subtarget->isTargetILP32())
4890  return false;
4891 
4892  unsigned N = getRegForValue(I->getOperand(0));
4893  if (!N)
4894  return false;
4895 
4896  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4897  // into a single N = N + TotalOffset.
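  // Editor's note (illustrative, not part of the original source): e.g. for
  //   getelementptr [4 x i32], [4 x i32]* %p, i64 0, i64 %i
  // the constant zero index is skipped, and the variable index is lowered as
  // N = N + %i * 4, with the multiply emitted via emitMul_rr and the add via
  // fastEmit_rr(ISD::ADD); constant offsets are coalesced through TotalOffs
  // into a single add.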
4898  uint64_t TotalOffs = 0;
4899  MVT VT = TLI.getPointerTy(DL);
4900  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4901  GTI != E; ++GTI) {
4902  const Value *Idx = GTI.getOperand();
4903  if (auto *StTy = GTI.getStructTypeOrNull()) {
4904  unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4905  // N = N + Offset
4906  if (Field)
4907  TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4908  } else {
4909  Type *Ty = GTI.getIndexedType();
4910 
4911  // If this is a constant subscript, handle it quickly.
4912  if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4913  if (CI->isZero())
4914  continue;
4915  // N = N + Offset
4916  TotalOffs +=
4917  DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4918  continue;
4919  }
4920  if (TotalOffs) {
4921  N = emitAdd_ri_(VT, N, TotalOffs);
4922  if (!N)
4923  return false;
4924  TotalOffs = 0;
4925  }
4926 
4927  // N = N + Idx * ElementSize;
4928  uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4929  unsigned IdxN = getRegForGEPIndex(Idx);
4930  if (!IdxN)
4931  return false;
4932 
4933  if (ElementSize != 1) {
4934  unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4935  if (!C)
4936  return false;
4937  IdxN = emitMul_rr(VT, IdxN, C);
4938  if (!IdxN)
4939  return false;
4940  }
4941  N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4942  if (!N)
4943  return false;
4944  }
4945  }
4946  if (TotalOffs) {
4947  N = emitAdd_ri_(VT, N, TotalOffs);
4948  if (!N)
4949  return false;
4950  }
4951  updateValueMap(I, N);
4952  return true;
4953 }
4954 
4955 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4956  assert(TM.getOptLevel() == CodeGenOpt::None &&
4957  "cmpxchg survived AtomicExpand at optlevel > -O0");
4958 
4959  auto *RetPairTy = cast<StructType>(I->getType());
4960  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4961  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4962  "cmpxchg has a non-i1 status result");
4963 
4964  MVT VT;
4965  if (!isTypeLegal(RetTy, VT))
4966  return false;
4967 
4968  const TargetRegisterClass *ResRC;
4969  unsigned Opc, CmpOpc;
4970  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4971  // extractvalue selection doesn't support that.
4972  if (VT == MVT::i32) {
4973  Opc = AArch64::CMP_SWAP_32;
4974  CmpOpc = AArch64::SUBSWrs;
4975  ResRC = &AArch64::GPR32RegClass;
4976  } else if (VT == MVT::i64) {
4977  Opc = AArch64::CMP_SWAP_64;
4978  CmpOpc = AArch64::SUBSXrs;
4979  ResRC = &AArch64::GPR64RegClass;
4980  } else {
4981  return false;
4982  }
4983 
4984  const MCInstrDesc &II = TII.get(Opc);
4985 
4986  const unsigned AddrReg = constrainOperandRegClass(
4987  II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
4988  const unsigned DesiredReg = constrainOperandRegClass(
4989  II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
4990  const unsigned NewReg = constrainOperandRegClass(
4991  II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
4992 
4993  const unsigned ResultReg1 = createResultReg(ResRC);
4994  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
4995  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
4996 
4997  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
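  // Editor's note (not part of the original source): the three instructions
  // built below are (1) the CMP_SWAP_32/64 pseudo, later expanded by the
  // target into the actual atomic compare-and-swap sequence, (2) a SUBS
  // comparing the loaded value with the expected value, and (3) a CSINC
  // (i.e. "cset .., eq") that materializes the i1 success flag as the second
  // struct result.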
4998  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4999  .addDef(ResultReg1)
5000  .addDef(ScratchReg)
5001  .addUse(AddrReg)
5002  .addUse(DesiredReg)
5003  .addUse(NewReg);
5004 
5005  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5006  .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5007  .addUse(ResultReg1)
5008  .addUse(DesiredReg)
5009  .addImm(0);
5010 
5011  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5012  .addDef(ResultReg2)
5013  .addUse(AArch64::WZR)
5014  .addUse(AArch64::WZR)
5015  .addImm(AArch64CC::NE);
5016 
5017  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5018  updateValueMap(I, ResultReg1, 2);
5019  return true;
5020 }
5021 
5022 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5023  switch (I->getOpcode()) {
5024  default:
5025  break;
5026  case Instruction::Add:
5027  case Instruction::Sub:
5028  return selectAddSub(I);
5029  case Instruction::Mul:
5030  return selectMul(I);
5031  case Instruction::SDiv:
5032  return selectSDiv(I);
5033  case Instruction::SRem:
5034  if (!selectBinaryOp(I, ISD::SREM))
5035  return selectRem(I, ISD::SREM);
5036  return true;
5037  case Instruction::URem:
5038  if (!selectBinaryOp(I, ISD::UREM))
5039  return selectRem(I, ISD::UREM);
5040  return true;
5041  case Instruction::Shl:
5042  case Instruction::LShr:
5043  case Instruction::AShr:
5044  return selectShift(I);
5045  case Instruction::And:
5046  case Instruction::Or:
5047  case Instruction::Xor:
5048  return selectLogicalOp(I);
5049  case Instruction::Br:
5050  return selectBranch(I);
5051  case Instruction::IndirectBr:
5052  return selectIndirectBr(I);
5053  case Instruction::BitCast:
5054  if (!FastISel::selectBitCast(I))
5055  return selectBitCast(I);
5056  return true;
5057  case Instruction::FPToSI:
5058  if (!selectCast(I, ISD::FP_TO_SINT))
5059  return selectFPToInt(I, /*Signed=*/true);
5060  return true;
5061  case Instruction::FPToUI:
5062  return selectFPToInt(I, /*Signed=*/false);
5063  case Instruction::ZExt:
5064  case Instruction::SExt:
5065  return selectIntExt(I);
5066  case Instruction::Trunc:
5067  if (!selectCast(I, ISD::TRUNCATE))
5068  return selectTrunc(I);
5069  return true;
5070  case Instruction::FPExt:
5071  return selectFPExt(I);
5072  case Instruction::FPTrunc:
5073  return selectFPTrunc(I);
5074  case Instruction::SIToFP:
5075  if (!selectCast(I, ISD::SINT_TO_FP))
5076  return selectIntToFP(I, /*Signed=*/true);
5077  return true;
5078  case Instruction::UIToFP:
5079  return selectIntToFP(I, /*Signed=*/false);
5080  case Instruction::Load:
5081  return selectLoad(I);
5082  case Instruction::Store:
5083  return selectStore(I);
5084  case Instruction::FCmp:
5085  case Instruction::ICmp:
5086  return selectCmp(I);
5087  case Instruction::Select:
5088  return selectSelect(I);
5089  case Instruction::Ret:
5090  return selectRet(I);
5091  case Instruction::FRem:
5092  return selectFRem(I);
5093  case Instruction::GetElementPtr:
5094  return selectGetElementPtr(I);
5095  case Instruction::AtomicCmpXchg:
5096  return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5097  }
5098 
5099  // fall-back to target-independent instruction selection.
5100  return selectOperator(I, I->getOpcode());
5101 }
5102 
 5103 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
 5104  const TargetLibraryInfo *LibInfo) {
5105  return new AArch64FastISel(FuncInfo, LibInfo);
5106 }
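// For context, a sketch (based on AArch64ISelLowering.cpp, not part of this
// file) of how the factory above is reached: AArch64TargetLowering overrides
// TargetLowering::createFastISel and forwards to it, and SelectionDAGISel
// requests a FastISel instance through that hook when fast instruction
// selection is enabled (e.g. at -O0).
//
//   FastISel *
//   AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
//                                         const TargetLibraryInfo *libInfo) const {
//     return AArch64::createFastISel(funcInfo, libInfo);
//   }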