1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
21 #include "llvm/ADT/Optional.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/Support/Debug.h"
37 
38 #define DEBUG_TYPE "aarch64-isel"
39 
40 using namespace llvm;
41 
42 namespace {
43 
44 #define GET_GLOBALISEL_PREDICATE_BITSET
45 #include "AArch64GenGlobalISel.inc"
46 #undef GET_GLOBALISEL_PREDICATE_BITSET
47 
48 class AArch64InstructionSelector : public InstructionSelector {
49 public:
50  AArch64InstructionSelector(const AArch64TargetMachine &TM,
51  const AArch64Subtarget &STI,
52  const AArch64RegisterBankInfo &RBI);
53 
54  bool select(MachineInstr &I) override;
55  static const char *getName() { return DEBUG_TYPE; }
56 
57  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
58  CodeGenCoverage &CoverageInfo) override {
59  InstructionSelector::setupMF(MF, KB, CoverageInfo);
60 
61  // hasFnAttribute() is expensive to call on every BRCOND selection, so
62  // cache it here for each run of the selector.
63  ProduceNonFlagSettingCondBr =
64  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
65  }
66 
67 private:
68  /// tblgen-erated 'select' implementation, used as the initial selector for
69  /// the patterns that don't require complex C++.
70  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
71 
72  // A lowering phase that runs before any selection attempts.
73 
74  void preISelLower(MachineInstr &I) const;
75 
76  // An early selection function that runs before the selectImpl() call.
77  bool earlySelect(MachineInstr &I) const;
78 
79  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
80 
81  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
82  void contractCrossBankCopyIntoStore(MachineInstr &I,
83  MachineRegisterInfo &MRI) const;
84 
85  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
86  MachineRegisterInfo &MRI) const;
87  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
88  MachineRegisterInfo &MRI) const;
89 
90  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
91  MachineRegisterInfo &MRI) const;
92 
93  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
94  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
95 
96  // Helper to generate an equivalent of scalar_to_vector into a new vector
97  // register; the instruction defining that register is returned.
98  MachineInstr *emitScalarToVector(unsigned EltSize,
99  const TargetRegisterClass *DstRC,
100  Register Scalar,
101  MachineIRBuilder &MIRBuilder) const;
102 
103  /// Emit a lane insert into \p DstReg, or a new vector register if None is
104  /// provided.
105  ///
106  /// The lane inserted into is defined by \p LaneIdx. The vector source
107  /// register is given by \p SrcReg. The register containing the element is
108  /// given by \p EltReg.
109  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
110  Register EltReg, unsigned LaneIdx,
111  const RegisterBank &RB,
112  MachineIRBuilder &MIRBuilder) const;
113  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
114  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
115  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
116  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
117 
118  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
119  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
120  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
121  bool selectSplitVectorUnmerge(MachineInstr &I,
122  MachineRegisterInfo &MRI) const;
123  bool selectIntrinsicWithSideEffects(MachineInstr &I,
124  MachineRegisterInfo &MRI) const;
125  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
126  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
127  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
128  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
129  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
130  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
131  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
132 
133  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
134  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
135  MachineIRBuilder &MIRBuilder) const;
136 
137  // Emit a vector concat operation.
138  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
139  Register Op2,
140  MachineIRBuilder &MIRBuilder) const;
141  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
142  MachineOperand &Predicate,
143  MachineIRBuilder &MIRBuilder) const;
144  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
145  MachineIRBuilder &MIRBuilder) const;
146  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
147  MachineIRBuilder &MIRBuilder) const;
148  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
149  MachineIRBuilder &MIRBuilder) const;
150  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
151  const RegisterBank &DstRB, LLT ScalarTy,
152  Register VecReg, unsigned LaneIdx,
153  MachineIRBuilder &MIRBuilder) const;
154 
155  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
156  /// materialized using a FMOV instruction, then update MI and return it.
157  /// Otherwise, do nothing and return a nullptr.
158  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
159  MachineRegisterInfo &MRI) const;
160 
161  /// Emit a CSet for a compare.
162  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
163  MachineIRBuilder &MIRBuilder) const;
164 
165  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
166  // We use these manually instead of using the importer since it doesn't
167  // support SDNodeXForm.
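  // For a left shift of "Shift" on an N-bit type, shift_a computes the UBFM
  // immr field, (N - Shift) % N, and shift_b computes the imms field,
  // N - 1 - Shift.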
168  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
169  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
170  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
171  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
172 
173  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
174  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
175  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
176 
177  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
178  unsigned Size) const;
179 
180  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
181  return selectAddrModeUnscaled(Root, 1);
182  }
183  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
184  return selectAddrModeUnscaled(Root, 2);
185  }
186  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
187  return selectAddrModeUnscaled(Root, 4);
188  }
189  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
190  return selectAddrModeUnscaled(Root, 8);
191  }
192  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
193  return selectAddrModeUnscaled(Root, 16);
194  }
195 
196  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
197  unsigned Size) const;
198  template <int Width>
199  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
200  return selectAddrModeIndexed(Root, Width / 8);
201  }
202 
203  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
204  const MachineRegisterInfo &MRI) const;
205  ComplexRendererFns
206  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
207  unsigned SizeInBytes) const;
208  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
209  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
210  unsigned SizeInBytes) const;
211  template <int Width>
212  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
213  return selectAddrModeXRO(Root, Width / 8);
214  }
215 
216  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
217 
218  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
219  return selectShiftedRegister(Root);
220  }
221 
222  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
223  // TODO: selectShiftedRegister should allow for rotates on logical shifts.
224  // For now, make them the same. The only difference between the two is that
225  // logical shifts are allowed to fold in rotates. Otherwise, these are
226  // functionally the same.
227  return selectShiftedRegister(Root);
228  }
229 
230  /// Instructions that accept extend modifiers like UXTW expect the register
231  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
232  /// subregister copy if necessary. Return either ExtReg, or the result of the
233  /// new copy.
234  Register narrowExtendRegIfNeeded(Register ExtReg,
235  MachineIRBuilder &MIB) const;
236  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
237 
238  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
239  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
240  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
241 
242  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
243  void materializeLargeCMVal(MachineInstr &I, const Value *V,
244  unsigned OpFlags) const;
245 
246  // Optimization methods.
247  bool tryOptVectorShuffle(MachineInstr &I) const;
248  bool tryOptVectorDup(MachineInstr &MI) const;
249  bool tryOptSelect(MachineInstr &MI) const;
250  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251  MachineOperand &Predicate,
252  MachineIRBuilder &MIRBuilder) const;
253 
254  /// Return true if \p MI is a load or store of \p NumBytes bytes.
255  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
256 
257  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
258  /// register zeroed out. In other words, the result of MI has been explicitly
259  /// zero extended.
260  bool isDef32(const MachineInstr &MI) const;
261 
262  const AArch64TargetMachine &TM;
263  const AArch64Subtarget &STI;
264  const AArch64InstrInfo &TII;
265  const AArch64RegisterInfo &TRI;
266  const AArch64RegisterBankInfo &RBI;
267 
268  bool ProduceNonFlagSettingCondBr = false;
269 
270 #define GET_GLOBALISEL_PREDICATES_DECL
271 #include "AArch64GenGlobalISel.inc"
272 #undef GET_GLOBALISEL_PREDICATES_DECL
273 
274 // We declare the temporaries used by selectImpl() in the class to minimize the
275 // cost of constructing placeholder values.
276 #define GET_GLOBALISEL_TEMPORARIES_DECL
277 #include "AArch64GenGlobalISel.inc"
278 #undef GET_GLOBALISEL_TEMPORARIES_DECL
279 };
280 
281 } // end anonymous namespace
282 
283 #define GET_GLOBALISEL_IMPL
284 #include "AArch64GenGlobalISel.inc"
285 #undef GET_GLOBALISEL_IMPL
286 
287 AArch64InstructionSelector::AArch64InstructionSelector(
288  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
289  const AArch64RegisterBankInfo &RBI)
290  : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
291  TRI(*STI.getRegisterInfo()), RBI(RBI),
292 #define GET_GLOBALISEL_PREDICATES_INIT
293 #include "AArch64GenGlobalISel.inc"
294 #undef GET_GLOBALISEL_PREDICATES_INIT
295 #define GET_GLOBALISEL_TEMPORARIES_INIT
296 #include "AArch64GenGlobalISel.inc"
297 #undef GET_GLOBALISEL_TEMPORARIES_INIT
298 {
299 }
300 
301 // FIXME: This should be target-independent, inferred from the types declared
302 // for each class in the bank.
303 static const TargetRegisterClass *
304 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
305  const RegisterBankInfo &RBI,
306  bool GetAllRegSet = false) {
307  if (RB.getID() == AArch64::GPRRegBankID) {
308  if (Ty.getSizeInBits() <= 32)
309  return GetAllRegSet ? &AArch64::GPR32allRegClass
310  : &AArch64::GPR32RegClass;
311  if (Ty.getSizeInBits() == 64)
312  return GetAllRegSet ? &AArch64::GPR64allRegClass
313  : &AArch64::GPR64RegClass;
314  return nullptr;
315  }
316 
317  if (RB.getID() == AArch64::FPRRegBankID) {
318  if (Ty.getSizeInBits() <= 16)
319  return &AArch64::FPR16RegClass;
320  if (Ty.getSizeInBits() == 32)
321  return &AArch64::FPR32RegClass;
322  if (Ty.getSizeInBits() == 64)
323  return &AArch64::FPR64RegClass;
324  if (Ty.getSizeInBits() == 128)
325  return &AArch64::FPR128RegClass;
326  return nullptr;
327  }
328 
329  return nullptr;
330 }
331 
332 /// Given a register bank and a size in bits, return the smallest register class
333 /// that can represent that combination.
334 static const TargetRegisterClass *
335 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
336  bool GetAllRegSet = false) {
337  unsigned RegBankID = RB.getID();
338 
339  if (RegBankID == AArch64::GPRRegBankID) {
340  if (SizeInBits <= 32)
341  return GetAllRegSet ? &AArch64::GPR32allRegClass
342  : &AArch64::GPR32RegClass;
343  if (SizeInBits == 64)
344  return GetAllRegSet ? &AArch64::GPR64allRegClass
345  : &AArch64::GPR64RegClass;
346  }
347 
348  if (RegBankID == AArch64::FPRRegBankID) {
349  switch (SizeInBits) {
350  default:
351  return nullptr;
352  case 8:
353  return &AArch64::FPR8RegClass;
354  case 16:
355  return &AArch64::FPR16RegClass;
356  case 32:
357  return &AArch64::FPR32RegClass;
358  case 64:
359  return &AArch64::FPR64RegClass;
360  case 128:
361  return &AArch64::FPR128RegClass;
362  }
363  }
364 
365  return nullptr;
366 }
367 
368 /// Returns the correct subregister to use for a given register class.
369 static bool getSubRegForClass(const TargetRegisterClass *RC,
370  const TargetRegisterInfo &TRI, unsigned &SubReg) {
371  switch (TRI.getRegSizeInBits(*RC)) {
372  case 8:
373  SubReg = AArch64::bsub;
374  break;
375  case 16:
376  SubReg = AArch64::hsub;
377  break;
378  case 32:
379  if (RC != &AArch64::FPR32RegClass)
380  SubReg = AArch64::sub_32;
381  else
382  SubReg = AArch64::ssub;
383  break;
384  case 64:
385  SubReg = AArch64::dsub;
386  break;
387  default:
388  LLVM_DEBUG(
389  dbgs() << "Couldn't find appropriate subregister for register class.");
390  return false;
391  }
392 
393  return true;
394 }
395 
396 /// Check whether \p I is a currently unsupported binary operation:
397 /// - it has an unsized type
398 /// - an operand is not a vreg
399 /// - its operands are not all in the same bank
400 /// These are checks that should someday live in the verifier, but right now,
401 /// these are mostly limitations of the aarch64 selector.
402 static bool unsupportedBinOp(const MachineInstr &I,
403  const AArch64RegisterBankInfo &RBI,
404  const MachineRegisterInfo &MRI,
405  const AArch64RegisterInfo &TRI) {
406  LLT Ty = MRI.getType(I.getOperand(0).getReg());
407  if (!Ty.isValid()) {
408  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
409  return true;
410  }
411 
412  const RegisterBank *PrevOpBank = nullptr;
413  for (auto &MO : I.operands()) {
414  // FIXME: Support non-register operands.
415  if (!MO.isReg()) {
416  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
417  return true;
418  }
419 
420  // FIXME: Can generic operations have physical register operands? If
421  // so, this will need to be taught about that, and we'll need to get the
422  // bank out of the minimal class for the register.
423  // Either way, this needs to be documented (and possibly verified).
424  if (!Register::isVirtualRegister(MO.getReg())) {
425  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
426  return true;
427  }
428 
429  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
430  if (!OpBank) {
431  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
432  return true;
433  }
434 
435  if (PrevOpBank && OpBank != PrevOpBank) {
436  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
437  return true;
438  }
439  PrevOpBank = OpBank;
440  }
441  return false;
442 }
443 
444 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
445 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
446 /// and of size \p OpSize.
447 /// \returns \p GenericOpc if the combination is unsupported.
448 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
449  unsigned OpSize) {
450  switch (RegBankID) {
451  case AArch64::GPRRegBankID:
452  if (OpSize == 32) {
453  switch (GenericOpc) {
454  case TargetOpcode::G_SHL:
455  return AArch64::LSLVWr;
456  case TargetOpcode::G_LSHR:
457  return AArch64::LSRVWr;
458  case TargetOpcode::G_ASHR:
459  return AArch64::ASRVWr;
460  default:
461  return GenericOpc;
462  }
463  } else if (OpSize == 64) {
464  switch (GenericOpc) {
465  case TargetOpcode::G_GEP:
466  return AArch64::ADDXrr;
467  case TargetOpcode::G_SHL:
468  return AArch64::LSLVXr;
469  case TargetOpcode::G_LSHR:
470  return AArch64::LSRVXr;
471  case TargetOpcode::G_ASHR:
472  return AArch64::ASRVXr;
473  default:
474  return GenericOpc;
475  }
476  }
477  break;
478  case AArch64::FPRRegBankID:
479  switch (OpSize) {
480  case 32:
481  switch (GenericOpc) {
482  case TargetOpcode::G_FADD:
483  return AArch64::FADDSrr;
484  case TargetOpcode::G_FSUB:
485  return AArch64::FSUBSrr;
486  case TargetOpcode::G_FMUL:
487  return AArch64::FMULSrr;
488  case TargetOpcode::G_FDIV:
489  return AArch64::FDIVSrr;
490  default:
491  return GenericOpc;
492  }
493  case 64:
494  switch (GenericOpc) {
495  case TargetOpcode::G_FADD:
496  return AArch64::FADDDrr;
497  case TargetOpcode::G_FSUB:
498  return AArch64::FSUBDrr;
499  case TargetOpcode::G_FMUL:
500  return AArch64::FMULDrr;
501  case TargetOpcode::G_FDIV:
502  return AArch64::FDIVDrr;
503  case TargetOpcode::G_OR:
504  return AArch64::ORRv8i8;
505  default:
506  return GenericOpc;
507  }
508  }
509  break;
510  }
511  return GenericOpc;
512 }
513 
514 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
515 /// appropriate for the (value) register bank \p RegBankID and of memory access
516 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
517 /// addressing mode (e.g., LDRXui).
518 /// \returns \p GenericOpc if the combination is unsupported.
519 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
520  unsigned OpSize) {
521  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
522  switch (RegBankID) {
523  case AArch64::GPRRegBankID:
524  switch (OpSize) {
525  case 8:
526  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
527  case 16:
528  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
529  case 32:
530  return isStore ? AArch64::STRWui : AArch64::LDRWui;
531  case 64:
532  return isStore ? AArch64::STRXui : AArch64::LDRXui;
533  }
534  break;
535  case AArch64::FPRRegBankID:
536  switch (OpSize) {
537  case 8:
538  return isStore ? AArch64::STRBui : AArch64::LDRBui;
539  case 16:
540  return isStore ? AArch64::STRHui : AArch64::LDRHui;
541  case 32:
542  return isStore ? AArch64::STRSui : AArch64::LDRSui;
543  case 64:
544  return isStore ? AArch64::STRDui : AArch64::LDRDui;
545  }
546  break;
547  }
548  return GenericOpc;
549 }
550 
551 #ifndef NDEBUG
552 /// Helper function that verifies that we have a valid copy at the end of
553 /// selectCopy. Verifies that the source and dest have the expected sizes and
554 /// then returns true.
555 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
556  const MachineRegisterInfo &MRI,
557  const TargetRegisterInfo &TRI,
558  const RegisterBankInfo &RBI) {
559  const Register DstReg = I.getOperand(0).getReg();
560  const Register SrcReg = I.getOperand(1).getReg();
561  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
562  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
563 
564  // Make sure the size of the source and dest line up.
565  assert(
566  (DstSize == SrcSize ||
567  // Copies are a means to set up initial types; the number of
568  // bits may not exactly match.
569  (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
570  // Copies are a means to copy bits around; as long as we are
571  // on the same register class, that's fine. Otherwise, that
572  // means we need some SUBREG_TO_REG or AND & co.
573  (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
574  "Copy with different width?!");
575 
576  // Check the size of the destination.
577  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
578  "GPRs cannot get more than 64-bit width values");
579 
580  return true;
581 }
582 #endif
583 
584 /// Helper function for selectCopy. Inserts a subregister copy from
585 /// \p *From to \p *To, linking it up to \p I.
586 ///
587 /// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
588 ///
589 /// CopyReg (From class) = COPY SrcReg
590 /// SubRegCopy (To class) = COPY CopyReg:SubReg
591 /// Dst = COPY SubRegCopy
592 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
593  const RegisterBankInfo &RBI, Register SrcReg,
594  const TargetRegisterClass *From,
595  const TargetRegisterClass *To,
596  unsigned SubReg) {
597  MachineIRBuilder MIB(I);
598  auto Copy = MIB.buildCopy({From}, {SrcReg});
599  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
600  .addReg(Copy.getReg(0), 0, SubReg);
601  MachineOperand &RegOp = I.getOperand(1);
602  RegOp.setReg(SubRegCopy.getReg(0));
603 
604  // It's possible that the destination register won't be constrained. Make
605  // sure that happens.
606  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
607  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
608 
609  return true;
610 }
611 
612 /// Helper function to get the source and destination register classes for a
613 /// copy. Returns a std::pair containing the source register class for the
614 /// copy, and the destination register class for the copy. If a register class
615 /// cannot be determined, then it will be nullptr.
616 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
617 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
618  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
619  const RegisterBankInfo &RBI) {
620  Register DstReg = I.getOperand(0).getReg();
621  Register SrcReg = I.getOperand(1).getReg();
622  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
623  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
624  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
625  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
626 
627  // Special casing for cross-bank copies of s1s. We can technically represent
628  // a 1-bit value with any size of register. The minimum size for a GPR is 32
629  // bits. So, we need to put the FPR on 32 bits as well.
630  //
631  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
632  // then we can pull it into the helpers that get the appropriate class for a
633  // register bank. Or make a new helper that carries along some constraint
634  // information.
635  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
636  SrcSize = DstSize = 32;
637 
638  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
639  getMinClassForRegBank(DstRegBank, DstSize, true)};
640 }
641 
642 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
643  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
644  const RegisterBankInfo &RBI) {
645 
646  Register DstReg = I.getOperand(0).getReg();
647  Register SrcReg = I.getOperand(1).getReg();
648  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
649  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
650 
651  // Find the correct register classes for the source and destination registers.
652  const TargetRegisterClass *SrcRC;
653  const TargetRegisterClass *DstRC;
654  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
655 
656  if (!DstRC) {
657  LLVM_DEBUG(dbgs() << "Unexpected dest size "
658  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
659  return false;
660  }
661 
662  // A couple helpers below, for making sure that the copy we produce is valid.
663 
664  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
665  // to verify that the src and dst are the same size, since that's handled by
666  // the SUBREG_TO_REG.
667  bool KnownValid = false;
668 
669  // Returns true, or asserts if something we don't expect happens. Instead of
670  // returning true, we return isValidCopy() to ensure that we verify the
671  // result.
672  auto CheckCopy = [&]() {
673  // If we have a bitcast or something, we can't have physical registers.
674  assert((I.isCopy() ||
675  (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
676  !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
677  "No phys reg on generic operator!");
678  assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
679  (void)KnownValid;
680  return true;
681  };
682 
683  // Is this a copy? If so, then we may need to insert a subregister copy, or
684  // a SUBREG_TO_REG.
685  if (I.isCopy()) {
686  // Yes. Check if there's anything to fix up.
687  if (!SrcRC) {
688  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
689  return false;
690  }
691 
692  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
693  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
694 
695  // If we're doing a cross-bank copy on different-sized registers, we need
696  // to do a bit more work.
697  if (SrcSize > DstSize) {
698  // We're doing a cross-bank copy into a smaller register. We need a
699  // subregister copy. First, get a register class that's on the same bank
700  // as the destination, but the same size as the source.
701  const TargetRegisterClass *SubregRC =
702  getMinClassForRegBank(DstRegBank, SrcSize, true);
703  assert(SubregRC && "Didn't get a register class for subreg?");
704 
705  // Get the appropriate subregister for the destination.
706  unsigned SubReg = 0;
707  if (!getSubRegForClass(DstRC, TRI, SubReg)) {
708  LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
709  return false;
710  }
711 
712  // Now, insert a subregister copy using the new register class.
713  selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
714  return CheckCopy();
715  }
716 
717  // Is this a cross-bank copy?
718  if (DstRegBank.getID() != SrcRegBank.getID()) {
719  if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
720  SrcSize == 16) {
721  // Special case for FPR16 to GPR32.
722  // FIXME: This can probably be generalized like the above case.
723  Register PromoteReg =
724  MRI.createVirtualRegister(&AArch64::FPR32RegClass);
725  BuildMI(*I.getParent(), I, I.getDebugLoc(),
726  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
727  .addImm(0)
728  .addUse(SrcReg)
729  .addImm(AArch64::hsub);
730  MachineOperand &RegOp = I.getOperand(1);
731  RegOp.setReg(PromoteReg);
732 
733  // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
734  KnownValid = true;
735  }
736  }
737 
738  // If the destination is a physical register, then there's nothing to
739  // change, so we're done.
740  if (Register::isPhysicalRegister(DstReg))
741  return CheckCopy();
742  }
743 
744  // No need to constrain SrcReg. It will get constrained when we hit one of
745  // its other uses or its defs. Copies do not have constraints.
746  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
747  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
748  << " operand\n");
749  return false;
750  }
751  I.setDesc(TII.get(AArch64::COPY));
752  return CheckCopy();
753 }
754 
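/// Select the AArch64 opcode for the scalar integer<->FP conversion \p GenericOpc
/// (G_SITOFP, G_UITOFP, G_FPTOSI or G_FPTOUI) given the destination and source
/// scalar types. Returns \p GenericOpc if the combination is unsupported.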
755 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
756  if (!DstTy.isScalar() || !SrcTy.isScalar())
757  return GenericOpc;
758 
759  const unsigned DstSize = DstTy.getSizeInBits();
760  const unsigned SrcSize = SrcTy.getSizeInBits();
761 
762  switch (DstSize) {
763  case 32:
764  switch (SrcSize) {
765  case 32:
766  switch (GenericOpc) {
767  case TargetOpcode::G_SITOFP:
768  return AArch64::SCVTFUWSri;
769  case TargetOpcode::G_UITOFP:
770  return AArch64::UCVTFUWSri;
771  case TargetOpcode::G_FPTOSI:
772  return AArch64::FCVTZSUWSr;
773  case TargetOpcode::G_FPTOUI:
774  return AArch64::FCVTZUUWSr;
775  default:
776  return GenericOpc;
777  }
778  case 64:
779  switch (GenericOpc) {
780  case TargetOpcode::G_SITOFP:
781  return AArch64::SCVTFUXSri;
782  case TargetOpcode::G_UITOFP:
783  return AArch64::UCVTFUXSri;
784  case TargetOpcode::G_FPTOSI:
785  return AArch64::FCVTZSUWDr;
786  case TargetOpcode::G_FPTOUI:
787  return AArch64::FCVTZUUWDr;
788  default:
789  return GenericOpc;
790  }
791  default:
792  return GenericOpc;
793  }
794  case 64:
795  switch (SrcSize) {
796  case 32:
797  switch (GenericOpc) {
798  case TargetOpcode::G_SITOFP:
799  return AArch64::SCVTFUWDri;
800  case TargetOpcode::G_UITOFP:
801  return AArch64::UCVTFUWDri;
802  case TargetOpcode::G_FPTOSI:
803  return AArch64::FCVTZSUXSr;
804  case TargetOpcode::G_FPTOUI:
805  return AArch64::FCVTZUUXSr;
806  default:
807  return GenericOpc;
808  }
809  case 64:
810  switch (GenericOpc) {
811  case TargetOpcode::G_SITOFP:
812  return AArch64::SCVTFUXDri;
813  case TargetOpcode::G_UITOFP:
814  return AArch64::UCVTFUXDri;
815  case TargetOpcode::G_FPTOSI:
816  return AArch64::FCVTZSUXDr;
817  case TargetOpcode::G_FPTOUI:
818  return AArch64::FCVTZUUXDr;
819  default:
820  return GenericOpc;
821  }
822  default:
823  return GenericOpc;
824  }
825  default:
826  return GenericOpc;
827  };
828  return GenericOpc;
829 }
830 
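/// Select the AArch64 CSEL/FCSEL opcode to use for a scalar G_SELECT, based on
/// the type and register bank of the destination. Returns 0 if unsupported.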
831 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
832  const RegisterBankInfo &RBI) {
833  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
834  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
835  AArch64::GPRRegBankID);
836  LLT Ty = MRI.getType(I.getOperand(0).getReg());
837  if (Ty == LLT::scalar(32))
838  return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
839  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
840  return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
841  return 0;
842 }
843 
844 /// Helper function to select the opcode for a G_FCMP.
845 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
846  // If this is a compare against +0.0, then we don't have to explicitly
847  // materialize a constant.
848  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
849  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
850  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
851  if (OpSize != 32 && OpSize != 64)
852  return 0;
853  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
854  {AArch64::FCMPSri, AArch64::FCMPDri}};
855  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
856 }
857 
858 /// Returns true if \p P is an unsigned integer comparison predicate.
859 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
860  switch (P) {
861  default:
862  return false;
863  case CmpInst::ICMP_UGT:
864  case CmpInst::ICMP_UGE:
865  case CmpInst::ICMP_ULT:
866  case CmpInst::ICMP_ULE:
867  return true;
868  }
869 }
870 
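/// Convert an integer comparison predicate to the equivalent AArch64 condition
/// code.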
871 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
872  switch (P) {
873  default:
874  llvm_unreachable("Unknown condition code!");
875  case CmpInst::ICMP_NE:
876  return AArch64CC::NE;
877  case CmpInst::ICMP_EQ:
878  return AArch64CC::EQ;
879  case CmpInst::ICMP_SGT:
880  return AArch64CC::GT;
881  case CmpInst::ICMP_SGE:
882  return AArch64CC::GE;
883  case CmpInst::ICMP_SLT:
884  return AArch64CC::LT;
885  case CmpInst::ICMP_SLE:
886  return AArch64CC::LE;
887  case CmpInst::ICMP_UGT:
888  return AArch64CC::HI;
889  case CmpInst::ICMP_UGE:
890  return AArch64CC::HS;
891  case CmpInst::ICMP_ULT:
892  return AArch64CC::LO;
893  case CmpInst::ICMP_ULE:
894  return AArch64CC::LS;
895  }
896 }
897 
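/// Convert a floating-point comparison predicate to AArch64 condition codes.
/// Predicates that need two condition codes (FCMP_ONE, FCMP_UEQ) also set
/// \p CondCode2; otherwise it is left as AArch64CC::AL.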
898 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
899  AArch64CC::CondCode &CondCode,
900  AArch64CC::CondCode &CondCode2) {
901  CondCode2 = AArch64CC::AL;
902  switch (P) {
903  default:
904  llvm_unreachable("Unknown FP condition!");
905  case CmpInst::FCMP_OEQ:
906  CondCode = AArch64CC::EQ;
907  break;
908  case CmpInst::FCMP_OGT:
909  CondCode = AArch64CC::GT;
910  break;
911  case CmpInst::FCMP_OGE:
912  CondCode = AArch64CC::GE;
913  break;
914  case CmpInst::FCMP_OLT:
915  CondCode = AArch64CC::MI;
916  break;
917  case CmpInst::FCMP_OLE:
918  CondCode = AArch64CC::LS;
919  break;
920  case CmpInst::FCMP_ONE:
921  CondCode = AArch64CC::MI;
922  CondCode2 = AArch64CC::GT;
923  break;
924  case CmpInst::FCMP_ORD:
925  CondCode = AArch64CC::VC;
926  break;
927  case CmpInst::FCMP_UNO:
928  CondCode = AArch64CC::VS;
929  break;
930  case CmpInst::FCMP_UEQ:
931  CondCode = AArch64CC::EQ;
932  CondCode2 = AArch64CC::VS;
933  break;
934  case CmpInst::FCMP_UGT:
935  CondCode = AArch64CC::HI;
936  break;
937  case CmpInst::FCMP_UGE:
938  CondCode = AArch64CC::PL;
939  break;
940  case CmpInst::FCMP_ULT:
941  CondCode = AArch64CC::LT;
942  break;
943  case CmpInst::FCMP_ULE:
944  CondCode = AArch64CC::LE;
945  break;
946  case CmpInst::FCMP_UNE:
947  CondCode = AArch64CC::NE;
948  break;
949  }
950 }
951 
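// Try to fold a G_ICMP against zero feeding this G_BRCOND into a single
// CBZ/CBNZ; failing that, emit an integer compare followed by a Bcc.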
952 bool AArch64InstructionSelector::selectCompareBranch(
953  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
954 
955  const Register CondReg = I.getOperand(0).getReg();
956  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
957  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
958  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
959  CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
960  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
961  return false;
962 
963  Register LHS = CCMI->getOperand(2).getReg();
964  Register RHS = CCMI->getOperand(3).getReg();
965  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
966  if (!VRegAndVal)
967  std::swap(RHS, LHS);
968 
969  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
970  if (!VRegAndVal || VRegAndVal->Value != 0) {
971  MachineIRBuilder MIB(I);
972  // If we can't select a CBZ then emit a cmp + Bcc.
973  if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
974  CCMI->getOperand(1), MIB))
975  return false;
976  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
977  (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
978  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
979  I.eraseFromParent();
980  return true;
981  }
982 
983  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
984  if (RB.getID() != AArch64::GPRRegBankID)
985  return false;
986 
987  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
988  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
989  return false;
990 
991  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
992  unsigned CBOpc = 0;
993  if (CmpWidth <= 32)
994  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
995  else if (CmpWidth == 64)
996  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
997  else
998  return false;
999 
1000  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1001  .addUse(LHS)
1002  .addMBB(DestMBB)
1003  .constrainAllUses(TII, TRI, RBI);
1004 
1005  I.eraseFromParent();
1006  return true;
1007 }
1008 
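// Select a vector G_SHL using USHL, which takes per-lane shift amounts in its
// second source operand. Only v2s32 and v4s32 are currently handled.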
1009 bool AArch64InstructionSelector::selectVectorSHL(
1010  MachineInstr &I, MachineRegisterInfo &MRI) const {
1011  assert(I.getOpcode() == TargetOpcode::G_SHL);
1012  Register DstReg = I.getOperand(0).getReg();
1013  const LLT Ty = MRI.getType(DstReg);
1014  Register Src1Reg = I.getOperand(1).getReg();
1015  Register Src2Reg = I.getOperand(2).getReg();
1016 
1017  if (!Ty.isVector())
1018  return false;
1019 
1020  unsigned Opc = 0;
1021  if (Ty == LLT::vector(4, 32)) {
1022  Opc = AArch64::USHLv4i32;
1023  } else if (Ty == LLT::vector(2, 32)) {
1024  Opc = AArch64::USHLv2i32;
1025  } else {
1026  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1027  return false;
1028  }
1029 
1030  MachineIRBuilder MIB(I);
1031  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
1032  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
1033  I.eraseFromParent();
1034  return true;
1035 }
1036 
1037 bool AArch64InstructionSelector::selectVectorASHR(
1038  MachineInstr &I, MachineRegisterInfo &MRI) const {
1039  assert(I.getOpcode() == TargetOpcode::G_ASHR);
1040  Register DstReg = I.getOperand(0).getReg();
1041  const LLT Ty = MRI.getType(DstReg);
1042  Register Src1Reg = I.getOperand(1).getReg();
1043  Register Src2Reg = I.getOperand(2).getReg();
1044 
1045  if (!Ty.isVector())
1046  return false;
1047 
1048  // There is no vector shift-right-by-register instruction, but the
1049  // shift-left-by-register instruction takes a signed shift amount, where
1050  // negative amounts specify a right shift.
1051 
1052  unsigned Opc = 0;
1053  unsigned NegOpc = 0;
1054  const TargetRegisterClass *RC = nullptr;
1055  if (Ty == LLT::vector(4, 32)) {
1056  Opc = AArch64::SSHLv4i32;
1057  NegOpc = AArch64::NEGv4i32;
1058  RC = &AArch64::FPR128RegClass;
1059  } else if (Ty == LLT::vector(2, 32)) {
1060  Opc = AArch64::SSHLv2i32;
1061  NegOpc = AArch64::NEGv2i32;
1062  RC = &AArch64::FPR64RegClass;
1063  } else {
1064  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1065  return false;
1066  }
1067 
1068  MachineIRBuilder MIB(I);
1069  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1070  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1071  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1072  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1073  I.eraseFromParent();
1074  return true;
1075 }
1076 
1077 bool AArch64InstructionSelector::selectVaStartAAPCS(
1078  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1079  return false;
1080 }
1081 
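// Lower G_VASTART for Darwin: materialize the address of the first variadic
// argument stack slot and store it into the va_list object.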
1082 bool AArch64InstructionSelector::selectVaStartDarwin(
1083  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1084  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1085  Register ListReg = I.getOperand(0).getReg();
1086 
1087  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1088 
1089  auto MIB =
1090  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1091  .addDef(ArgsAddrReg)
1092  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1093  .addImm(0)
1094  .addImm(0);
1095 
1096  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1097 
1098  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1099  .addUse(ArgsAddrReg)
1100  .addUse(ListReg)
1101  .addImm(0)
1102  .addMemOperand(*I.memoperands_begin());
1103 
1104  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1105  I.eraseFromParent();
1106  return true;
1107 }
1108 
1109 void AArch64InstructionSelector::materializeLargeCMVal(
1110  MachineInstr &I, const Value *V, unsigned OpFlags) const {
1111  MachineBasicBlock &MBB = *I.getParent();
1112  MachineFunction &MF = *MBB.getParent();
1113  MachineRegisterInfo &MRI = MF.getRegInfo();
1114  MachineIRBuilder MIB(I);
1115 
1116  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1117  MovZ->addOperand(MF, I.getOperand(1));
1118  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1119  AArch64II::MO_NC);
1120  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1121  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1122 
1123  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1124  Register ForceDstReg) {
1125  Register DstReg = ForceDstReg
1126  ? ForceDstReg
1127  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1128  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1129  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1130  MovI->addOperand(MF, MachineOperand::CreateGA(
1131  GV, MovZ->getOperand(1).getOffset(), Flags));
1132  } else {
1133  MovI->addOperand(
1134  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1135  MovZ->getOperand(1).getOffset(), Flags));
1136  }
1137  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1138  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1139  return DstReg;
1140  };
1141  Register DstReg = BuildMovK(MovZ.getReg(0),
1142  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1143  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1144  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1145  return;
1146 }
1147 
1148 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1149  MachineBasicBlock &MBB = *I.getParent();
1150  MachineFunction &MF = *MBB.getParent();
1151  MachineRegisterInfo &MRI = MF.getRegInfo();
1152 
1153  switch (I.getOpcode()) {
1154  case TargetOpcode::G_SHL:
1155  case TargetOpcode::G_ASHR:
1156  case TargetOpcode::G_LSHR: {
1157  // These shifts are legalized to have 64 bit shift amounts because we want
1158  // to take advantage of the existing imported selection patterns that assume
1159  // the immediates are s64s. However, if the shifted type is 32 bits and for
1160  // some reason we receive input GMIR that has an s64 shift amount that's not
1161  // a G_CONSTANT, insert a truncate so that we can still select the s32
1162  // register-register variant.
1163  Register SrcReg = I.getOperand(1).getReg();
1164  Register ShiftReg = I.getOperand(2).getReg();
1165  const LLT ShiftTy = MRI.getType(ShiftReg);
1166  const LLT SrcTy = MRI.getType(SrcReg);
1167  if (SrcTy.isVector())
1168  return;
1169  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1170  if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1171  return;
1172  auto *AmtMI = MRI.getVRegDef(ShiftReg);
1173  assert(AmtMI && "could not find a vreg definition for shift amount");
1174  if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1175  // Insert a subregister copy to implement a 64->32 trunc
1176  MachineIRBuilder MIB(I);
1177  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1178  .addReg(ShiftReg, 0, AArch64::sub_32);
1179  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1180  I.getOperand(2).setReg(Trunc.getReg(0));
1181  }
1182  return;
1183  }
1184  case TargetOpcode::G_STORE:
1185  contractCrossBankCopyIntoStore(I, MRI);
1186  return;
1187  default:
1188  return;
1189  }
1190 }
1191 
1192 bool AArch64InstructionSelector::earlySelectSHL(
1193  MachineInstr &I, MachineRegisterInfo &MRI) const {
1194  // We try to match the immediate variant of LSL, which is actually an alias
1195  // for a special case of UBFM. Otherwise, we fall back to the imported
1196  // selector which will match the register variant.
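  // For example, shifting an s64 value left by 4 selects to
  // UBFMXri %dst, %src, 60, 59 (immr = 64 - 4, imms = 63 - 4).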
1197  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1198  const auto &MO = I.getOperand(2);
1199  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1200  if (!VRegAndVal)
1201  return false;
1202 
1203  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1204  if (DstTy.isVector())
1205  return false;
1206  bool Is64Bit = DstTy.getSizeInBits() == 64;
1207  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1208  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1209  MachineIRBuilder MIB(I);
1210 
1211  if (!Imm1Fn || !Imm2Fn)
1212  return false;
1213 
1214  auto NewI =
1215  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1216  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1217 
1218  for (auto &RenderFn : *Imm1Fn)
1219  RenderFn(NewI);
1220  for (auto &RenderFn : *Imm2Fn)
1221  RenderFn(NewI);
1222 
1223  I.eraseFromParent();
1224  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1225 }
1226 
1227 void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1228  MachineInstr &I, MachineRegisterInfo &MRI) const {
1229  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1230  // If we're storing a scalar, it doesn't matter what register bank that
1231  // scalar is on. All that matters is the size.
1232  //
1233  // So, if we see something like this (with a 32-bit scalar as an example):
1234  //
1235  // %x:gpr(s32) = ... something ...
1236  // %y:fpr(s32) = COPY %x:gpr(s32)
1237  // G_STORE %y:fpr(s32)
1238  //
1239  // We can fix this up into something like this:
1240  //
1241  // G_STORE %x:gpr(s32)
1242  //
1243  // And then continue the selection process normally.
1244  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1245  if (!Def)
1246  return;
1247  Register DefDstReg = Def->getOperand(0).getReg();
1248  LLT DefDstTy = MRI.getType(DefDstReg);
1249  Register StoreSrcReg = I.getOperand(0).getReg();
1250  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1251 
1252  // If we get something strange like a physical register, then we shouldn't
1253  // go any further.
1254  if (!DefDstTy.isValid())
1255  return;
1256 
1257  // Are the source and dst types the same size?
1258  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1259  return;
1260 
1261  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1262  RBI.getRegBank(DefDstReg, MRI, TRI))
1263  return;
1264 
1265  // We have a cross-bank copy, which is entering a store. Let's fold it.
1266  I.getOperand(0).setReg(DefDstReg);
1267 }
1268 
1269 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1270  assert(I.getParent() && "Instruction should be in a basic block!");
1271  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1272 
1273  MachineBasicBlock &MBB = *I.getParent();
1274  MachineFunction &MF = *MBB.getParent();
1275  MachineRegisterInfo &MRI = MF.getRegInfo();
1276 
1277  switch (I.getOpcode()) {
1278  case TargetOpcode::G_SHL:
1279  return earlySelectSHL(I, MRI);
1280  case TargetOpcode::G_CONSTANT: {
1281  bool IsZero = false;
1282  if (I.getOperand(1).isCImm())
1283  IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1284  else if (I.getOperand(1).isImm())
1285  IsZero = I.getOperand(1).getImm() == 0;
1286 
1287  if (!IsZero)
1288  return false;
1289 
1290  Register DefReg = I.getOperand(0).getReg();
1291  LLT Ty = MRI.getType(DefReg);
1292  if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1293  return false;
1294 
1295  if (Ty == LLT::scalar(64)) {
1296  I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1297  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1298  } else {
1299  I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1300  RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1301  }
1302  I.setDesc(TII.get(TargetOpcode::COPY));
1303  return true;
1304  }
1305  default:
1306  return false;
1307  }
1308 }
1309 
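// Main instruction selection entry point: handle non-generic instructions,
// run pre-selection lowering and the early selector, try the TableGen-erated
// selector, and finally fall back to manual C++ selection for the rest.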
1310 bool AArch64InstructionSelector::select(MachineInstr &I) {
1311  assert(I.getParent() && "Instruction should be in a basic block!");
1312  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1313 
1314  MachineBasicBlock &MBB = *I.getParent();
1315  MachineFunction &MF = *MBB.getParent();
1316  MachineRegisterInfo &MRI = MF.getRegInfo();
1317 
1318  unsigned Opcode = I.getOpcode();
1319  // G_PHI requires the same handling as PHI.
1320  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1321  // Certain non-generic instructions also need some special handling.
1322 
1323  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1324  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1325 
1326  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1327  const Register DefReg = I.getOperand(0).getReg();
1328  const LLT DefTy = MRI.getType(DefReg);
1329 
1330  const RegClassOrRegBank &RegClassOrBank =
1331  MRI.getRegClassOrRegBank(DefReg);
1332 
1333  const TargetRegisterClass *DefRC
1334  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1335  if (!DefRC) {
1336  if (!DefTy.isValid()) {
1337  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1338  return false;
1339  }
1340  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1341  DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1342  if (!DefRC) {
1343  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1344  return false;
1345  }
1346  }
1347 
1348  I.setDesc(TII.get(TargetOpcode::PHI));
1349 
1350  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1351  }
1352 
1353  if (I.isCopy())
1354  return selectCopy(I, TII, MRI, TRI, RBI);
1355 
1356  return true;
1357  }
1358 
1359 
1360  if (I.getNumOperands() != I.getNumExplicitOperands()) {
1361  LLVM_DEBUG(
1362  dbgs() << "Generic instruction has unexpected implicit operands\n");
1363  return false;
1364  }
1365 
1366  // Try to do some lowering before we start instruction selecting. These
1367  // lowerings are purely transformations on the input G_MIR and so selection
1368  // must continue after any modification of the instruction.
1369  preISelLower(I);
1370 
1371  // There may be patterns that the importer can't handle optimally, selecting
1372  // them into a suboptimal sequence, so that our custom C++ selection code
1373  // later never has a chance to work on them. Therefore, we have an early
1374  // selection attempt here to give priority to certain selection routines
1375  // over the imported ones.
1376  if (earlySelect(I))
1377  return true;
1378 
1379  if (selectImpl(I, *CoverageInfo))
1380  return true;
1381 
1382  LLT Ty =
1383  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1384 
1385  MachineIRBuilder MIB(I);
1386 
1387  switch (Opcode) {
1388  case TargetOpcode::G_BRCOND: {
1389  if (Ty.getSizeInBits() > 32) {
1390  // We shouldn't need this on AArch64, but it would be implemented as an
1391  // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1392  // bit being tested is < 32.
1393  LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1394  << ", expected at most 32-bits");
1395  return false;
1396  }
1397 
1398  const Register CondReg = I.getOperand(0).getReg();
1399  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1400 
1401  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1402  // instructions will not be produced, as they are conditional branch
1403  // instructions that do not set flags.
1404  bool ProduceNonFlagSettingCondBr =
1405  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1406  if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1407  return true;
1408 
1409  if (ProduceNonFlagSettingCondBr) {
1410  auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1411  .addUse(CondReg)
1412  .addImm(/*bit offset=*/0)
1413  .addMBB(DestMBB);
1414 
1415  I.eraseFromParent();
1416  return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1417  } else {
1418  auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1419  .addDef(AArch64::WZR)
1420  .addUse(CondReg)
1421  .addImm(1);
1422  constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1423  auto Bcc =
1424  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1425  .addImm(AArch64CC::EQ)
1426  .addMBB(DestMBB);
1427 
1428  I.eraseFromParent();
1429  return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1430  }
1431  }
1432 
1433  case TargetOpcode::G_BRINDIRECT: {
1434  I.setDesc(TII.get(AArch64::BR));
1435  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1436  }
1437 
1438  case TargetOpcode::G_BRJT:
1439  return selectBrJT(I, MRI);
1440 
1441  case TargetOpcode::G_BSWAP: {
1442  // Handle vector types for G_BSWAP directly.
1443  Register DstReg = I.getOperand(0).getReg();
1444  LLT DstTy = MRI.getType(DstReg);
1445 
1446  // We should only get vector types here; everything else is handled by the
1447  // importer right now.
1448  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1449  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1450  return false;
1451  }
1452 
1453  // Only handle 4 and 2 element vectors for now.
1454  // TODO: 16-bit elements.
1455  unsigned NumElts = DstTy.getNumElements();
1456  if (NumElts != 4 && NumElts != 2) {
1457  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1458  return false;
1459  }
1460 
1461  // Choose the correct opcode for the supported types. Right now, that's
1462  // v2s32, v4s32, and v2s64.
1463  unsigned Opc = 0;
1464  unsigned EltSize = DstTy.getElementType().getSizeInBits();
1465  if (EltSize == 32)
1466  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1467  : AArch64::REV32v16i8;
1468  else if (EltSize == 64)
1469  Opc = AArch64::REV64v16i8;
1470 
1471  // We should always get something by the time we get here...
1472  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1473 
1474  I.setDesc(TII.get(Opc));
1475  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1476  }
1477 
1478  case TargetOpcode::G_FCONSTANT:
1479  case TargetOpcode::G_CONSTANT: {
1480  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1481 
1482  const LLT s8 = LLT::scalar(8);
1483  const LLT s16 = LLT::scalar(16);
1484  const LLT s32 = LLT::scalar(32);
1485  const LLT s64 = LLT::scalar(64);
1486  const LLT p0 = LLT::pointer(0, 64);
1487 
1488  const Register DefReg = I.getOperand(0).getReg();
1489  const LLT DefTy = MRI.getType(DefReg);
1490  const unsigned DefSize = DefTy.getSizeInBits();
1491  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1492 
1493  // FIXME: Redundant check, but even less readable when factored out.
1494  if (isFP) {
1495  if (Ty != s32 && Ty != s64) {
1496  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1497  << " constant, expected: " << s32 << " or " << s64
1498  << '\n');
1499  return false;
1500  }
1501 
1502  if (RB.getID() != AArch64::FPRRegBankID) {
1503  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1504  << " constant on bank: " << RB
1505  << ", expected: FPR\n");
1506  return false;
1507  }
1508 
1509  // The case when we have 0.0 is covered by tablegen. Reject it here so we
1510  // can be sure tablegen works correctly and isn't rescued by this code.
1511  if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1512  return false;
1513  } else {
1514  // s32 and s64 are covered by tablegen.
1515  if (Ty != p0 && Ty != s8 && Ty != s16) {
1516  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1517  << " constant, expected: " << s32 << ", " << s64
1518  << ", or " << p0 << '\n');
1519  return false;
1520  }
1521 
1522  if (RB.getID() != AArch64::GPRRegBankID) {
1523  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1524  << " constant on bank: " << RB
1525  << ", expected: GPR\n");
1526  return false;
1527  }
1528  }
1529 
1530  // We allow G_CONSTANT of types < 32b.
1531  const unsigned MovOpc =
1532  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1533 
1534  if (isFP) {
1535  // Either emit a FMOV, or emit a copy to emit a normal mov.
1536  const TargetRegisterClass &GPRRC =
1537  DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1538  const TargetRegisterClass &FPRRC =
1539  DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1540 
1541  // Can we use a FMOV instruction to represent the immediate?
1542  if (emitFMovForFConstant(I, MRI))
1543  return true;
1544 
1545  // Nope. Emit a copy and use a normal mov instead.
1546  const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1547  MachineOperand &RegOp = I.getOperand(0);
1548  RegOp.setReg(DefGPRReg);
1549  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1550  MIB.buildCopy({DefReg}, {DefGPRReg});
1551 
1552  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1553  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1554  return false;
1555  }
1556 
1557  MachineOperand &ImmOp = I.getOperand(1);
1558  // FIXME: Is going through int64_t always correct?
1559  ImmOp.ChangeToImmediate(
1560  ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1561  } else if (I.getOperand(1).isCImm()) {
1562  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1563  I.getOperand(1).ChangeToImmediate(Val);
1564  } else if (I.getOperand(1).isImm()) {
1565  uint64_t Val = I.getOperand(1).getImm();
1566  I.getOperand(1).ChangeToImmediate(Val);
1567  }
1568 
1569  I.setDesc(TII.get(MovOpc));
1570  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1571  return true;
1572  }
1573  case TargetOpcode::G_EXTRACT: {
1574  Register DstReg = I.getOperand(0).getReg();
1575  Register SrcReg = I.getOperand(1).getReg();
1576  LLT SrcTy = MRI.getType(SrcReg);
1577  LLT DstTy = MRI.getType(DstReg);
1578  (void)DstTy;
1579  unsigned SrcSize = SrcTy.getSizeInBits();
1580 
1581  if (SrcTy.getSizeInBits() > 64) {
1582  // This should be an extract of an s128, which is like a vector extract.
1583  if (SrcTy.getSizeInBits() != 128)
1584  return false;
1585  // Only support extracting 64 bits from an s128 at the moment.
1586  if (DstTy.getSizeInBits() != 64)
1587  return false;
1588 
1589  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1590  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1591  // Check we have the right regbank always.
1592  assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1593  DstRB.getID() == AArch64::FPRRegBankID &&
1594  "Wrong extract regbank!");
1595  (void)SrcRB;
1596 
1597  // Emit the same code as a vector extract.
1598  // Offset must be a multiple of 64.
1599  unsigned Offset = I.getOperand(2).getImm();
1600  if (Offset % 64 != 0)
1601  return false;
1602  unsigned LaneIdx = Offset / 64;
1603  MachineIRBuilder MIB(I);
1604  MachineInstr *Extract = emitExtractVectorElt(
1605  DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1606  if (!Extract)
1607  return false;
1608  I.eraseFromParent();
1609  return true;
1610  }
1611 
1612  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1613  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1614  Ty.getSizeInBits() - 1);
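    // An illustrative example (not from the source): extracting an s16 at bit
    // offset 16 from an s32 selects to "UBFMWri %dst, %src, 16, 31", i.e. a
    // logical shift right by 16.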
1615 
1616  if (SrcSize < 64) {
1617  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1618  "unexpected G_EXTRACT types");
1619  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1620  }
1621 
1622  DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1623  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1624  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1625  .addReg(DstReg, 0, AArch64::sub_32);
1626  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1627  AArch64::GPR32RegClass, MRI);
1628  I.getOperand(0).setReg(DstReg);
1629 
1630  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1631  }
1632 
1633  case TargetOpcode::G_INSERT: {
1634  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1635  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1636  unsigned DstSize = DstTy.getSizeInBits();
1637  // Larger inserts are vectors, same-size ones should be something else by
1638  // now (split up or turned into COPYs).
1639  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1640  return false;
1641 
1642  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1643  unsigned LSB = I.getOperand(3).getImm();
1644  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1645  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1646  MachineInstrBuilder(MF, I).addImm(Width - 1);
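    // Sketch of the encoding: inserting an s16 at bit 16 of an s32 yields
    // "BFMWri %dst, %src, 16, 15" -- immr = (32 - 16) % 32 and imms = 16 - 1.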
1647 
1648  if (DstSize < 64) {
1649  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1650  "unexpected G_INSERT types");
1651  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1652  }
1653 
1654  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1655  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1656  TII.get(AArch64::SUBREG_TO_REG))
1657  .addDef(SrcReg)
1658  .addImm(0)
1659  .addUse(I.getOperand(2).getReg())
1660  .addImm(AArch64::sub_32);
1661  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1662  AArch64::GPR32RegClass, MRI);
1663  I.getOperand(2).setReg(SrcReg);
1664 
1665  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1666  }
1667  case TargetOpcode::G_FRAME_INDEX: {
1668  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1669  if (Ty != LLT::pointer(0, 64)) {
1670  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1671  << ", expected: " << LLT::pointer(0, 64) << '\n');
1672  return false;
1673  }
1674  I.setDesc(TII.get(AArch64::ADDXri));
1675 
1676  // MOs for a #0 shifted immediate.
1677  I.addOperand(MachineOperand::CreateImm(0));
1678  I.addOperand(MachineOperand::CreateImm(0));
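    // The result is "ADDXri %dst, %frame.index, 0, 0": the frame address with
    // a zero, unshifted immediate offset.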
1679 
1680  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1681  }
1682 
1683  case TargetOpcode::G_GLOBAL_VALUE: {
1684  auto GV = I.getOperand(1).getGlobal();
1685  if (GV->isThreadLocal())
1686  return selectTLSGlobalValue(I, MRI);
1687 
1688  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
1689  if (OpFlags & AArch64II::MO_GOT) {
1690  I.setDesc(TII.get(AArch64::LOADgot));
1691  I.getOperand(1).setTargetFlags(OpFlags);
1692  } else if (TM.getCodeModel() == CodeModel::Large) {
1693  // Materialize the global using movz/movk instructions.
1694  materializeLargeCMVal(I, GV, OpFlags);
1695  I.eraseFromParent();
1696  return true;
1697  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1698  I.setDesc(TII.get(AArch64::ADR));
1699  I.getOperand(1).setTargetFlags(OpFlags);
1700  } else {
1701  I.setDesc(TII.get(AArch64::MOVaddr));
1702  I.getOperand(1).setTargetFlags(AArch64II::MO_PAGE);
1703  MachineInstrBuilder MIB(MF, I);
1704  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1705  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1706  }
1707  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1708  }
1709 
1710  case TargetOpcode::G_ZEXTLOAD:
1711  case TargetOpcode::G_LOAD:
1712  case TargetOpcode::G_STORE: {
1713  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1714  MachineIRBuilder MIB(I);
1715 
1716  LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1717 
1718  if (PtrTy != LLT::pointer(0, 64)) {
1719  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1720  << ", expected: " << LLT::pointer(0, 64) << '\n');
1721  return false;
1722  }
1723 
1724  auto &MemOp = **I.memoperands_begin();
1725  if (MemOp.isAtomic()) {
1726  // For now we just support s8 acquire loads to be able to compile stack
1727  // protector code.
1728  if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1729  MemOp.getSize() == 1) {
1730  I.setDesc(TII.get(AArch64::LDARB));
1731  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1732  }
1733  LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
1734  return false;
1735  }
1736  unsigned MemSizeInBits = MemOp.getSize() * 8;
1737 
1738  const Register PtrReg = I.getOperand(1).getReg();
1739 #ifndef NDEBUG
1740  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1741  // Sanity-check the pointer register.
1742  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1743  "Load/Store pointer operand isn't a GPR");
1744  assert(MRI.getType(PtrReg).isPointer() &&
1745  "Load/Store pointer operand isn't a pointer");
1746 #endif
1747 
1748  const Register ValReg = I.getOperand(0).getReg();
1749  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1750 
1751  const unsigned NewOpc =
1752  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1753  if (NewOpc == I.getOpcode())
1754  return false;
1755 
1756  I.setDesc(TII.get(NewOpc));
1757 
1758  uint64_t Offset = 0;
1759  auto *PtrMI = MRI.getVRegDef(PtrReg);
1760 
1761  // Try to fold a GEP into our unsigned immediate addressing mode.
1762  if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1763  if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1764  int64_t Imm = *COff;
1765  const unsigned Size = MemSizeInBits / 8;
1766  const unsigned Scale = Log2_32(Size);
1767  if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1768  Register Ptr2Reg = PtrMI->getOperand(1).getReg();
1769  I.getOperand(1).setReg(Ptr2Reg);
1770  PtrMI = MRI.getVRegDef(Ptr2Reg);
1771  Offset = Imm / Size;
1772  }
1773  }
1774  }
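    // As a sketch: a 4-byte GPR load from %base + 8 folds to "LDRWui %base, 2";
    // the immediate is the byte offset divided by the access size.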
1775 
1776  // If we haven't folded anything into our addressing mode yet, try to fold
1777  // a frame index into the base+offset.
1778  if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1779  I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1780 
1781  I.addOperand(MachineOperand::CreateImm(Offset));
1782 
1783  // If we're storing a 0, use WZR/XZR.
1784  if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1785  if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1786  if (I.getOpcode() == AArch64::STRWui)
1787  I.getOperand(0).setReg(AArch64::WZR);
1788  else if (I.getOpcode() == AArch64::STRXui)
1789  I.getOperand(0).setReg(AArch64::XZR);
1790  }
1791  }
1792 
1793  if (IsZExtLoad) {
1794  // The zextload from a smaller type to i32 should be handled by the importer.
1795  if (MRI.getType(ValReg).getSizeInBits() != 64)
1796  return false;
1797  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1798  // and zero_extend with SUBREG_TO_REG.
1799  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1800  Register DstReg = I.getOperand(0).getReg();
1801  I.getOperand(0).setReg(LdReg);
1802 
1803  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1804  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1805  .addImm(0)
1806  .addUse(LdReg)
1807  .addImm(AArch64::sub_32);
1809  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1810  MRI);
1811  }
1812  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1813  }
1814 
1815  case TargetOpcode::G_SMULH:
1816  case TargetOpcode::G_UMULH: {
1817  // Reject the various things we don't support yet.
1818  if (unsupportedBinOp(I, RBI, MRI, TRI))
1819  return false;
1820 
1821  const Register DefReg = I.getOperand(0).getReg();
1822  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1823 
1824  if (RB.getID() != AArch64::GPRRegBankID) {
1825  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1826  return false;
1827  }
1828 
1829  if (Ty != LLT::scalar(64)) {
1830  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1831  << ", expected: " << LLT::scalar(64) << '\n');
1832  return false;
1833  }
1834 
1835  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1836  : AArch64::UMULHrr;
1837  I.setDesc(TII.get(NewOpc));
1838 
1839  // Now that we selected an opcode, we need to constrain the register
1840  // operands to use appropriate classes.
1841  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1842  }
1843  case TargetOpcode::G_FADD:
1844  case TargetOpcode::G_FSUB:
1845  case TargetOpcode::G_FMUL:
1846  case TargetOpcode::G_FDIV:
1847 
1848  case TargetOpcode::G_ASHR:
1849  if (MRI.getType(I.getOperand(0).getReg()).isVector())
1850  return selectVectorASHR(I, MRI);
1851  LLVM_FALLTHROUGH;
1852  case TargetOpcode::G_SHL:
1853  if (Opcode == TargetOpcode::G_SHL &&
1854  MRI.getType(I.getOperand(0).getReg()).isVector())
1855  return selectVectorSHL(I, MRI);
1856  LLVM_FALLTHROUGH;
1857  case TargetOpcode::G_OR:
1858  case TargetOpcode::G_LSHR: {
1859  // Reject the various things we don't support yet.
1860  if (unsupportedBinOp(I, RBI, MRI, TRI))
1861  return false;
1862 
1863  const unsigned OpSize = Ty.getSizeInBits();
1864 
1865  const Register DefReg = I.getOperand(0).getReg();
1866  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1867 
1868  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1869  if (NewOpc == I.getOpcode())
1870  return false;
1871 
1872  I.setDesc(TII.get(NewOpc));
1873  // FIXME: Should the type be always reset in setDesc?
1874 
1875  // Now that we selected an opcode, we need to constrain the register
1876  // operands to use appropriate classes.
1877  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1878  }
1879 
1880  case TargetOpcode::G_GEP: {
1881  MachineIRBuilder MIRBuilder(I);
1882  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1883  MIRBuilder);
1884  I.eraseFromParent();
1885  return true;
1886  }
1887  case TargetOpcode::G_UADDO: {
1888  // TODO: Support other types.
1889  unsigned OpSize = Ty.getSizeInBits();
1890  if (OpSize != 32 && OpSize != 64) {
1891  LLVM_DEBUG(
1892  dbgs()
1893  << "G_UADDO currently only supported for 32 and 64 b types.\n");
1894  return false;
1895  }
1896 
1897  // TODO: Support vectors.
1898  if (Ty.isVector()) {
1899  LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1900  return false;
1901  }
1902 
1903  // Add and set the set condition flag.
1904  unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1905  MachineIRBuilder MIRBuilder(I);
1906  auto AddsMI = MIRBuilder.buildInstr(
1907  AddsOpc, {I.getOperand(0).getReg()},
1908  {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1909  constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1910 
1911  // Now, put the overflow result in the register given by the first operand
1912  // to the G_UADDO. CSINC increments the result when the predicate is false,
1913  // so to get the increment when it's true, we need to use the inverse. In
1914  // this case, we want to increment when carry is set.
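    // Roughly: ADDS sets the carry flag on unsigned overflow, and
    // "CSINC w, wzr, wzr, lo" produces 1 exactly when carry is set.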
1915  auto CsetMI = MIRBuilder
1916  .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1917  {Register(AArch64::WZR), Register(AArch64::WZR)})
1918  .addImm(getInvertedCondCode(AArch64CC::HS));
1919  constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1920  I.eraseFromParent();
1921  return true;
1922  }
1923 
1924  case TargetOpcode::G_PTR_MASK: {
1925  uint64_t Align = I.getOperand(2).getImm();
1926  if (Align >= 64 || Align == 0)
1927  return false;
1928 
1929  uint64_t Mask = ~((1ULL << Align) - 1);
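    // e.g. an alignment operand of 3 gives Mask == ~0x7ULL, clearing the low
    // three bits of the pointer.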
1930  I.setDesc(TII.get(AArch64::ANDXri));
1931  I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1932 
1933  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1934  }
1935  case TargetOpcode::G_PTRTOINT:
1936  case TargetOpcode::G_TRUNC: {
1937  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1938  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1939 
1940  const Register DstReg = I.getOperand(0).getReg();
1941  const Register SrcReg = I.getOperand(1).getReg();
1942 
1943  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1944  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1945 
1946  if (DstRB.getID() != SrcRB.getID()) {
1947  LLVM_DEBUG(
1948  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1949  return false;
1950  }
1951 
1952  if (DstRB.getID() == AArch64::GPRRegBankID) {
1953  const TargetRegisterClass *DstRC =
1954  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1955  if (!DstRC)
1956  return false;
1957 
1958  const TargetRegisterClass *SrcRC =
1959  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1960  if (!SrcRC)
1961  return false;
1962 
1963  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1964  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1965  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1966  return false;
1967  }
1968 
1969  if (DstRC == SrcRC) {
1970  // Nothing to be done
1971  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1972  SrcTy == LLT::scalar(64)) {
1973  llvm_unreachable("TableGen can import this case");
1974  return false;
1975  } else if (DstRC == &AArch64::GPR32RegClass &&
1976  SrcRC == &AArch64::GPR64RegClass) {
1977  I.getOperand(1).setSubReg(AArch64::sub_32);
1978  } else {
1979  LLVM_DEBUG(
1980  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1981  return false;
1982  }
1983 
1984  I.setDesc(TII.get(TargetOpcode::COPY));
1985  return true;
1986  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1987  if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1988  I.setDesc(TII.get(AArch64::XTNv4i16));
1989  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1990  return true;
1991  }
1992 
1993  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
1994  MachineIRBuilder MIB(I);
1995  MachineInstr *Extract = emitExtractVectorElt(
1996  DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
1997  if (!Extract)
1998  return false;
1999  I.eraseFromParent();
2000  return true;
2001  }
2002  }
2003 
2004  return false;
2005  }
2006 
2007  case TargetOpcode::G_ANYEXT: {
2008  const Register DstReg = I.getOperand(0).getReg();
2009  const Register SrcReg = I.getOperand(1).getReg();
2010 
2011  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2012  if (RBDst.getID() != AArch64::GPRRegBankID) {
2013  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2014  << ", expected: GPR\n");
2015  return false;
2016  }
2017 
2018  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2019  if (RBSrc.getID() != AArch64::GPRRegBankID) {
2020  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2021  << ", expected: GPR\n");
2022  return false;
2023  }
2024 
2025  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2026 
2027  if (DstSize == 0) {
2028  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2029  return false;
2030  }
2031 
2032  if (DstSize != 64 && DstSize > 32) {
2033  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2034  << ", expected: 32 or 64\n");
2035  return false;
2036  }
2037  // At this point G_ANYEXT is just like a plain COPY, but we need
2038  // to explicitly form the 64-bit value if any.
2039  if (DstSize > 32) {
2040  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2041  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2042  .addDef(ExtSrc)
2043  .addImm(0)
2044  .addUse(SrcReg)
2045  .addImm(AArch64::sub_32);
2046  I.getOperand(1).setReg(ExtSrc);
2047  }
2048  return selectCopy(I, TII, MRI, TRI, RBI);
2049  }
2050 
2051  case TargetOpcode::G_ZEXT:
2052  case TargetOpcode::G_SEXT: {
2053  unsigned Opcode = I.getOpcode();
2054  const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2055  const Register DefReg = I.getOperand(0).getReg();
2056  const Register SrcReg = I.getOperand(1).getReg();
2057  const LLT DstTy = MRI.getType(DefReg);
2058  const LLT SrcTy = MRI.getType(SrcReg);
2059  unsigned DstSize = DstTy.getSizeInBits();
2060  unsigned SrcSize = SrcTy.getSizeInBits();
2061 
2062  assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2063  AArch64::GPRRegBankID &&
2064  "Unexpected ext regbank");
2065 
2066  MachineIRBuilder MIB(I);
2067  MachineInstr *ExtI;
2068  if (DstTy.isVector())
2069  return false; // Should be handled by imported patterns.
2070 
2071  // First, check if we're extending the result of a load whose destination
2072  // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
2073  // smallest GPR register on AArch64, and all smaller loads automatically
2074  // zero-extend the upper bits. E.g.
2075  // %v(s8) = G_LOAD %p, :: (load 1)
2076  // %v2(s32) = G_ZEXT %v(s8)
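    // In that case the G_ZEXT is selected as a plain COPY, since the 8-bit
    // load has already zero-filled the rest of the 32-bit register.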
2077  if (!IsSigned) {
2078  auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2079  if (LoadMI &&
2080  RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2081  const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2082  unsigned BytesLoaded = MemOp->getSize();
2083  if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2084  return selectCopy(I, TII, MRI, TRI, RBI);
2085  }
2086  }
2087 
2088  if (DstSize == 64) {
2089  // FIXME: Can we avoid manually doing this?
2090  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2091  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2092  << " operand\n");
2093  return false;
2094  }
2095 
2096  auto SubregToReg =
2097  MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2098  .addImm(0)
2099  .addUse(SrcReg)
2100  .addImm(AArch64::sub_32);
2101 
2102  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2103  {DefReg}, {SubregToReg})
2104  .addImm(0)
2105  .addImm(SrcSize - 1);
2106  } else if (DstSize <= 32) {
2107  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2108  {DefReg}, {SrcReg})
2109  .addImm(0)
2110  .addImm(SrcSize - 1);
2111  } else {
2112  return false;
2113  }
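    // For instance, a G_SEXT from s32 to s64 becomes "SBFMXri %dst, %tmp, 0, 31"
    // (i.e. SXTW) on the SUBREG_TO_REG-widened source; G_ZEXT uses UBFMXri.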
2114 
2115  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2116  I.eraseFromParent();
2117  return true;
2118  }
2119 
2120  case TargetOpcode::G_SITOFP:
2121  case TargetOpcode::G_UITOFP:
2122  case TargetOpcode::G_FPTOSI:
2123  case TargetOpcode::G_FPTOUI: {
2124  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2125  SrcTy = MRI.getType(I.getOperand(1).getReg());
2126  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2127  if (NewOpc == Opcode)
2128  return false;
2129 
2130  I.setDesc(TII.get(NewOpc));
2131  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2132 
2133  return true;
2134  }
2135 
2136 
2137  case TargetOpcode::G_INTTOPTR:
2138  // The importer is currently unable to import pointer types since they
2139  // didn't exist in SelectionDAG.
2140  return selectCopy(I, TII, MRI, TRI, RBI);
2141 
2142  case TargetOpcode::G_BITCAST:
2143  // Imported SelectionDAG rules can handle every bitcast except those that
2144  // bitcast from a type to the same type. Ideally, these shouldn't occur
2145  // but we might not run an optimizer that deletes them. The other exception
2146  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2147  // of them.
2148  return selectCopy(I, TII, MRI, TRI, RBI);
2149 
2150  case TargetOpcode::G_SELECT: {
2151  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2152  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2153  << ", expected: " << LLT::scalar(1) << '\n');
2154  return false;
2155  }
2156 
2157  const Register CondReg = I.getOperand(1).getReg();
2158  const Register TReg = I.getOperand(2).getReg();
2159  const Register FReg = I.getOperand(3).getReg();
2160 
2161  if (tryOptSelect(I))
2162  return true;
2163 
2164  Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2165  MachineInstr &TstMI =
2166  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2167  .addDef(AArch64::WZR)
2168  .addUse(CondReg)
2169  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2170 
2171  MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2172  .addDef(I.getOperand(0).getReg())
2173  .addUse(TReg)
2174  .addUse(FReg)
2175  .addImm(AArch64CC::NE);
2176 
2177  constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2178  constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2179 
2180  I.eraseFromParent();
2181  return true;
2182  }
2183  case TargetOpcode::G_ICMP: {
2184  if (Ty.isVector())
2185  return selectVectorICmp(I, MRI);
2186 
2187  if (Ty != LLT::scalar(32)) {
2188  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2189  << ", expected: " << LLT::scalar(32) << '\n');
2190  return false;
2191  }
2192 
2193  MachineIRBuilder MIRBuilder(I);
2194  if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2195  MIRBuilder))
2196  return false;
2197  emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2198  MIRBuilder);
2199  I.eraseFromParent();
2200  return true;
2201  }
2202 
2203  case TargetOpcode::G_FCMP: {
2204  if (Ty != LLT::scalar(32)) {
2205  LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2206  << ", expected: " << LLT::scalar(32) << '\n');
2207  return false;
2208  }
2209 
2210  unsigned CmpOpc = selectFCMPOpc(I, MRI);
2211  if (!CmpOpc)
2212  return false;
2213 
2214  // FIXME: regbank
2215 
2216  AArch64CC::CondCode CC1, CC2;
2217  changeFCMPPredToAArch64CC(
2218  (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2219 
2220  // Partially build the compare. Decide if we need to add a use for the
2221  // third operand based off whether or not we're comparing against 0.0.
2222  auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2223  .addUse(I.getOperand(2).getReg());
2224 
2225  // If we don't have an immediate compare, then we need to add a use of the
2226  // register which wasn't used for the immediate.
2227  // Note that the immediate will always be the last operand.
2228  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2229  CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2230 
2231  const Register DefReg = I.getOperand(0).getReg();
2232  Register Def1Reg = DefReg;
2233  if (CC2 != AArch64CC::AL)
2234  Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2235 
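    // Some FP predicates (e.g. one, ueq) need two AArch64 conditions; in that
    // case each is materialized with a CSINC and the results are ORed together.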
2236  MachineInstr &CSetMI =
2237  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2238  .addDef(Def1Reg)
2239  .addUse(AArch64::WZR)
2240  .addUse(AArch64::WZR)
2241  .addImm(getInvertedCondCode(CC1));
2242 
2243  if (CC2 != AArch64CC::AL) {
2244  Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2245  MachineInstr &CSet2MI =
2246  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2247  .addDef(Def2Reg)
2248  .addUse(AArch64::WZR)
2249  .addUse(AArch64::WZR)
2250  .addImm(getInvertedCondCode(CC2));
2251  MachineInstr &OrMI =
2252  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2253  .addDef(DefReg)
2254  .addUse(Def1Reg)
2255  .addUse(Def2Reg);
2256  constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2257  constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2258  }
2259  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2260  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2261 
2262  I.eraseFromParent();
2263  return true;
2264  }
2265  case TargetOpcode::G_VASTART:
2266  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2267  : selectVaStartAAPCS(I, MF, MRI);
2268  case TargetOpcode::G_INTRINSIC:
2269  return selectIntrinsic(I, MRI);
2270  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2271  return selectIntrinsicWithSideEffects(I, MRI);
2272  case TargetOpcode::G_IMPLICIT_DEF: {
2273  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2274  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2275  const Register DstReg = I.getOperand(0).getReg();
2276  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2277  const TargetRegisterClass *DstRC =
2278  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2279  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2280  return true;
2281  }
2282  case TargetOpcode::G_BLOCK_ADDR: {
2283  if (TM.getCodeModel() == CodeModel::Large) {
2284  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2285  I.eraseFromParent();
2286  return true;
2287  } else {
2288  I.setDesc(TII.get(AArch64::MOVaddrBA));
2289  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2290  I.getOperand(0).getReg())
2291  .addBlockAddress(I.getOperand(1).getBlockAddress(),
2292  /* Offset */ 0, AArch64II::MO_PAGE)
2293  .addBlockAddress(
2294  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2295  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2296  I.eraseFromParent();
2297  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2298  }
2299  }
2300  case TargetOpcode::G_INTRINSIC_TRUNC:
2301  return selectIntrinsicTrunc(I, MRI);
2302  case TargetOpcode::G_INTRINSIC_ROUND:
2303  return selectIntrinsicRound(I, MRI);
2304  case TargetOpcode::G_BUILD_VECTOR:
2305  return selectBuildVector(I, MRI);
2306  case TargetOpcode::G_MERGE_VALUES:
2307  return selectMergeValues(I, MRI);
2308  case TargetOpcode::G_UNMERGE_VALUES:
2309  return selectUnmergeValues(I, MRI);
2310  case TargetOpcode::G_SHUFFLE_VECTOR:
2311  return selectShuffleVector(I, MRI);
2312  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2313  return selectExtractElt(I, MRI);
2314  case TargetOpcode::G_INSERT_VECTOR_ELT:
2315  return selectInsertElt(I, MRI);
2316  case TargetOpcode::G_CONCAT_VECTORS:
2317  return selectConcatVectors(I, MRI);
2318  case TargetOpcode::G_JUMP_TABLE:
2319  return selectJumpTable(I, MRI);
2320  }
2321 
2322  return false;
2323 }
2324 
2325 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2326  MachineRegisterInfo &MRI) const {
2327  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2328  Register JTAddr = I.getOperand(0).getReg();
2329  unsigned JTI = I.getOperand(1).getIndex();
2330  Register Index = I.getOperand(2).getReg();
2331  MachineIRBuilder MIB(I);
2332 
2333  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2334  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2335  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2336  {JTAddr, Index})
2337  .addJumpTableIndex(JTI);
2338 
2339  // Build the indirect branch.
2340  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2341  I.eraseFromParent();
2342  return true;
2343 }
2344 
2345 bool AArch64InstructionSelector::selectJumpTable(
2346  MachineInstr &I, MachineRegisterInfo &MRI) const {
2347  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2348  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2349 
2350  Register DstReg = I.getOperand(0).getReg();
2351  unsigned JTI = I.getOperand(1).getIndex();
2352  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2353  MachineIRBuilder MIB(I);
2354  auto MovMI =
2355  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2356  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2357  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2358  I.eraseFromParent();
2359  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2360 }
2361 
2362 bool AArch64InstructionSelector::selectTLSGlobalValue(
2363  MachineInstr &I, MachineRegisterInfo &MRI) const {
2364  if (!STI.isTargetMachO())
2365  return false;
2366  MachineFunction &MF = *I.getParent()->getParent();
2367  MF.getFrameInfo().setAdjustsStack(true);
2368 
2369  const GlobalValue &GV = *I.getOperand(1).getGlobal();
2370  MachineIRBuilder MIB(I);
2371 
2372  MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2373  .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2374 
2375  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2376  {Register(AArch64::X0)})
2377  .addImm(0);
2378 
2379  // TLS calls preserve all registers except those that absolutely must be
2380  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2381  // silly).
2382  MIB.buildInstr(AArch64::BLR, {}, {Load})
2383  .addDef(AArch64::X0, RegState::Implicit)
2384  .addRegMask(TRI.getTLSCallPreservedMask());
2385 
2386  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2387  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2388  MRI);
2389  I.eraseFromParent();
2390  return true;
2391 }
2392 
2393 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2394  MachineInstr &I, MachineRegisterInfo &MRI) const {
2395  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2396 
2397  // Select the correct opcode.
2398  unsigned Opc = 0;
2399  if (!SrcTy.isVector()) {
2400  switch (SrcTy.getSizeInBits()) {
2401  default:
2402  case 16:
2403  Opc = AArch64::FRINTZHr;
2404  break;
2405  case 32:
2406  Opc = AArch64::FRINTZSr;
2407  break;
2408  case 64:
2409  Opc = AArch64::FRINTZDr;
2410  break;
2411  }
2412  } else {
2413  unsigned NumElts = SrcTy.getNumElements();
2414  switch (SrcTy.getElementType().getSizeInBits()) {
2415  default:
2416  break;
2417  case 16:
2418  if (NumElts == 4)
2419  Opc = AArch64::FRINTZv4f16;
2420  else if (NumElts == 8)
2421  Opc = AArch64::FRINTZv8f16;
2422  break;
2423  case 32:
2424  if (NumElts == 2)
2425  Opc = AArch64::FRINTZv2f32;
2426  else if (NumElts == 4)
2427  Opc = AArch64::FRINTZv4f32;
2428  break;
2429  case 64:
2430  if (NumElts == 2)
2431  Opc = AArch64::FRINTZv2f64;
2432  break;
2433  }
2434  }
2435 
2436  if (!Opc) {
2437  // Didn't get an opcode above, bail.
2438  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2439  return false;
2440  }
2441 
2442  // Legalization would have set us up perfectly for this; we just need to
2443  // set the opcode and move on.
2444  I.setDesc(TII.get(Opc));
2445  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2446 }
2447 
2448 bool AArch64InstructionSelector::selectIntrinsicRound(
2449  MachineInstr &I, MachineRegisterInfo &MRI) const {
2450  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2451 
2452  // Select the correct opcode.
2453  unsigned Opc = 0;
2454  if (!SrcTy.isVector()) {
2455  switch (SrcTy.getSizeInBits()) {
2456  default:
2457  case 16:
2458  Opc = AArch64::FRINTAHr;
2459  break;
2460  case 32:
2461  Opc = AArch64::FRINTASr;
2462  break;
2463  case 64:
2464  Opc = AArch64::FRINTADr;
2465  break;
2466  }
2467  } else {
2468  unsigned NumElts = SrcTy.getNumElements();
2469  switch (SrcTy.getElementType().getSizeInBits()) {
2470  default:
2471  break;
2472  case 16:
2473  if (NumElts == 4)
2474  Opc = AArch64::FRINTAv4f16;
2475  else if (NumElts == 8)
2476  Opc = AArch64::FRINTAv8f16;
2477  break;
2478  case 32:
2479  if (NumElts == 2)
2480  Opc = AArch64::FRINTAv2f32;
2481  else if (NumElts == 4)
2482  Opc = AArch64::FRINTAv4f32;
2483  break;
2484  case 64:
2485  if (NumElts == 2)
2486  Opc = AArch64::FRINTAv2f64;
2487  break;
2488  }
2489  }
2490 
2491  if (!Opc) {
2492  // Didn't get an opcode above, bail.
2493  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2494  return false;
2495  }
2496 
2497  // Legalization would have set us up perfectly for this; we just need to
2498  // set the opcode and move on.
2499  I.setDesc(TII.get(Opc));
2500  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2501 }
2502 
2503 bool AArch64InstructionSelector::selectVectorICmp(
2504  MachineInstr &I, MachineRegisterInfo &MRI) const {
2505  Register DstReg = I.getOperand(0).getReg();
2506  LLT DstTy = MRI.getType(DstReg);
2507  Register SrcReg = I.getOperand(2).getReg();
2508  Register Src2Reg = I.getOperand(3).getReg();
2509  LLT SrcTy = MRI.getType(SrcReg);
2510 
2511  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2512  unsigned NumElts = DstTy.getNumElements();
2513 
2514  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2515  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2516  // Third index is cc opcode:
2517  // 0 == eq
2518  // 1 == ugt
2519  // 2 == uge
2520  // 3 == ult
2521  // 4 == ule
2522  // 5 == sgt
2523  // 6 == sge
2524  // 7 == slt
2525  // 8 == sle
2526  // ne is done by negating 'eq' result.
2527 
2528  // This table below assumes that for some comparisons the operands will be
2529  // commuted.
2530  // ult op == commute + ugt op
2531  // ule op == commute + uge op
2532  // slt op == commute + sgt op
2533  // sle op == commute + sge op
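  // For example, an unsigned-less-than compare of two v4s32 values swaps the
  // operands and uses OpcTable[2][1][3], i.e. CMHIv4i32.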
2534  unsigned PredIdx = 0;
2535  bool SwapOperands = false;
2536  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2537  switch (Pred) {
2538  case CmpInst::ICMP_NE:
2539  case CmpInst::ICMP_EQ:
2540  PredIdx = 0;
2541  break;
2542  case CmpInst::ICMP_UGT:
2543  PredIdx = 1;
2544  break;
2545  case CmpInst::ICMP_UGE:
2546  PredIdx = 2;
2547  break;
2548  case CmpInst::ICMP_ULT:
2549  PredIdx = 3;
2550  SwapOperands = true;
2551  break;
2552  case CmpInst::ICMP_ULE:
2553  PredIdx = 4;
2554  SwapOperands = true;
2555  break;
2556  case CmpInst::ICMP_SGT:
2557  PredIdx = 5;
2558  break;
2559  case CmpInst::ICMP_SGE:
2560  PredIdx = 6;
2561  break;
2562  case CmpInst::ICMP_SLT:
2563  PredIdx = 7;
2564  SwapOperands = true;
2565  break;
2566  case CmpInst::ICMP_SLE:
2567  PredIdx = 8;
2568  SwapOperands = true;
2569  break;
2570  default:
2571  llvm_unreachable("Unhandled icmp predicate");
2572  return false;
2573  }
2574 
2575  // This table obviously should be tablegen'd when we have our GISel native
2576  // tablegen selector.
2577 
2578  static const unsigned OpcTable[4][4][9] = {
2579  {
2580  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2581  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2582  0 /* invalid */},
2583  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2584  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2585  0 /* invalid */},
2586  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2587  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2588  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2589  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2590  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2591  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2592  },
2593  {
2594  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2595  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2596  0 /* invalid */},
2597  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2598  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2599  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2600  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2601  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2602  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2603  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2604  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2605  0 /* invalid */}
2606  },
2607  {
2608  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2609  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2610  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2611  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2612  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2613  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2614  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2615  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2616  0 /* invalid */},
2617  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2618  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2619  0 /* invalid */}
2620  },
2621  {
2622  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2623  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2624  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2625  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2626  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2627  0 /* invalid */},
2628  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2629  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2630  0 /* invalid */},
2631  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2632  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2633  0 /* invalid */}
2634  },
2635  };
2636  unsigned EltIdx = Log2_32(SrcEltSize / 8);
2637  unsigned NumEltsIdx = Log2_32(NumElts / 2);
2638  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2639  if (!Opc) {
2640  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2641  return false;
2642  }
2643 
2644  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2645  const TargetRegisterClass *SrcRC =
2646  getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2647  if (!SrcRC) {
2648  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2649  return false;
2650  }
2651 
2652  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2653  if (SrcTy.getSizeInBits() == 128)
2654  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2655 
2656  if (SwapOperands)
2657  std::swap(SrcReg, Src2Reg);
2658 
2659  MachineIRBuilder MIB(I);
2660  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2661  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2662 
2663  // Invert if we had a 'ne' cc.
2664  if (NotOpc) {
2665  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2666  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2667  } else {
2668  MIB.buildCopy(DstReg, Cmp.getReg(0));
2669  }
2670  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2671  I.eraseFromParent();
2672  return true;
2673 }
2674 
2675 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2676  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2677  MachineIRBuilder &MIRBuilder) const {
2678  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2679 
2680  auto BuildFn = [&](unsigned SubregIndex) {
2681  auto Ins =
2682  MIRBuilder
2683  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2684  .addImm(SubregIndex);
2685  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2686  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2687  return &*Ins;
2688  };
2689 
2690  switch (EltSize) {
2691  case 16:
2692  return BuildFn(AArch64::hsub);
2693  case 32:
2694  return BuildFn(AArch64::ssub);
2695  case 64:
2696  return BuildFn(AArch64::dsub);
2697  default:
2698  return nullptr;
2699  }
2700 }
2701 
2702  bool AArch64InstructionSelector::selectMergeValues(
2703  MachineInstr &I, MachineRegisterInfo &MRI) const {
2704  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2705  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2706  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2707  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2708  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2709 
2710  if (I.getNumOperands() != 3)
2711  return false;
2712 
2713  // Merging 2 s64s into an s128.
2714  if (DstTy == LLT::scalar(128)) {
2715  if (SrcTy.getSizeInBits() != 64)
2716  return false;
2717  MachineIRBuilder MIB(I);
2718  Register DstReg = I.getOperand(0).getReg();
2719  Register Src1Reg = I.getOperand(1).getReg();
2720  Register Src2Reg = I.getOperand(2).getReg();
2721  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2722  MachineInstr *InsMI =
2723  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2724  if (!InsMI)
2725  return false;
2726  MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2727  Src2Reg, /* LaneIdx */ 1, RB, MIB);
2728  if (!Ins2MI)
2729  return false;
2730  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2731  constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2732  I.eraseFromParent();
2733  return true;
2734  }
2735 
2736  if (RB.getID() != AArch64::GPRRegBankID)
2737  return false;
2738 
2739  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2740  return false;
2741 
2742  auto *DstRC = &AArch64::GPR64RegClass;
2743  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2744  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2745  TII.get(TargetOpcode::SUBREG_TO_REG))
2746  .addDef(SubToRegDef)
2747  .addImm(0)
2748  .addUse(I.getOperand(1).getReg())
2749  .addImm(AArch64::sub_32);
2750  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2751  // Need to anyext the second scalar before we can use bfm
2752  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2753  TII.get(TargetOpcode::SUBREG_TO_REG))
2754  .addDef(SubToRegDef2)
2755  .addImm(0)
2756  .addUse(I.getOperand(2).getReg())
2757  .addImm(AArch64::sub_32);
2758  MachineInstr &BFM =
2759  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2760  .addDef(I.getOperand(0).getReg())
2761  .addUse(SubToRegDef)
2762  .addUse(SubToRegDef2)
2763  .addImm(32)
2764  .addImm(31);
2765  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2766  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2767  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2768  I.eraseFromParent();
2769  return true;
2770 }
2771 
2772 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2773  const unsigned EltSize) {
2774  // Choose a lane copy opcode and subregister based off of the size of the
2775  // vector's elements.
2776  switch (EltSize) {
2777  case 16:
2778  CopyOpc = AArch64::CPYi16;
2779  ExtractSubReg = AArch64::hsub;
2780  break;
2781  case 32:
2782  CopyOpc = AArch64::CPYi32;
2783  ExtractSubReg = AArch64::ssub;
2784  break;
2785  case 64:
2786  CopyOpc = AArch64::CPYi64;
2787  ExtractSubReg = AArch64::dsub;
2788  break;
2789  default:
2790  // Unknown size, bail out.
2791  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2792  return false;
2793  }
2794  return true;
2795 }
2796 
2797 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2798  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2799  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2800  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2801  unsigned CopyOpc = 0;
2802  unsigned ExtractSubReg = 0;
2803  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2804  LLVM_DEBUG(
2805  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2806  return nullptr;
2807  }
2808 
2809  const TargetRegisterClass *DstRC =
2810  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2811  if (!DstRC) {
2812  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2813  return nullptr;
2814  }
2815 
2816  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2817  const LLT &VecTy = MRI.getType(VecReg);
2818  const TargetRegisterClass *VecRC =
2819  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2820  if (!VecRC) {
2821  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2822  return nullptr;
2823  }
2824 
2825  // The register that we're going to copy into.
2826  Register InsertReg = VecReg;
2827  if (!DstReg)
2828  DstReg = MRI.createVirtualRegister(DstRC);
2829  // If the lane index is 0, we just use a subregister COPY.
2830  if (LaneIdx == 0) {
2831  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2832  .addReg(VecReg, 0, ExtractSubReg);
2833  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2834  return &*Copy;
2835  }
2836 
2837  // Lane copies require 128-bit wide registers. If we're dealing with an
2838  // unpacked vector, then we need to move up to that width. Insert an implicit
2839  // def and a subregister insert to get us there.
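  // e.g. extracting lane 1 of a v2s32 held in a 64-bit D register first widens
  // the source into a Q register (IMPLICIT_DEF + INSERT_SUBREG), then uses CPYi32.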
2840  if (VecTy.getSizeInBits() != 128) {
2841  MachineInstr *ScalarToVector = emitScalarToVector(
2842  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2843  if (!ScalarToVector)
2844  return nullptr;
2845  InsertReg = ScalarToVector->getOperand(0).getReg();
2846  }
2847 
2848  MachineInstr *LaneCopyMI =
2849  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2850  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2851 
2852  // Make sure that we actually constrain the initial copy.
2853  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2854  return LaneCopyMI;
2855 }
2856 
2857 bool AArch64InstructionSelector::selectExtractElt(
2858  MachineInstr &I, MachineRegisterInfo &MRI) const {
2859  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2860  "unexpected opcode!");
2861  Register DstReg = I.getOperand(0).getReg();
2862  const LLT NarrowTy = MRI.getType(DstReg);
2863  const Register SrcReg = I.getOperand(1).getReg();
2864  const LLT WideTy = MRI.getType(SrcReg);
2865  (void)WideTy;
2866  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2867  "source register size too small!");
2868  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2869 
2870  // Need the lane index to determine the correct copy opcode.
2871  MachineOperand &LaneIdxOp = I.getOperand(2);
2872  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2873 
2874  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2875  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2876  return false;
2877  }
2878 
2879  // Find the index to extract from.
2880  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2881  if (!VRegAndVal)
2882  return false;
2883  unsigned LaneIdx = VRegAndVal->Value;
2884 
2885  MachineIRBuilder MIRBuilder(I);
2886 
2887  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2888  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2889  LaneIdx, MIRBuilder);
2890  if (!Extract)
2891  return false;
2892 
2893  I.eraseFromParent();
2894  return true;
2895 }
2896 
2897 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2898  MachineInstr &I, MachineRegisterInfo &MRI) const {
2899  unsigned NumElts = I.getNumOperands() - 1;
2900  Register SrcReg = I.getOperand(NumElts).getReg();
2901  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2902  const LLT SrcTy = MRI.getType(SrcReg);
2903 
2904  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2905  if (SrcTy.getSizeInBits() > 128) {
2906  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2907  return false;
2908  }
2909 
2910  MachineIRBuilder MIB(I);
2911 
2912  // We implement a split vector operation by treating the sub-vectors as
2913  // scalars and extracting them.
2914  const RegisterBank &DstRB =
2915  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2916  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2917  Register Dst = I.getOperand(OpIdx).getReg();
2918  MachineInstr *Extract =
2919  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2920  if (!Extract)
2921  return false;
2922  }
2923  I.eraseFromParent();
2924  return true;
2925 }
2926 
2927  bool AArch64InstructionSelector::selectUnmergeValues(
2928  MachineInstr &I, MachineRegisterInfo &MRI) const {
2929  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2930  "unexpected opcode");
2931 
2932  // TODO: Handle unmerging into GPRs and from scalars to scalars.
2933  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2934  AArch64::FPRRegBankID ||
2935  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2936  AArch64::FPRRegBankID) {
2937  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2938  "currently unsupported.\n");
2939  return false;
2940  }
2941 
2942  // The last operand is the vector source register, and every other operand is
2943  // a register to unpack into.
2944  unsigned NumElts = I.getNumOperands() - 1;
2945  Register SrcReg = I.getOperand(NumElts).getReg();
2946  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2947  const LLT WideTy = MRI.getType(SrcReg);
2948  (void)WideTy;
2949  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
2950  "can only unmerge from vector or s128 types!");
2951  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2952  "source register size too small!");
2953 
2954  if (!NarrowTy.isScalar())
2955  return selectSplitVectorUnmerge(I, MRI);
2956 
2957  MachineIRBuilder MIB(I);
2958 
2959  // Choose a lane copy opcode and subregister based off of the size of the
2960  // vector's elements.
2961  unsigned CopyOpc = 0;
2962  unsigned ExtractSubReg = 0;
2963  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2964  return false;
2965 
2966  // Set up for the lane copies.
2967  MachineBasicBlock &MBB = *I.getParent();
2968 
2969  // Stores the registers we'll be copying from.
2970  SmallVector<Register, 4> InsertRegs;
2971 
2972  // We'll use the first register twice, so we only need NumElts-1 registers.
2973  unsigned NumInsertRegs = NumElts - 1;
2974 
2975  // If our elements fit into exactly 128 bits, then we can copy from the source
2976  // directly. Otherwise, we need to do a bit of setup with some subregister
2977  // inserts.
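  // e.g. unmerging a v4s32 can lane-copy straight from the 128-bit source,
  // while a v2s32 (64 bits wide) needs the subregister inserts below first.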
2978  if (NarrowTy.getSizeInBits() * NumElts == 128) {
2979  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2980  } else {
2981  // No. We have to perform subregister inserts. For each insert, create an
2982  // implicit def and a subregister insert, and save the register we create.
2983  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2984  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2985  MachineInstr &ImpDefMI =
2986  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2987  ImpDefReg);
2988 
2989  // Now, create the subregister insert from SrcReg.
2990  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2991  MachineInstr &InsMI =
2992  *BuildMI(MBB, I, I.getDebugLoc(),
2993  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2994  .addUse(ImpDefReg)
2995  .addUse(SrcReg)
2996  .addImm(AArch64::dsub);
2997 
2998  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2999  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3000 
3001  // Save the register so that we can copy from it after.
3002  InsertRegs.push_back(InsertReg);
3003  }
3004  }
3005 
3006  // Now that we've created any necessary subregister inserts, we can
3007  // create the copies.
3008  //
3009  // Perform the first copy separately as a subregister copy.
3010  Register CopyTo = I.getOperand(0).getReg();
3011  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3012  .addReg(InsertRegs[0], 0, ExtractSubReg);
3013  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3014 
3015  // Now, perform the remaining copies as vector lane copies.
3016  unsigned LaneIdx = 1;
3017  for (Register InsReg : InsertRegs) {
3018  Register CopyTo = I.getOperand(LaneIdx).getReg();
3019  MachineInstr &CopyInst =
3020  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3021  .addUse(InsReg)
3022  .addImm(LaneIdx);
3023  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3024  ++LaneIdx;
3025  }
3026 
3027  // Separately constrain the first copy's destination. Because of the
3028  // limitation in constrainOperandRegClass, we can't guarantee that this will
3029  // actually be constrained. So, do it ourselves using the second operand.
3030  const TargetRegisterClass *RC =
3031  MRI.getRegClassOrNull(I.getOperand(1).getReg());
3032  if (!RC) {
3033  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3034  return false;
3035  }
3036 
3037  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3038  I.eraseFromParent();
3039  return true;
3040 }
3041 
3042 bool AArch64InstructionSelector::selectConcatVectors(
3043  MachineInstr &I, MachineRegisterInfo &MRI) const {
3044  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3045  "Unexpected opcode");
3046  Register Dst = I.getOperand(0).getReg();
3047  Register Op1 = I.getOperand(1).getReg();
3048  Register Op2 = I.getOperand(2).getReg();
3049  MachineIRBuilder MIRBuilder(I);
3050  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3051  if (!ConcatMI)
3052  return false;
3053  I.eraseFromParent();
3054  return true;
3055 }
3056 
3057 unsigned
3058 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3059  MachineFunction &MF) const {
3060  Type *CPTy = CPVal->getType();
3061  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3062  if (Align == 0)
3063  Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3064 
3065  MachineConstantPool *MCP = MF.getConstantPool();
3066  return MCP->getConstantPoolIndex(CPVal, Align);
3067 }
3068 
3069 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3070  Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3071  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3072 
3073  auto Adrp =
3074  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3075  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3076 
3077  MachineInstr *LoadMI = nullptr;
3078  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3079  case 16:
3080  LoadMI =
3081  &*MIRBuilder
3082  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3083  .addConstantPoolIndex(CPIdx, 0,
3084  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3085  break;
3086  case 8:
3087  LoadMI = &*MIRBuilder
3088  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3089  .addConstantPoolIndex(
3090  CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3091  break;
3092  default:
3093  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3094  << *CPVal->getType());
3095  return nullptr;
3096  }
3098  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3099  return LoadMI;
3100 }
3101 
3102 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3103 /// size and RB.
3104 static std::pair<unsigned, unsigned>
3105 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3106  unsigned Opc, SubregIdx;
3107  if (RB.getID() == AArch64::GPRRegBankID) {
3108  if (EltSize == 32) {
3109  Opc = AArch64::INSvi32gpr;
3110  SubregIdx = AArch64::ssub;
3111  } else if (EltSize == 64) {
3112  Opc = AArch64::INSvi64gpr;
3113  SubregIdx = AArch64::dsub;
3114  } else {
3115  llvm_unreachable("invalid elt size!");
3116  }
3117  } else {
3118  if (EltSize == 8) {
3119  Opc = AArch64::INSvi8lane;
3120  SubregIdx = AArch64::bsub;
3121  } else if (EltSize == 16) {
3122  Opc = AArch64::INSvi16lane;
3123  SubregIdx = AArch64::hsub;
3124  } else if (EltSize == 32) {
3125  Opc = AArch64::INSvi32lane;
3126  SubregIdx = AArch64::ssub;
3127  } else if (EltSize == 64) {
3128  Opc = AArch64::INSvi64lane;
3129  SubregIdx = AArch64::dsub;
3130  } else {
3131  llvm_unreachable("invalid elt size!");
3132  }
3133  }
3134  return std::make_pair(Opc, SubregIdx);
3135 }
3136 
3137 MachineInstr *
3138 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3139  MachineOperand &RHS,
3140  MachineIRBuilder &MIRBuilder) const {
3141  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3142  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3143  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3144  {AArch64::ADDWrr, AArch64::ADDWri}};
3145  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3146  auto ImmFns = selectArithImmed(RHS);
3147  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3148  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3149 
3150  // If we matched a valid constant immediate, add those operands.
3151  if (ImmFns) {
3152  for (auto &RenderFn : *ImmFns)
3153  RenderFn(AddMI);
3154  } else {
3155  AddMI.addUse(RHS.getReg());
3156  }
3157 
3158  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3159  return &*AddMI;
3160 }
3161 
3162 MachineInstr *
3163 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3164  MachineIRBuilder &MIRBuilder) const {
3165  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3166  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3167  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3168  {AArch64::ADDSWrr, AArch64::ADDSWri}};
3169  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3170  auto ImmFns = selectArithImmed(RHS);
3171  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3172  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3173 
3174  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3175 
3176  // If we matched a valid constant immediate, add those operands.
3177  if (ImmFns) {
3178  for (auto &RenderFn : *ImmFns)
3179  RenderFn(CmpMI);
3180  } else {
3181  CmpMI.addUse(RHS.getReg());
3182  }
3183 
3184  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3185  return &*CmpMI;
3186 }
3187 
3188 MachineInstr *
3189 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3190  MachineIRBuilder &MIRBuilder) const {
3191  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3192  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3193  bool Is32Bit = (RegSize == 32);
3194  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3195  {AArch64::ANDSWrr, AArch64::ANDSWri}};
3196  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3197 
3198  // We might be able to fold an immediate into the TST. We need to make sure
3199  // it's a logical immediate though, since ANDS requires that.
3200  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3201  bool IsImmForm = ValAndVReg.hasValue() &&
3202  AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3203  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3204  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3205 
3206  if (IsImmForm)
3207  TstMI.addImm(
3208  AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3209  else
3210  TstMI.addUse(RHS);
3211 
3212  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3213  return &*TstMI;
3214 }
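// A TST is simply an ANDS whose result is discarded: the destination above is
// wired to WZR/XZR, so only NZCV is updated. E.g. "tst w0, #0xff" is the
// alias of "ands wzr, w0, #0xff", which is why the immediate must pass the
// isLogicalImmediate check.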
3215 
3216 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3217  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3218  MachineIRBuilder &MIRBuilder) const {
3219  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3220  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3221 
3222  // Fold the compare if possible.
3223  MachineInstr *FoldCmp =
3224  tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3225  if (FoldCmp)
3226  return FoldCmp;
3227 
3228  // Can't fold into a CMN. Just emit a normal compare.
3229  unsigned CmpOpc = 0;
3230  Register ZReg;
3231 
3232  LLT CmpTy = MRI.getType(LHS.getReg());
3233  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3234  "Expected scalar or pointer");
3235  if (CmpTy == LLT::scalar(32)) {
3236  CmpOpc = AArch64::SUBSWrr;
3237  ZReg = AArch64::WZR;
3238  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3239  CmpOpc = AArch64::SUBSXrr;
3240  ZReg = AArch64::XZR;
3241  } else {
3242  return nullptr;
3243  }
3244 
3245  // Try to match immediate forms.
3246  auto ImmFns = selectArithImmed(RHS);
3247  if (ImmFns)
3248  CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3249 
3250  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3251  // If we matched a valid constant immediate, add those operands.
3252  if (ImmFns) {
3253  for (auto &RenderFn : *ImmFns)
3254  RenderFn(CmpMI);
3255  } else {
3256  CmpMI.addUse(RHS.getReg());
3257  }
3258 
3259  // Make sure that we can constrain the compare that we emitted.
3260  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3261  return &*CmpMI;
3262 }
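// As with emitTST, the compare above is a SUBS writing WZR/XZR, i.e. the
// "cmp" alias: only the flags survive.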
3263 
3264 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3265  Optional<Register> Dst, Register Op1, Register Op2,
3266  MachineIRBuilder &MIRBuilder) const {
3267  // We implement a vector concat by:
3268  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3269  // 2. Insert the upper vector into the destination's upper element
3270  // TODO: some of this code is common with G_BUILD_VECTOR handling.
3271  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3272 
3273  const LLT Op1Ty = MRI.getType(Op1);
3274  const LLT Op2Ty = MRI.getType(Op2);
3275 
3276  if (Op1Ty != Op2Ty) {
3277  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3278  return nullptr;
3279  }
3280  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3281 
3282  if (Op1Ty.getSizeInBits() >= 128) {
3283  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3284  return nullptr;
3285  }
3286 
3287  // At the moment we just support 64 bit vector concats.
3288  if (Op1Ty.getSizeInBits() != 64) {
3289  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3290  return nullptr;
3291  }
3292 
3293  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3294  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3295  const TargetRegisterClass *DstRC =
3296  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3297 
3298  MachineInstr *WidenedOp1 =
3299  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3300  MachineInstr *WidenedOp2 =
3301  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3302  if (!WidenedOp1 || !WidenedOp2) {
3303  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3304  return nullptr;
3305  }
3306 
3307  // Now do the insert of the upper element.
3308  unsigned InsertOpc, InsSubRegIdx;
3309  std::tie(InsertOpc, InsSubRegIdx) =
3310  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3311 
3312  if (!Dst)
3313  Dst = MRI.createVirtualRegister(DstRC);
3314  auto InsElt =
3315  MIRBuilder
3316  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3317  .addImm(1) /* Lane index */
3318  .addUse(WidenedOp2->getOperand(0).getReg())
3319  .addImm(0);
3320  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3321  return &*InsElt;
3322 }
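// Illustrative example: concatenating two <2 x s32> (64-bit) operands first
// widens each into the low half of a 128-bit register via emitScalarToVector,
// then the INS above moves the second operand's low 64 bits into lane 1 of
// the first, roughly "mov v0.d[1], v1.d[0]".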
3323 
3324 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3325  MachineInstr &I, MachineRegisterInfo &MRI) const {
3326  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3327  "Expected a G_FCONSTANT!");
3328  MachineOperand &ImmOp = I.getOperand(1);
3329  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3330 
3331  // Only handle 32 and 64 bit defs for now.
3332  if (DefSize != 32 && DefSize != 64)
3333  return nullptr;
3334 
3335  // Don't handle null values using FMOV.
3336  if (ImmOp.getFPImm()->isNullValue())
3337  return nullptr;
3338 
3339  // Get the immediate representation for the FMOV.
3340  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3341  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3342  : AArch64_AM::getFP64Imm(ImmValAPF);
3343 
3344  // If this is -1, it means the immediate can't be represented as the requested
3345  // floating point value. Bail.
3346  if (Imm == -1)
3347  return nullptr;
3348 
3349  // Update MI to represent the new FMOV instruction, constrain it, and return.
3350  ImmOp.ChangeToImmediate(Imm);
3351  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3352  I.setDesc(TII.get(MovOpc));
3353  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3354  return &I;
3355 }
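// Example: a 32-bit G_FCONSTANT of 1.0 fits FMOV's 8-bit immediate encoding
// and becomes "fmov s0, #1.0"; a value like 0.1 does not, getFP32Imm returns
// -1, and the caller has to materialize the constant some other way (e.g. via
// a constant-pool load).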
3356 
3357 MachineInstr *
3358 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3359  MachineIRBuilder &MIRBuilder) const {
3360  // CSINC increments the result when the predicate is false. Invert it.
3361  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3362  CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3363  auto I =
3364  MIRBuilder
3365  .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3366  .addImm(InvCC);
3367  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3368  return &*I;
3369 }
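// CSINC Wd, WZR, WZR, cc yields 0 when cc holds and 1 otherwise, so using the
// inverted predicate above produces 1 exactly when the original predicate is
// true. E.g. for an EQ compare this is "cset w0, eq", the usual alias of
// "csinc w0, wzr, wzr, ne".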
3370 
3371 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3372  MachineIRBuilder MIB(I);
3373  MachineRegisterInfo &MRI = *MIB.getMRI();
3375 
3376  // We want to recognize this pattern:
3377  //
3378  // $z = G_FCMP pred, $x, $y
3379  // ...
3380  // $w = G_SELECT $z, $a, $b
3381  //
3382  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3383  // some copies/truncs in between.)
3384  //
3385  // If we see this, then we can emit something like this:
3386  //
3387  // fcmp $x, $y
3388  // fcsel $w, $a, $b, pred
3389  //
3390  // Rather than emitting both of the rather long sequences in the standard
3391  // G_FCMP/G_SELECT select methods.
3392 
3393  // First, check if the condition is defined by a compare.
3394  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3395  while (CondDef) {
3396  // We can only fold if all of the defs have one use.
3397  if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3398  return false;
3399 
3400  // We can skip over G_TRUNC since the condition is 1-bit.
3401  // Truncating/extending can have no impact on the value.
3402  unsigned Opc = CondDef->getOpcode();
3403  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3404  break;
3405 
3406  // Can't see past copies from physregs.
3407  if (Opc == TargetOpcode::COPY &&
3408  Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3409  return false;
3410 
3411  CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3412  }
3413 
3414  // Is the condition defined by a compare?
3415  if (!CondDef)
3416  return false;
3417 
3418  unsigned CondOpc = CondDef->getOpcode();
3419  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3420  return false;
3421 
3422  AArch64CC::CondCode CondCode;
3423  if (CondOpc == TargetOpcode::G_ICMP) {
3424  CondCode = changeICMPPredToAArch64CC(
3425  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3426  if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3427  CondDef->getOperand(1), MIB)) {
3428  LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3429  return false;
3430  }
3431  } else {
3432  // Get the condition code for the select.
3433  AArch64CC::CondCode CondCode2;
3434  changeFCMPPredToAArch64CC(
3435  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3436  CondCode2);
3437 
3438  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3439  // instructions to emit the comparison.
3440  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3441  // unnecessary.
3442  if (CondCode2 != AArch64CC::AL)
3443  return false;
3444 
3445  // Make sure we'll be able to select the compare.
3446  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3447  if (!CmpOpc)
3448  return false;
3449 
3450  // Emit a new compare.
3451  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3452  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3453  Cmp.addUse(CondDef->getOperand(3).getReg());
3454  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3455  }
3456 
3457  // Emit the select.
3458  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3459  auto CSel =
3460  MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3461  {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3462  .addImm(CondCode);
3463  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3464  I.eraseFromParent();
3465  return true;
3466 }
3467 
3468 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3469  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3470  MachineIRBuilder &MIRBuilder) const {
3471  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3472  "Unexpected MachineOperand");
3473  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3474  // We want to find this sort of thing:
3475  // x = G_SUB 0, y
3476  // G_ICMP z, x
3477  //
3478  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3479  // e.g:
3480  //
3481  // cmn z, y
3482 
3483  // Helper lambda to detect the subtract followed by the compare.
3484  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3485  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3486  if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3487  return false;
3488 
3489  // Need to make sure NZCV is the same at the end of the transformation.
3490  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3491  return false;
3492 
3493  // We want to match against SUBs.
3494  if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3495  return false;
3496 
3497  // Make sure that we're getting
3498  // x = G_SUB 0, y
3499  auto ValAndVReg =
3500  getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3501  if (!ValAndVReg || ValAndVReg->Value != 0)
3502  return false;
3503 
3504  // This can safely be represented as a CMN.
3505  return true;
3506  };
3507 
3508  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3509  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3510  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3511  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3512  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3513 
3514  // Given this:
3515  //
3516  // x = G_SUB 0, y
3517  // G_ICMP x, z
3518  //
3519  // Produce this:
3520  //
3521  // cmn y, z
3522  if (IsCMN(LHSDef, CC))
3523  return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3524 
3525  // Same idea here, but with the RHS of the compare instead:
3526  //
3527  // Given this:
3528  //
3529  // x = G_SUB 0, y
3530  // G_ICMP z, x
3531  //
3532  // Produce this:
3533  //
3534  // cmn z, y
3535  if (IsCMN(RHSDef, CC))
3536  return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3537 
3538  // Given this:
3539  //
3540  // z = G_AND x, y
3541  // G_ICMP z, 0
3542  //
3543  // Produce this if the compare is signed:
3544  //
3545  // tst x, y
3546  if (!isUnsignedICMPPred(P) && LHSDef &&
3547  LHSDef->getOpcode() == TargetOpcode::G_AND) {
3548  // Make sure that the RHS is 0.
3549  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3550  if (!ValAndVReg || ValAndVReg->Value != 0)
3551  return nullptr;
3552 
3553  return emitTST(LHSDef->getOperand(1).getReg(),
3554  LHSDef->getOperand(2).getReg(), MIRBuilder);
3555  }
3556 
3557  return nullptr;
3558 }
3559 
3560 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3561  // Try to match a vector splat operation into a dup instruction.
3562  // We're looking for this pattern:
3563  // %scalar:gpr(s64) = COPY $x0
3564  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3565  // %cst0:gpr(s32) = G_CONSTANT i32 0
3566  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3567  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3568  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3569  // %zerovec(<2 x s32>)
3570  //
3571  // ...into:
3572  // %splat = DUP %scalar
3573  // We use the regbank of the scalar to determine which kind of dup to use.
3574  MachineIRBuilder MIB(I);
3575  MachineRegisterInfo &MRI = *MIB.getMRI();
3577  using namespace TargetOpcode;
3578  using namespace MIPatternMatch;
3579 
3580  // Begin matching the insert.
3581  auto *InsMI =
3582  getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3583  if (!InsMI)
3584  return false;
3585  // Match the undef vector operand.
3586  auto *UndefMI =
3587  getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3588  if (!UndefMI)
3589  return false;
3590  // Match the scalar being splatted.
3591  Register ScalarReg = InsMI->getOperand(2).getReg();
3592  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3593  // Match the index constant 0.
3594  int64_t Index = 0;
3595  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3596  return false;
3597 
3598  // The shuffle's second operand doesn't matter if the mask is all zero.
3599  const Constant *Mask = I.getOperand(3).getShuffleMask();
3600  if (!isa<ConstantAggregateZero>(Mask))
3601  return false;
3602 
3603  // We're done, now find out what kind of splat we need.
3604  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3605  LLT EltTy = VecTy.getElementType();
3606  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3607  LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3608  return false;
3609  }
3610  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3611  static const unsigned OpcTable[2][2] = {
3612  {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3613  {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3614  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3615 
3616  // For FP splats, we need to widen the scalar reg via undef too.
3617  if (IsFP) {
3618  MachineInstr *Widen = emitScalarToVector(
3619  EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3620  if (!Widen)
3621  return false;
3622  ScalarReg = Widen->getOperand(0).getReg();
3623  }
3624  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3625  if (IsFP)
3626  Dup.addImm(0);
3627  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3628  I.eraseFromParent();
3629  return true;
3630 }
3631 
3632 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3633  if (TM.getOptLevel() == CodeGenOpt::None)
3634  return false;
3635  if (tryOptVectorDup(I))
3636  return true;
3637  return false;
3638 }
3639 
3640 bool AArch64InstructionSelector::selectShuffleVector(
3641  MachineInstr &I, MachineRegisterInfo &MRI) const {
3642  if (tryOptVectorShuffle(I))
3643  return true;
3644  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3645  Register Src1Reg = I.getOperand(1).getReg();
3646  const LLT Src1Ty = MRI.getType(Src1Reg);
3647  Register Src2Reg = I.getOperand(2).getReg();
3648  const LLT Src2Ty = MRI.getType(Src2Reg);
3649  const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
3650 
3651  MachineBasicBlock &MBB = *I.getParent();
3652  MachineFunction &MF = *MBB.getParent();
3653  LLVMContext &Ctx = MF.getFunction().getContext();
3654 
3655  SmallVector<int, 8> Mask;
3656  ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
3657 
3658  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3659  // it's originated from a <1 x T> type. Those should have been lowered into
3660  // G_BUILD_VECTOR earlier.
3661  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3662  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3663  return false;
3664  }
3665 
3666  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3667 
3668  SmallVector<Constant *, 64> CstIdxs;
3669  for (int Val : Mask) {
3670  // For now, we'll just assume any undef indexes are 0. This should be
3671  // optimized in the future, e.g. to select DUP etc.
3672  Val = Val < 0 ? 0 : Val;
3673  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3674  unsigned Offset = Byte + Val * BytesPerElt;
3675  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3676  }
3677  }
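// Worked example: for a <4 x s32> shuffle with mask <1, 0, 3, 2>,
// BytesPerElt is 4, so the TBL index bytes built above are
// 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11.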
3678 
3679  MachineIRBuilder MIRBuilder(I);
3680 
3681  // Use a constant pool to load the index vector for TBL.
3682  Constant *CPVal = ConstantVector::get(CstIdxs);
3683  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3684  if (!IndexLoad) {
3685  LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3686  return false;
3687  }
3688 
3689  if (DstTy.getSizeInBits() != 128) {
3690  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3691  // This case can be done with TBL1.
3692  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3693  if (!Concat) {
3694  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3695  return false;
3696  }
3697 
3698  // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
3699  IndexLoad =
3700  emitScalarToVector(64, &AArch64::FPR128RegClass,
3701  IndexLoad->getOperand(0).getReg(), MIRBuilder);
3702 
3703  auto TBL1 = MIRBuilder.buildInstr(
3704  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3705  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3706  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3707 
3708  auto Copy =
3709  MIRBuilder
3710  .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3711  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3712  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3713  I.eraseFromParent();
3714  return true;
3715  }
3716 
3717  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3718  // Q registers for regalloc.
3719  auto RegSeq = MIRBuilder
3720  .buildInstr(TargetOpcode::REG_SEQUENCE,
3721  {&AArch64::QQRegClass}, {Src1Reg})
3722  .addImm(AArch64::qsub0)
3723  .addUse(Src2Reg)
3724  .addImm(AArch64::qsub1);
3725 
3726  auto TBL2 =
3727  MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3728  {RegSeq, IndexLoad->getOperand(0).getReg()});
3729  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3730  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3731  I.eraseFromParent();
3732  return true;
3733 }
3734 
3735 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3736  Optional<Register> DstReg, Register SrcReg, Register EltReg,
3737  unsigned LaneIdx, const RegisterBank &RB,
3738  MachineIRBuilder &MIRBuilder) const {
3739  MachineInstr *InsElt = nullptr;
3740  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3741  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3742 
3743  // Create a register to define with the insert if one wasn't passed in.
3744  if (!DstReg)
3745  DstReg = MRI.createVirtualRegister(DstRC);
3746 
3747  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3748  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3749 
3750  if (RB.getID() == AArch64::FPRRegBankID) {
3751  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3752  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3753  .addImm(LaneIdx)
3754  .addUse(InsSub->getOperand(0).getReg())
3755  .addImm(0);
3756  } else {
3757  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3758  .addImm(LaneIdx)
3759  .addUse(EltReg);
3760  }
3761 
3762  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3763  return InsElt;
3764 }
3765 
3766 bool AArch64InstructionSelector::selectInsertElt(
3767  MachineInstr &I, MachineRegisterInfo &MRI) const {
3768  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3769 
3770  // Get information on the destination.
3771  Register DstReg = I.getOperand(0).getReg();
3772  const LLT DstTy = MRI.getType(DstReg);
3773  unsigned VecSize = DstTy.getSizeInBits();
3774 
3775  // Get information on the element we want to insert into the destination.
3776  Register EltReg = I.getOperand(2).getReg();
3777  const LLT EltTy = MRI.getType(EltReg);
3778  unsigned EltSize = EltTy.getSizeInBits();
3779  if (EltSize < 16 || EltSize > 64)
3780  return false; // Don't support all element types yet.
3781 
3782  // Find the definition of the index. Bail out if it's not defined by a
3783  // G_CONSTANT.
3784  Register IdxReg = I.getOperand(3).getReg();
3785  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3786  if (!VRegAndVal)
3787  return false;
3788  unsigned LaneIdx = VRegAndVal->Value;
3789 
3790  // Perform the lane insert.
3791  Register SrcReg = I.getOperand(1).getReg();
3792  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3793  MachineIRBuilder MIRBuilder(I);
3794 
3795  if (VecSize < 128) {
3796  // If the vector we're inserting into is smaller than 128 bits, widen it
3797  // to 128 to do the insert.
3798  MachineInstr *ScalarToVec = emitScalarToVector(
3799  VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3800  if (!ScalarToVec)
3801  return false;
3802  SrcReg = ScalarToVec->getOperand(0).getReg();
3803  }
3804 
3805  // Create an insert into a new FPR128 register.
3806  // Note that if our vector is already 128 bits, we end up emitting an extra
3807  // register.
3808  MachineInstr *InsMI =
3809  emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3810 
3811  if (VecSize < 128) {
3812  // If we had to widen to perform the insert, then we have to demote back to
3813  // the original size to get the result we want.
3814  Register DemoteVec = InsMI->getOperand(0).getReg();
3815  const TargetRegisterClass *RC =
3816  getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3817  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3818  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3819  return false;
3820  }
3821  unsigned SubReg = 0;
3822  if (!getSubRegForClass(RC, TRI, SubReg))
3823  return false;
3824  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3825  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3826  << "\n");
3827  return false;
3828  }
3829  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3830  .addReg(DemoteVec, 0, SubReg);
3831  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3832  } else {
3833  // No widening needed.
3834  InsMI->getOperand(0).setReg(DstReg);
3835  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3836  }
3837 
3838  I.eraseFromParent();
3839  return true;
3840 }
3841 
3842 bool AArch64InstructionSelector::selectBuildVector(
3843  MachineInstr &I, MachineRegisterInfo &MRI) const {
3844  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3845  // Until we port more of the optimized selections, for now just use a vector
3846  // insert sequence.
3847  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3848  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3849  unsigned EltSize = EltTy.getSizeInBits();
3850  if (EltSize < 16 || EltSize > 64)
3851  return false; // Don't support all element types yet.
3852  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3853  MachineIRBuilder MIRBuilder(I);
3854 
3855  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3856  MachineInstr *ScalarToVec =
3857  emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3858  I.getOperand(1).getReg(), MIRBuilder);
3859  if (!ScalarToVec)
3860  return false;
3861 
3862  Register DstVec = ScalarToVec->getOperand(0).getReg();
3863  unsigned DstSize = DstTy.getSizeInBits();
3864 
3865  // Keep track of the last MI we inserted. Later on, we might be able to save
3866  // a copy using it.
3867  MachineInstr *PrevMI = nullptr;
3868  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3869  // Note that if we don't do a subregister copy, we can end up making an
3870  // extra register.
3871  PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3872  MIRBuilder);
3873  DstVec = PrevMI->getOperand(0).getReg();
3874  }
3875 
3876  // If DstTy's size in bits is less than 128, then emit a subregister copy
3877  // from DstVec to the last register we've defined.
3878  if (DstSize < 128) {
3879  // Force this to be FPR using the destination vector.
3880  const TargetRegisterClass *RC =
3881  getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3882  if (!RC)
3883  return false;
3884  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3885  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3886  return false;
3887  }
3888 
3889  unsigned SubReg = 0;
3890  if (!getSubRegForClass(RC, TRI, SubReg))
3891  return false;
3892  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3893  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3894  << "\n");
3895  return false;
3896  }
3897 
3898  Register Reg = MRI.createVirtualRegister(RC);
3899  Register DstReg = I.getOperand(0).getReg();
3900 
3901  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3902  .addReg(DstVec, 0, SubReg);
3903  MachineOperand &RegOp = I.getOperand(1);
3904  RegOp.setReg(Reg);
3905  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3906  } else {
3907  // We don't need a subregister copy. Save a copy by re-using the
3908  // destination register on the final insert.
3909  assert(PrevMI && "PrevMI was null?");
3910  PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3911  constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3912  }
3913 
3914  I.eraseFromParent();
3915  return true;
3916 }
3917 
3918 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3919 /// ID if it exists, and 0 otherwise.
3920 static unsigned findIntrinsicID(MachineInstr &I) {
3921  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3922  return Op.isIntrinsicID();
3923  });
3924  if (IntrinOp == I.operands_end())
3925  return 0;
3926  return IntrinOp->getIntrinsicID();
3927 }
3928 
3929 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3930  MachineInstr &I, MachineRegisterInfo &MRI) const {
3931  // Find the intrinsic ID.
3932  unsigned IntrinID = findIntrinsicID(I);
3933  if (!IntrinID)
3934  return false;
3935  MachineIRBuilder MIRBuilder(I);
3936 
3937  // Select the instruction.
3938  switch (IntrinID) {
3939  default:
3940  return false;
3941  case Intrinsic::trap:
3942  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3943  break;
3944  case Intrinsic::debugtrap:
3945  if (!STI.isTargetWindows())
3946  return false;
3947  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3948  break;
3949  }
3950 
3951  I.eraseFromParent();
3952  return true;
3953 }
3954 
3955 bool AArch64InstructionSelector::selectIntrinsic(
3956  MachineInstr &I, MachineRegisterInfo &MRI) const {
3957  unsigned IntrinID = findIntrinsicID(I);
3958  if (!IntrinID)
3959  return false;
3960  MachineIRBuilder MIRBuilder(I);
3961 
3962  switch (IntrinID) {
3963  default:
3964  break;
3965  case Intrinsic::aarch64_crypto_sha1h:
3966  Register DstReg = I.getOperand(0).getReg();
3967  Register SrcReg = I.getOperand(2).getReg();
3968 
3969  // FIXME: Should this be an assert?
3970  if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3971  MRI.getType(SrcReg).getSizeInBits() != 32)
3972  return false;
3973 
3974  // The operation has to happen on FPRs. Set up some new FPR registers for
3975  // the source and destination if they are on GPRs.
3976  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3977  SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3978  MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3979 
3980  // Make sure the copy ends up getting constrained properly.
3981  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3982  AArch64::GPR32RegClass, MRI);
3983  }
3984 
3985  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3986  DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3987 
3988  // Actually insert the instruction.
3989  auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3990  constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3991 
3992  // Did we create a new register for the destination?
3993  if (DstReg != I.getOperand(0).getReg()) {
3994  // Yep. Copy the result of the instruction back into the original
3995  // destination.
3996  MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3997  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3998  AArch64::GPR32RegClass, MRI);
3999  }
4000 
4001  I.eraseFromParent();
4002  return true;
4003  }
4004  return false;
4005 }
4006 
4007 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4008  auto &MI = *Root.getParent();
4009  auto &MBB = *MI.getParent();
4010  auto &MF = *MBB.getParent();
4011  auto &MRI = MF.getRegInfo();
4012  uint64_t Immed;
4013  if (Root.isImm())
4014  Immed = Root.getImm();
4015  else if (Root.isCImm())
4016  Immed = Root.getCImm()->getZExtValue();
4017  else if (Root.isReg()) {
4018  auto ValAndVReg =
4019  getConstantVRegValWithLookThrough(Root.getReg(), MRI);
4020  if (!ValAndVReg)
4021  return None;
4022  Immed = ValAndVReg->Value;
4023  } else
4024  return None;
4025  return Immed;
4026 }
4027 
4028 InstructionSelector::ComplexRendererFns
4029 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4030  auto MaybeImmed = getImmedFromMO(Root);
4031  if (MaybeImmed == None || *MaybeImmed > 31)
4032  return None;
4033  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4034  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4035 }
4036 
4037 InstructionSelector::ComplexRendererFns
4038 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4039  auto MaybeImmed = getImmedFromMO(Root);
4040  if (MaybeImmed == None || *MaybeImmed > 31)
4041  return None;
4042  uint64_t Enc = 31 - *MaybeImmed;
4043  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4044 }
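// These renderers compute the two immediate operands of the UBFM-style form
// used when selecting immediate shifts. E.g. a 32-bit shift left by 5 renders
// (32 - 5) & 0x1f == 27 and 31 - 5 == 26, matching the alias
// "lsl w0, w1, #5" == "ubfm w0, w1, #27, #26".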
4045 
4046 InstructionSelector::ComplexRendererFns
4047 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4048  auto MaybeImmed = getImmedFromMO(Root);
4049  if (MaybeImmed == None || *MaybeImmed > 63)
4050  return None;
4051  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4052  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4053 }
4054 
4055 InstructionSelector::ComplexRendererFns
4056 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4057  auto MaybeImmed = getImmedFromMO(Root);
4058  if (MaybeImmed == None || *MaybeImmed > 63)
4059  return None;
4060  uint64_t Enc = 63 - *MaybeImmed;
4061  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4062 }
4063 
4064 /// Helper to select an immediate value that can be represented as a 12-bit
4065 /// value shifted left by either 0 or 12. If it is possible to do so, return
4066 /// the immediate and shift value. If not, return None.
4067 ///
4068 /// Used by selectArithImmed and selectNegArithImmed.
4069 InstructionSelector::ComplexRendererFns
4070 AArch64InstructionSelector::select12BitValueWithLeftShift(
4071  uint64_t Immed) const {
4072  unsigned ShiftAmt;
4073  if (Immed >> 12 == 0) {
4074  ShiftAmt = 0;
4075  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4076  ShiftAmt = 12;
4077  Immed = Immed >> 12;
4078  } else
4079  return None;
4080 
4081  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4082  return {{
4083  [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4084  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4085  }};
4086 }
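// Example: 0x456000 has its low 12 bits clear and fits in 24 bits, so it is
// rendered as immediate 0x456 with an LSL #12 shifter; 0x123 fits directly
// (shift 0); something like 0x1234567 matches neither form and yields None.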
4087 
4088 /// SelectArithImmed - Select an immediate value that can be represented as
4089 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
4090 /// Val set to the 12-bit value and Shift set to the shifter operand.
4091 InstructionSelector::ComplexRendererFns
4092 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4093  // This function is called from the addsub_shifted_imm ComplexPattern,
4094  // which lists [imm] as the list of opcode it's interested in, however
4095  // we still need to check whether the operand is actually an immediate
4096  // here because the ComplexPattern opcode list is only used in
4097  // root-level opcode matching.
4098  auto MaybeImmed = getImmedFromMO(Root);
4099  if (MaybeImmed == None)
4100  return None;
4101  return select12BitValueWithLeftShift(*MaybeImmed);
4102 }
4103 
4104 /// SelectNegArithImmed - As above, but negates the value before trying to
4105 /// select it.
4106 InstructionSelector::ComplexRendererFns
4107 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4108  // We need a register here, because we need to know if we have a 64 or 32
4109  // bit immediate.
4110  if (!Root.isReg())
4111  return None;
4112  auto MaybeImmed = getImmedFromMO(Root);
4113  if (MaybeImmed == None)
4114  return None;
4115  uint64_t Immed = *MaybeImmed;
4116 
4117  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4118  // have the opposite effect on the C flag, so this pattern mustn't match under
4119  // those circumstances.
4120  if (Immed == 0)
4121  return None;
4122 
4123  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4124  // the root.
4125  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4126  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4127  Immed = ~((uint32_t)Immed) + 1;
4128  else
4129  Immed = ~Immed + 1ULL;
4130 
4131  if (Immed & 0xFFFFFFFFFF000000ULL)
4132  return None;
4133 
4134  Immed &= 0xFFFFFFULL;
4135  return select12BitValueWithLeftShift(Immed);
4136 }
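// Example: for "cmp w0, #-7" the negated value 7 fits the 12-bit form, so the
// comparison can instead be selected as "cmn w0, #7" (an ADDS). The Immed == 0
// bail-out above keeps "cmp wN, #0" from being turned into "cmn wN, #0",
// which would set the C flag differently.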
4137 
4138 /// Return true if it is worth folding MI into an extended register. That is,
4139 /// if it's safe to pull it into the addressing mode of a load or store as a
4140 /// shift.
4141 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4142  MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4143  // Always fold if there is one use, or if we're optimizing for size.
4144  Register DefReg = MI.getOperand(0).getReg();
4145  if (MRI.hasOneUse(DefReg) ||
4146  MI.getParent()->getParent()->getFunction().hasOptSize())
4147  return true;
4148 
4149  // It's better to avoid folding and recomputing shifts when we don't have a
4150  // fastpath.
4151  if (!STI.hasLSLFast())
4152  return false;
4153 
4154  // We have a fastpath, so folding a shift in and potentially computing it
4155  // many times may be beneficial. Check if this is only used in memory ops.
4156  // If it is, then we should fold.
4157  return all_of(MRI.use_instructions(DefReg),
4158  [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4159 }
4160 
4161 /// This is used for computing addresses like this:
4162 ///
4163 /// ldr x1, [x2, x3, lsl #3]
4164 ///
4165 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4166 /// is a constant value specific to this load instruction. That is, we'll never
4167 /// see anything other than a 3 here (which corresponds to the size of the
4168 /// element being loaded.)
4169 InstructionSelector::ComplexRendererFns
4170 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4171  MachineOperand &Root, unsigned SizeInBytes) const {
4172  if (!Root.isReg())
4173  return None;
4174  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4175 
4176  // Make sure that the memory op is a valid size.
4177  int64_t LegalShiftVal = Log2_32(SizeInBytes);
4178  if (LegalShiftVal == 0)
4179  return None;
4180 
4181  // We want to find something like this:
4182  //
4183  // val = G_CONSTANT LegalShiftVal
4184  // shift = G_SHL off_reg val
4185  // ptr = G_GEP base_reg shift
4186  // x = G_LOAD ptr
4187  //
4188  // And fold it into this addressing mode:
4189  //
4190  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4191 
4192  // Check if we can find the G_GEP.
4193  MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4194  if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4195  return None;
4196 
4197  // Now, try to match an opcode which will match our specific offset.
4198  // We want a G_SHL or a G_MUL.
4199  MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4200  if (!OffsetInst)
4201  return None;
4202 
4203  unsigned OffsetOpc = OffsetInst->getOpcode();
4204  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4205  return None;
4206 
4207  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4208  return None;
4209 
4210  // Now, try to find the specific G_CONSTANT. Start by assuming that the
4211  // register we will offset is the LHS, and the register containing the
4212  // constant is the RHS.
4213  Register OffsetReg = OffsetInst->getOperand(1).getReg();
4214  Register ConstantReg = OffsetInst->getOperand(2).getReg();
4215  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4216  if (!ValAndVReg) {
4217  // We didn't get a constant on the RHS. If the opcode is a shift, then
4218  // we're done.
4219  if (OffsetOpc == TargetOpcode::G_SHL)
4220  return None;
4221 
4222  // If we have a G_MUL, we can use either register. Try looking at the RHS.
4223  std::swap(OffsetReg, ConstantReg);
4224  ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4225  if (!ValAndVReg)
4226  return None;
4227  }
4228 
4229  // The value must fit into 3 bits, and must be positive. Make sure that is
4230  // true.
4231  int64_t ImmVal = ValAndVReg->Value;
4232 
4233  // Since we're going to pull this into a shift, the constant value must be
4234  // a power of 2. If we got a multiply, then we need to check this.
4235  if (OffsetOpc == TargetOpcode::G_MUL) {
4236  if (!isPowerOf2_32(ImmVal))
4237  return None;
4238 
4239  // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4240  ImmVal = Log2_32(ImmVal);
4241  }
4242 
4243  if ((ImmVal & 0x7) != ImmVal)
4244  return None;
4245 
4246  // We are only allowed to shift by LegalShiftVal. This shift value is built
4247  // into the instruction, so we can't just use whatever we want.
4248  if (ImmVal != LegalShiftVal)
4249  return None;
4250 
4251  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4252  // offset. Signify that we are shifting by setting the shift flag to 1.
4253  return {{[=](MachineInstrBuilder &MIB) {
4254  MIB.addUse(Gep->getOperand(1).getReg());
4255  },
4256  [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4257  [=](MachineInstrBuilder &MIB) {
4258  // Need to add both immediates here to make sure that they are both
4259  // added to the instruction.
4260  MIB.addImm(0);
4261  MIB.addImm(1);
4262  }}};
4263 }
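// Example: an 8-byte load whose offset is computed as "G_MUL %idx, 8" is
// folded here: LegalShiftVal is 3, the power-of-two multiply is converted to
// a shift amount via Log2_32(8) == 3, and the rendered address becomes
// [base, %idx, lsl #3].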
4264 
4265 /// This is used for computing addresses like this:
4266 ///
4267 /// ldr x1, [x2, x3]
4268 ///
4269 /// Where x2 is the base register, and x3 is an offset register.
4270 ///
4271 /// When it is possible (or profitable) to fold a G_GEP into the address
4272 /// this will do so. Otherwise, it will return None.
4273 InstructionSelector::ComplexRendererFns
4274 AArch64InstructionSelector::selectAddrModeRegisterOffset(
4275  MachineOperand &Root) const {
4276  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4277 
4278  // We need a GEP.
4279  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4280  if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4281  return None;
4282 
4283  // If this is used more than once, let's not bother folding.
4284  // TODO: Check if they are memory ops. If they are, then we can still fold
4285  // without having to recompute anything.
4286  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4287  return None;
4288 
4289  // Base is the GEP's LHS, offset is its RHS.
4290  return {{[=](MachineInstrBuilder &MIB) {
4291  MIB.addUse(Gep->getOperand(1).getReg());
4292  },
4293  [=](MachineInstrBuilder &MIB) {
4294  MIB.addUse(Gep->getOperand(2).getReg());
4295  },
4296  [=](MachineInstrBuilder &MIB) {
4297  // Need to add both immediates here to make sure that they are both
4298  // added to the instruction.
4299  MIB.addImm(0);
4300  MIB.addImm(0);
4301  }}};
4302 }
4303 
4304 /// This is intended to be equivalent to selectAddrModeXRO in
4305 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4306 InstructionSelector::ComplexRendererFns
4307 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4308  unsigned SizeInBytes) const {
4309  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4310 
4311  // If we have a constant offset, then we probably don't want to match a
4312  // register offset.
4313  if (isBaseWithConstantOffset(Root, MRI))
4314  return None;
4315 
4316  // Try to fold shifts into the addressing mode.
4317  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4318  if (AddrModeFns)
4319  return AddrModeFns;
4320 
4321  // If that doesn't work, see if it's possible to fold in registers from
4322  // a GEP.
4323  return selectAddrModeRegisterOffset(Root);
4324 }
4325 
4326 /// Select a "register plus unscaled signed 9-bit immediate" address. This
4327 /// should only match when there is an offset that is not valid for a scaled
4328 /// immediate addressing mode. The "Size" argument is the size in bytes of the
4329 /// memory reference, which is needed here to know what is valid for a scaled
4330 /// immediate.
4331 InstructionSelector::ComplexRendererFns
4332 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4333  unsigned Size) const {
4334  MachineRegisterInfo &MRI =
4335  Root.getParent()->getParent()->getParent()->getRegInfo();
4336 
4337  if (!Root.isReg())
4338  return None;
4339 
4340  if (!isBaseWithConstantOffset(Root, MRI))
4341  return None;
4342 
4343  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4344  if (!RootDef)
4345  return None;
4346 
4347  MachineOperand &OffImm = RootDef->getOperand(2);
4348  if (!OffImm.isReg())
4349  return None;
4350  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4351  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4352  return None;
4353  int64_t RHSC;
4354  MachineOperand &RHSOp1 = RHS->getOperand(1);
4355  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4356  return None;
4357  RHSC = RHSOp1.getCImm()->getSExtValue();
4358 
4359  // If the offset is valid as a scaled immediate, don't match here.
4360  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4361  return None;
4362  if (RHSC >= -256 && RHSC < 256) {
4363  MachineOperand &Base = RootDef->getOperand(1);
4364  return {{
4365  [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4366  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4367  }};
4368  }
4369  return None;
4370 }
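// Example: for a 4-byte access, offsets that are non-negative multiples of 4
// up to 16380 are left to the scaled form handled elsewhere, while offsets
// such as -4 or 3 fall through to this unscaled (LDUR/STUR-style) form, which
// accepts any offset in [-256, 255].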
4371 
4372 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
4373 /// "Size" argument is the size in bytes of the memory reference, which
4374 /// determines the scale.
4375 InstructionSelector::ComplexRendererFns
4376 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4377  unsigned Size) const {
4378  MachineRegisterInfo &MRI =
4379  Root.getParent()->getParent()->getParent()->getRegInfo();
4380 
4381  if (!Root.isReg())
4382  return None;
4383 
4384  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4385  if (!RootDef)
4386  return None;
4387 
4388  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4389  return {{
4390  [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4391  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4392  }};
4393  }
4394 
4395  if (isBaseWithConstantOffset(Root, MRI)) {
4396  MachineOperand &LHS = RootDef->getOperand(1);
4397  MachineOperand &RHS = RootDef->getOperand(2);
4398  MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4399  MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4400  if (LHSDef && RHSDef) {
4401  int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4402  unsigned Scale = Log2_32(Size);
4403  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4404  if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4405  return {{
4406  [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4407  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4408  }};
4409 
4410  return {{
4411  [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4412  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4413  }};
4414  }
4415  }
4416  }
4417 
4418  // Before falling back to our general case, check if the unscaled
4419  // instructions can handle this. If so, that's preferable.
4420  if (selectAddrModeUnscaled(Root, Size).hasValue())
4421  return None;
4422 
4423  return {{
4424  [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4425  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4426  }};
4427 }
4428 
4429 /// Given a shift instruction, return the correct shift type for that
4430 /// instruction.
4431 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
4432  // TODO: Handle AArch64_AM::ROR
4433  switch (MI.getOpcode()) {
4434  default:
4435  return AArch64_AM::InvalidShiftExtend;
4436  case TargetOpcode::G_SHL:
4437  return AArch64_AM::LSL;
4438  case TargetOpcode::G_LSHR:
4439  return AArch64_AM::LSR;
4440  case TargetOpcode::G_ASHR:
4441  return AArch64_AM::ASR;
4442  }
4443 }
4444 
4445 /// Select a "shifted register" operand. If the value is not shifted, set the
4446 /// shift operand to a default value of "lsl 0".
4447 ///
4448 /// TODO: Allow shifted register to be rotated in logical instructions.
4449 InstructionSelector::ComplexRendererFns
4450 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4451  if (!Root.isReg())
4452  return None;
4453  MachineRegisterInfo &MRI =
4454  Root.getParent()->getParent()->getParent()->getRegInfo();
4455 
4456  // Check if the operand is defined by an instruction which corresponds to
4457  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
4458  //
4459  // TODO: Handle AArch64_AM::ROR for logical instructions.
4460  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
4461  if (!ShiftInst)
4462  return None;
4463  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
4464  if (ShType == AArch64_AM::InvalidShiftExtend)
4465  return None;
4466  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
4467  return None;
4468 
4469  // Need an immediate on the RHS.
4470  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
4471  auto Immed = getImmedFromMO(ShiftRHS);
4472  if (!Immed)
4473  return None;
4474 
4475  // We have something that we can fold. Fold in the shift's LHS and RHS into
4476  // the instruction.
4477  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
4478  Register ShiftReg = ShiftLHS.getReg();
4479 
4480  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
4481  unsigned Val = *Immed & (NumBits - 1);
4482  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
4483 
4484  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
4485  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
4486 }
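// Example: a G_ADD whose RHS is "G_SHL %x, 4" can use this renderer, so the
// add is selected as "add x0, x1, x2, lsl #4"; the shift amount is masked to
// the register width above via *Immed & (NumBits - 1).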
4487 
4488 /// Get the correct ShiftExtendType for an extend instruction.
4489 static AArch64_AM::ShiftExtendType getExtendTypeForInst(MachineInstr &MI,
4490  MachineRegisterInfo &MRI) {
4491  unsigned Opc = MI.getOpcode();
4492 
4493  // Handle explicit extend instructions first.
4494  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
4495  unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4496  assert(Size != 64 && "Extend from 64 bits?");
4497  switch (Size) {
4498  case 8:
4499  return AArch64_AM::SXTB;
4500  case 16:
4501  return AArch64_AM::SXTH;
4502  case 32:
4503  return AArch64_AM::SXTW;
4504  default:
4505  return AArch64_AM::InvalidShiftExtend;
4506  }
4507  }
4508 
4509  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
4510  unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4511  assert(Size != 64 && "Extend from 64 bits?");
4512  switch (Size) {
4513  case 8:
4514  return AArch64_AM::UXTB;
4515  case 16:
4516  return AArch64_AM::UXTH;
4517  case 32:
4518  return AArch64_AM::UXTW;
4519  default:
4520  return AArch64_AM::InvalidShiftExtend;
4521  }
4522  }
4523 
4524  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
4525  // on the RHS.
4526  if (Opc != TargetOpcode::G_AND)
4527  return AArch64_AM::InvalidShiftExtend;
4528 
4529  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
4530  if (!MaybeAndMask)
4531  return AArch64_AM::InvalidShiftExtend;
4532  uint64_t AndMask = *MaybeAndMask;
4533  switch (AndMask) {
4534  default:
4536  case 0xFF:
4537  return AArch64_AM::UXTB;
4538  case 0xFFFF:
4539  return AArch64_AM::UXTH;
4540  case 0xFFFFFFFF:
4541  return AArch64_AM::UXTW;
4542  }
4543 }
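// Example: "G_AND %x, 0xFFFF" behaves like a zero-extend from 16 bits, so it
// is classified as UXTH here and can then be folded as the extend of an
// arith-extended-register operand, e.g. "add x0, x1, w2, uxth".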
4544 
4545 Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
4546  Register ExtReg, MachineIRBuilder &MIB) const {
4547  MachineRegisterInfo &MRI = *MIB.getMRI();
4548  if (MRI.getType(ExtReg).getSizeInBits() == 32)
4549  return ExtReg;
4550 
4551  // Insert a copy to move ExtReg to GPR32.
4552  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4553  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
4554 
4555  // Select the copy into a subregister copy.
4556  selectCopy(*Copy, TII, MRI, TRI, RBI);
4557  return Copy.getReg(0);
4558 }
4559 
4560 /// Select an "extended register" operand. This operand folds in an extend
4561 /// followed by an optional left shift.
4563 AArch64InstructionSelector::selectArithExtendedRegister(
4564  MachineOperand &Root) const {
4565  if (!Root.isReg())
4566  return None;
4567  MachineRegisterInfo &MRI =
4568  Root.getParent()->getParent()->getParent()->getRegInfo();
4569 
4570  uint64_t ShiftVal = 0;
4571  Register ExtReg;
4572  AArch64_AM::ShiftExtendType Ext;
4573  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
4574  if (!RootDef)
4575  return None;
4576 
4577  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
4578  return None;
4579 
4580  // Check if we can fold a shift and an extend.
4581  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
4582  // Look for a constant on the RHS of the shift.
4583  MachineOperand &RHS = RootDef->getOperand(2);
4584  Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
4585  if (!MaybeShiftVal)
4586  return None;
4587  ShiftVal = *MaybeShiftVal;
4588  if (ShiftVal > 4)
4589  return None;
4590  // Look for a valid extend instruction on the LHS of the shift.
4591  MachineOperand &LHS = RootDef->getOperand(1);
4592  MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4593  if (!ExtDef)
4594  return None;
4595  Ext = getExtendTypeForInst(*ExtDef, MRI);
4596  if (Ext == AArch64_AM::InvalidShiftExtend)
4597  return None;
4598  ExtReg = ExtDef->getOperand(1).getReg();
4599  } else {
4600  // Didn't get a shift. Try just folding an extend.
4601  Ext = getExtendTypeForInst(*RootDef, MRI);
4602  if (Ext == AArch64_AM::InvalidShiftExtend)
4603  return None;
4604  ExtReg = RootDef->getOperand(1).getReg();
4605 
4606  // If we have a 32 bit instruction which zeroes out the high half of a
4607  // register, we get an implicit zero extend for free. Check if we have one.
4608  // FIXME: We actually emit the extend right now even though we don't have
4609  // to.
4610  if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
4611  MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
4612  if (ExtInst && isDef32(*ExtInst))
4613  return None;
4614  }
4615  }
4616 
4617  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
4618  // copy.
4619  MachineIRBuilder MIB(*RootDef);
4620  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
4621 
4622  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
4623  [=](MachineInstrBuilder &MIB) {
4624  MIB.addImm(getArithExtendImm(Ext, ShiftVal));
4625  }}};
4626 }
4627 
4628 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4629  const MachineInstr &MI) const {
4630  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4631  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4632  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4633  assert(CstVal && "Expected constant value");
4634  MIB.addImm(CstVal.getValue());
4635 }
4636 
4637 void AArch64InstructionSelector::renderLogicalImm32(
4638  MachineInstrBuilder &MIB, const MachineInstr &I) const {
4639  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4640  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4641  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
4642  MIB.addImm(Enc);
4643 }
4644 
4645 void AArch64InstructionSelector::renderLogicalImm64(
4646  MachineInstrBuilder &MIB, const MachineInstr &I) const {
4647  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4648  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4649  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
4650  MIB.addImm(Enc);
4651 }
4652 
4653 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
4654  const MachineInstr &MI, unsigned NumBytes) const {
4655  if (!MI.mayLoadOrStore())
4656  return false;
4657  assert(MI.hasOneMemOperand() &&
4658  "Expected load/store to have only one mem op!");
4659  return (*MI.memoperands_begin())->getSize() == NumBytes;
4660 }
4661 
4662 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
4663  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4664  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
4665  return false;
4666 
4667  // Only return true if we know the operation will zero-out the high half of
4668  // the 64-bit register. Truncates can be subregister copies, which don't
4669  // zero out the high bits. Copies and other copy-like instructions can be
4670  // fed by truncates, or could be lowered as subregister copies.
4671  switch (MI.getOpcode()) {
4672  default:
4673  return true;
4674  case TargetOpcode::COPY:
4675  case TargetOpcode::G_BITCAST:
4676  case TargetOpcode::G_TRUNC:
4677  case TargetOpcode::G_PHI:
4678  return false;
4679  }
4680 }
4681 
4682 namespace llvm {
4683 InstructionSelector *
4684 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4685  AArch64Subtarget &Subtarget,
4686  AArch64RegisterBankInfo &RBI) {
4687  return new AArch64InstructionSelector(TM, Subtarget, RBI);
4688 }
4689 }
Definition: MachineInstr.h:411
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static int getID(struct InternalInstruction *insn, const void *miiArg)
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
const RegClassOrRegBank & getRegClassOrRegBank(unsigned Reg) const
Return the register bank or register class of Reg.
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:308
static bool isStore(int Opcode)
MachineFunction & getMF()
Getter for the function we currently build.
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
static bool isUnsignedICMPPred(const CmpInst::Predicate P)
Returns true if P is an unsigned integer comparison predicate.
bool isPredicate() const
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:1012
void setReg(Register Reg)
Change the register this operand corresponds to.
#define EQ(a, b)
Definition: regexec.c:112
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
TargetInstrInfo - Interface to description of machine instruction set.
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *From, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV)...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
#define P(N)
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineRegisterInfo * getMRI()
Getter for MRI.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:657
const TargetRegisterInfo * getTargetRegisterInfo() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address...
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
unsigned const MachineRegisterInfo * MRI
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
This is an important base class in LLVM.
Definition: Constant.h:41
const GlobalValue * getGlobal() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
Helper class to build MachineInstr.
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:762
bool isExactlyValue(double V) const
We don&#39;t rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1145
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:567
bool isValid() const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
self_iterator getIterator()
Definition: ilist_node.h:81
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineInstrBuilder & addFrameIndex(int Idx) const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
bool isCopy() const
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:300
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:50
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
const Constant * getShuffleMask() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
#define GET_GLOBALISEL_TEMPORARIES_INIT
const APFloat & getValueAPF() const
Definition: Constants.h:302
static Optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
Optional< ValueAndVReg > getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT (LookThroug...
Definition: Utils.cpp:218
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
BlockVerifier::State From
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:552
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
MachineInstrBuilder MachineInstrBuilder & DefMI
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Promote Memory to Register
Definition: Mem2Reg.cpp:109
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:111
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
This file declares the MachineIRBuilder class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function that verifies that we have a valid copy at the end of selectCopy. ...
Optional< int64_t > getConstantVRegVal(unsigned VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
Definition: Utils.cpp:207
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:470
Provides the logic to select generic machine instructions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class provides the information for the target register banks.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
bool hasOneUse(unsigned RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
ConstantMatch m_ICst(int64_t &Cst)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:305
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
int64_t getOffset() const
Return the offset from the symbol in this operand.
const BlockAddress * getBlockAddress() const
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned findIntrinsicID(MachineInstr &I)
Helper function to find an intrinsic ID on an a MachineInstr.
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
void setSubReg(unsigned subReg)
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:619
#define GET_GLOBALISEL_PREDICATES_INIT