LLVM 9.0.0svn
AArch64InstructionSelector.cpp
1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
21 #include "llvm/ADT/Optional.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/Support/Debug.h"
37 
38 #define DEBUG_TYPE "aarch64-isel"
39 
40 using namespace llvm;
41 
42 namespace {
43 
44 #define GET_GLOBALISEL_PREDICATE_BITSET
45 #include "AArch64GenGlobalISel.inc"
46 #undef GET_GLOBALISEL_PREDICATE_BITSET
47 
48 class AArch64InstructionSelector : public InstructionSelector {
49 public:
50  AArch64InstructionSelector(const AArch64TargetMachine &TM,
51  const AArch64Subtarget &STI,
52  const AArch64RegisterBankInfo &RBI);
53 
54  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
55  static const char *getName() { return DEBUG_TYPE; }
56 
57 private:
58  /// tblgen-erated 'select' implementation, used as the initial selector for
59  /// the patterns that don't require complex C++.
60  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
61 
62  // A lowering phase that runs before any selection attempts.
63 
64  void preISelLower(MachineInstr &I) const;
65 
66  // An early selection function that runs before the selectImpl() call.
67  bool earlySelect(MachineInstr &I) const;
68 
69  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
70 
71  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
72  MachineRegisterInfo &MRI) const;
73  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
74  MachineRegisterInfo &MRI) const;
75 
76  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
77  MachineRegisterInfo &MRI) const;
78 
79  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
80  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
81 
82  // Helper to generate an equivalent of scalar_to_vector into a new register,
83  // returned via 'Dst'.
84  MachineInstr *emitScalarToVector(unsigned EltSize,
85  const TargetRegisterClass *DstRC,
86  Register Scalar,
87  MachineIRBuilder &MIRBuilder) const;
88 
89  /// Emit a lane insert into \p DstReg, or a new vector register if None is
90  /// provided.
91  ///
92  /// The lane inserted into is defined by \p LaneIdx. The vector source
93  /// register is given by \p SrcReg. The register containing the element is
94  /// given by \p EltReg.
95  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
96  Register EltReg, unsigned LaneIdx,
97  const RegisterBank &RB,
98  MachineIRBuilder &MIRBuilder) const;
99  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
100  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
103 
104  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
105  SmallVectorImpl<Optional<int>> &Idxs) const;
106  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
107  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
108  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
109  bool selectSplitVectorUnmerge(MachineInstr &I,
110  MachineRegisterInfo &MRI) const;
111  bool selectIntrinsicWithSideEffects(MachineInstr &I,
112  MachineRegisterInfo &MRI) const;
113  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
114  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
115  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
116  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
117  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
118  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
119 
120  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
121  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
122  MachineIRBuilder &MIRBuilder) const;
123 
124  // Emit a vector concat operation.
125  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
126  Register Op2,
127  MachineIRBuilder &MIRBuilder) const;
128  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
129  MachineOperand &Predicate,
130  MachineIRBuilder &MIRBuilder) const;
131  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
132  MachineIRBuilder &MIRBuilder) const;
133  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
134  MachineIRBuilder &MIRBuilder) const;
135  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
136  const RegisterBank &DstRB, LLT ScalarTy,
137  Register VecReg, unsigned LaneIdx,
138  MachineIRBuilder &MIRBuilder) const;
139 
140  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
141  /// materialized using a FMOV instruction, then update MI and return it.
142  /// Otherwise, do nothing and return a nullptr.
143  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
144  MachineRegisterInfo &MRI) const;
145 
146  /// Emit a CSet for a compare.
147  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
148  MachineIRBuilder &MIRBuilder) const;
149 
150  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
151  // We use these manually instead of using the importer since it doesn't
152  // support SDNodeXForm.
153  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
154  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
155  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
156  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
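// A minimal sketch of what these renderers compute, assuming the standard
// "lsl Rd, Rn, #amt" == "ubfm Rd, Rn, #immr, #imms" alias (the A renderer
// produces immr, the B renderer produces imms); the names below are
// illustrative only:
//
//   constexpr uint64_t shiftA(uint64_t RegSize, uint64_t Amt) {
//     return (RegSize - Amt) % RegSize; // e.g. 32-bit lsl #4 -> immr = 28
//   }
//   constexpr uint64_t shiftB(uint64_t RegSize, uint64_t Amt) {
//     return RegSize - 1 - Amt;         // e.g. 32-bit lsl #4 -> imms = 27
//   }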
157 
158  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
159 
160  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
161  unsigned Size) const;
162 
163  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
164  return selectAddrModeUnscaled(Root, 1);
165  }
166  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
167  return selectAddrModeUnscaled(Root, 2);
168  }
169  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
170  return selectAddrModeUnscaled(Root, 4);
171  }
172  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
173  return selectAddrModeUnscaled(Root, 8);
174  }
175  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
176  return selectAddrModeUnscaled(Root, 16);
177  }
178 
179  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
180  unsigned Size) const;
181  template <int Width>
182  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
183  return selectAddrModeIndexed(Root, Width / 8);
184  }
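// Width is the access size in bits, so e.g. selectAddrModeIndexed<64> asks for
// a base + scaled-unsigned-immediate addressing mode for an 8-byte access
// (Width / 8).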
185 
186  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
187 
188  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
189  void materializeLargeCMVal(MachineInstr &I, const Value *V,
190  unsigned char OpFlags) const;
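// For the large code model this expands to roughly the following sequence
// (assembly shown only as an illustration):
//   movz x0, #:abs_g0_nc:sym   // bits 0-15
//   movk x0, #:abs_g1_nc:sym   // bits 16-31
//   movk x0, #:abs_g2_nc:sym   // bits 32-47
//   movk x0, #:abs_g3:sym      // bits 48-63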
191 
192  // Optimization methods.
193  bool tryOptVectorShuffle(MachineInstr &I) const;
194  bool tryOptVectorDup(MachineInstr &MI) const;
195  bool tryOptSelect(MachineInstr &MI) const;
196  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
197  MachineOperand &Predicate,
198  MachineIRBuilder &MIRBuilder) const;
199 
200  const AArch64TargetMachine &TM;
201  const AArch64Subtarget &STI;
202  const AArch64InstrInfo &TII;
203  const AArch64RegisterInfo &TRI;
204  const AArch64RegisterBankInfo &RBI;
205 
206 #define GET_GLOBALISEL_PREDICATES_DECL
207 #include "AArch64GenGlobalISel.inc"
208 #undef GET_GLOBALISEL_PREDICATES_DECL
209 
210 // We declare the temporaries used by selectImpl() in the class to minimize the
211 // cost of constructing placeholder values.
212 #define GET_GLOBALISEL_TEMPORARIES_DECL
213 #include "AArch64GenGlobalISel.inc"
214 #undef GET_GLOBALISEL_TEMPORARIES_DECL
215 };
216 
217 } // end anonymous namespace
218 
219 #define GET_GLOBALISEL_IMPL
220 #include "AArch64GenGlobalISel.inc"
221 #undef GET_GLOBALISEL_IMPL
222 
223 AArch64InstructionSelector::AArch64InstructionSelector(
224  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
225  const AArch64RegisterBankInfo &RBI)
226  : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
227  TRI(*STI.getRegisterInfo()), RBI(RBI),
228 #define GET_GLOBALISEL_PREDICATES_INIT
229 #include "AArch64GenGlobalISel.inc"
230 #undef GET_GLOBALISEL_PREDICATES_INIT
231 #define GET_GLOBALISEL_TEMPORARIES_INIT
232 #include "AArch64GenGlobalISel.inc"
233 #undef GET_GLOBALISEL_TEMPORARIES_INIT
234 {
235 }
236 
237 // FIXME: This should be target-independent, inferred from the types declared
238 // for each class in the bank.
239 static const TargetRegisterClass *
240 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
241  const RegisterBankInfo &RBI,
242  bool GetAllRegSet = false) {
243  if (RB.getID() == AArch64::GPRRegBankID) {
244  if (Ty.getSizeInBits() <= 32)
245  return GetAllRegSet ? &AArch64::GPR32allRegClass
246  : &AArch64::GPR32RegClass;
247  if (Ty.getSizeInBits() == 64)
248  return GetAllRegSet ? &AArch64::GPR64allRegClass
249  : &AArch64::GPR64RegClass;
250  return nullptr;
251  }
252 
253  if (RB.getID() == AArch64::FPRRegBankID) {
254  if (Ty.getSizeInBits() <= 16)
255  return &AArch64::FPR16RegClass;
256  if (Ty.getSizeInBits() == 32)
257  return &AArch64::FPR32RegClass;
258  if (Ty.getSizeInBits() == 64)
259  return &AArch64::FPR64RegClass;
260  if (Ty.getSizeInBits() == 128)
261  return &AArch64::FPR128RegClass;
262  return nullptr;
263  }
264 
265  return nullptr;
266 }
267 
268 /// Given a register bank, and size in bits, return the smallest register class
269 /// that can represent that combination.
270 static const TargetRegisterClass *
271 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
272  bool GetAllRegSet = false) {
273  unsigned RegBankID = RB.getID();
274 
275  if (RegBankID == AArch64::GPRRegBankID) {
276  if (SizeInBits <= 32)
277  return GetAllRegSet ? &AArch64::GPR32allRegClass
278  : &AArch64::GPR32RegClass;
279  if (SizeInBits == 64)
280  return GetAllRegSet ? &AArch64::GPR64allRegClass
281  : &AArch64::GPR64RegClass;
282  }
283 
284  if (RegBankID == AArch64::FPRRegBankID) {
285  switch (SizeInBits) {
286  default:
287  return nullptr;
288  case 8:
289  return &AArch64::FPR8RegClass;
290  case 16:
291  return &AArch64::FPR16RegClass;
292  case 32:
293  return &AArch64::FPR32RegClass;
294  case 64:
295  return &AArch64::FPR64RegClass;
296  case 128:
297  return &AArch64::FPR128RegClass;
298  }
299  }
300 
301  return nullptr;
302 }
303 
304 /// Returns the correct subregister to use for a given register class.
305 static bool getSubRegForClass(const TargetRegisterClass *RC,
306  const TargetRegisterInfo &TRI, unsigned &SubReg) {
307  switch (TRI.getRegSizeInBits(*RC)) {
308  case 8:
309  SubReg = AArch64::bsub;
310  break;
311  case 16:
312  SubReg = AArch64::hsub;
313  break;
314  case 32:
315  if (RC == &AArch64::GPR32RegClass)
316  SubReg = AArch64::sub_32;
317  else
318  SubReg = AArch64::ssub;
319  break;
320  case 64:
321  SubReg = AArch64::dsub;
322  break;
323  default:
324  LLVM_DEBUG(
325  dbgs() << "Couldn't find appropriate subregister for register class.");
326  return false;
327  }
328 
329  return true;
330 }
331 
332 /// Check whether \p I is a currently unsupported binary operation:
333 /// - it has an unsized type
334 /// - an operand is not a vreg
335 /// - not all operands are in the same bank
336 /// These are checks that should someday live in the verifier, but right now,
337 /// these are mostly limitations of the aarch64 selector.
338 static bool unsupportedBinOp(const MachineInstr &I,
339  const AArch64RegisterBankInfo &RBI,
340  const MachineRegisterInfo &MRI,
341  const AArch64RegisterInfo &TRI) {
342  LLT Ty = MRI.getType(I.getOperand(0).getReg());
343  if (!Ty.isValid()) {
344  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
345  return true;
346  }
347 
348  const RegisterBank *PrevOpBank = nullptr;
349  for (auto &MO : I.operands()) {
350  // FIXME: Support non-register operands.
351  if (!MO.isReg()) {
352  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
353  return true;
354  }
355 
356  // FIXME: Can generic operations have physical registers operands? If
357  // so, this will need to be taught about that, and we'll need to get the
358  // bank out of the minimal class for the register.
359  // Either way, this needs to be documented (and possibly verified).
360  if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
361  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
362  return true;
363  }
364 
365  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
366  if (!OpBank) {
367  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
368  return true;
369  }
370 
371  if (PrevOpBank && OpBank != PrevOpBank) {
372  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
373  return true;
374  }
375  PrevOpBank = OpBank;
376  }
377  return false;
378 }
379 
380 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
381 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
382 /// and of size \p OpSize.
383 /// \returns \p GenericOpc if the combination is unsupported.
384 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
385  unsigned OpSize) {
386  switch (RegBankID) {
387  case AArch64::GPRRegBankID:
388  if (OpSize == 32) {
389  switch (GenericOpc) {
390  case TargetOpcode::G_SHL:
391  return AArch64::LSLVWr;
392  case TargetOpcode::G_LSHR:
393  return AArch64::LSRVWr;
394  case TargetOpcode::G_ASHR:
395  return AArch64::ASRVWr;
396  default:
397  return GenericOpc;
398  }
399  } else if (OpSize == 64) {
400  switch (GenericOpc) {
401  case TargetOpcode::G_GEP:
402  return AArch64::ADDXrr;
403  case TargetOpcode::G_SHL:
404  return AArch64::LSLVXr;
405  case TargetOpcode::G_LSHR:
406  return AArch64::LSRVXr;
407  case TargetOpcode::G_ASHR:
408  return AArch64::ASRVXr;
409  default:
410  return GenericOpc;
411  }
412  }
413  break;
414  case AArch64::FPRRegBankID:
415  switch (OpSize) {
416  case 32:
417  switch (GenericOpc) {
418  case TargetOpcode::G_FADD:
419  return AArch64::FADDSrr;
420  case TargetOpcode::G_FSUB:
421  return AArch64::FSUBSrr;
422  case TargetOpcode::G_FMUL:
423  return AArch64::FMULSrr;
424  case TargetOpcode::G_FDIV:
425  return AArch64::FDIVSrr;
426  default:
427  return GenericOpc;
428  }
429  case 64:
430  switch (GenericOpc) {
431  case TargetOpcode::G_FADD:
432  return AArch64::FADDDrr;
433  case TargetOpcode::G_FSUB:
434  return AArch64::FSUBDrr;
435  case TargetOpcode::G_FMUL:
436  return AArch64::FMULDrr;
437  case TargetOpcode::G_FDIV:
438  return AArch64::FDIVDrr;
439  case TargetOpcode::G_OR:
440  return AArch64::ORRv8i8;
441  default:
442  return GenericOpc;
443  }
444  }
445  break;
446  }
447  return GenericOpc;
448 }
449 
450 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
451 /// appropriate for the (value) register bank \p RegBankID and of memory access
452 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
453 /// addressing mode (e.g., LDRXui).
454 /// \returns \p GenericOpc if the combination is unsupported.
455 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
456  unsigned OpSize) {
457  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
458  switch (RegBankID) {
459  case AArch64::GPRRegBankID:
460  switch (OpSize) {
461  case 8:
462  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
463  case 16:
464  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
465  case 32:
466  return isStore ? AArch64::STRWui : AArch64::LDRWui;
467  case 64:
468  return isStore ? AArch64::STRXui : AArch64::LDRXui;
469  }
470  break;
471  case AArch64::FPRRegBankID:
472  switch (OpSize) {
473  case 8:
474  return isStore ? AArch64::STRBui : AArch64::LDRBui;
475  case 16:
476  return isStore ? AArch64::STRHui : AArch64::LDRHui;
477  case 32:
478  return isStore ? AArch64::STRSui : AArch64::LDRSui;
479  case 64:
480  return isStore ? AArch64::STRDui : AArch64::LDRDui;
481  }
482  break;
483  }
484  return GenericOpc;
485 }
486 
487 #ifndef NDEBUG
488 /// Helper function that verifies that we have a valid copy at the end of
489 /// selectCopy. Verifies that the source and dest have the expected sizes and
490 /// then returns true.
491 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
492  const MachineRegisterInfo &MRI,
493  const TargetRegisterInfo &TRI,
494  const RegisterBankInfo &RBI) {
495  const unsigned DstReg = I.getOperand(0).getReg();
496  const unsigned SrcReg = I.getOperand(1).getReg();
497  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
498  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
499 
500  // Make sure the size of the source and dest line up.
501  assert(
502  (DstSize == SrcSize ||
503  // Copies are a means to set up initial types, the number of
504  // bits may not exactly match.
505  (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
506  // Copies are a means to copy bits around, as long as we are
507  // on the same register class, that's fine. Otherwise, that
508  // means we need some SUBREG_TO_REG or AND & co.
509  (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
510  "Copy with different width?!");
511 
512  // Check the size of the destination.
513  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
514  "GPRs cannot get more than 64-bit width values");
515 
516  return true;
517 }
518 #endif
519 
520 /// Helper function for selectCopy. Inserts a subregister copy from
521 /// \p *From to \p *To, linking it up to \p I.
522 ///
523 /// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
524 ///
525 /// CopyReg (From class) = COPY SrcReg
526 /// SubRegCopy (To class) = COPY CopyReg:SubReg
527 /// Dst = COPY SubRegCopy
528 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
529  const RegisterBankInfo &RBI, unsigned SrcReg,
530  const TargetRegisterClass *From,
531  const TargetRegisterClass *To,
532  unsigned SubReg) {
533  MachineIRBuilder MIB(I);
534  auto Copy = MIB.buildCopy({From}, {SrcReg});
535  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
536  .addReg(Copy.getReg(0), 0, SubReg);
537  MachineOperand &RegOp = I.getOperand(1);
538  RegOp.setReg(SubRegCopy.getReg(0));
539 
540  // It's possible that the destination register won't be constrained. Make
541  // sure that happens.
542  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
543  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
544 
545  return true;
546 }
547 
548 /// Helper function to get the source and destination register classes for a
549 /// copy. Returns a std::pair containing the source register class for the
550 /// copy, and the destination register class for the copy. If a register class
551 /// cannot be determined, then it will be nullptr.
552 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
553 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
554  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
555  const RegisterBankInfo &RBI) {
556  unsigned DstReg = I.getOperand(0).getReg();
557  unsigned SrcReg = I.getOperand(1).getReg();
558  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
559  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
560  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
561  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
562 
563  // Special casing for cross-bank copies of s1s. We can technically represent
564  // a 1-bit value with any size of register. The minimum size for a GPR is 32
565  // bits. So, we need to put the FPR on 32 bits as well.
566  //
567  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
568  // then we can pull it into the helpers that get the appropriate class for a
569  // register bank. Or make a new helper that carries along some constraint
570  // information.
571  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
572  SrcSize = DstSize = 32;
573 
574  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
575  getMinClassForRegBank(DstRegBank, DstSize, true)};
576 }
577 
578 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
579  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
580  const RegisterBankInfo &RBI) {
581 
582  unsigned DstReg = I.getOperand(0).getReg();
583  unsigned SrcReg = I.getOperand(1).getReg();
584  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
585  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
586 
587  // Find the correct register classes for the source and destination registers.
588  const TargetRegisterClass *SrcRC;
589  const TargetRegisterClass *DstRC;
590  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
591 
592  if (!DstRC) {
593  LLVM_DEBUG(dbgs() << "Unexpected dest size "
594  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
595  return false;
596  }
597 
598  // A couple helpers below, for making sure that the copy we produce is valid.
599 
600  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
601  // to verify that the src and dst are the same size, since that's handled by
602  // the SUBREG_TO_REG.
603  bool KnownValid = false;
604 
605  // Returns true, or asserts if something we don't expect happens. Instead of
606  // returning true, we return isValidCopy() to ensure that we verify the
607  // result.
608  auto CheckCopy = [&]() {
609  // If we have a bitcast or something, we can't have physical registers.
610  assert(
611  (I.isCopy() ||
612  (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
613  !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
614  "No phys reg on generic operator!");
615  assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
616  (void)KnownValid;
617  return true;
618  };
619 
620  // Is this a copy? If so, then we may need to insert a subregister copy, or
621  // a SUBREG_TO_REG.
622  if (I.isCopy()) {
623  // Yes. Check if there's anything to fix up.
624  if (!SrcRC) {
625  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
626  return false;
627  }
628 
629  // Is this a cross-bank copy?
630  if (DstRegBank.getID() != SrcRegBank.getID()) {
631  // If we're doing a cross-bank copy on different-sized registers, we need
632  // to do a bit more work.
633  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
634  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
635 
636  if (SrcSize > DstSize) {
637  // We're doing a cross-bank copy into a smaller register. We need a
638  // subregister copy. First, get a register class that's on the same bank
639  // as the destination, but the same size as the source.
640  const TargetRegisterClass *SubregRC =
641  getMinClassForRegBank(DstRegBank, SrcSize, true);
642  assert(SubregRC && "Didn't get a register class for subreg?");
643 
644  // Get the appropriate subregister for the destination.
645  unsigned SubReg = 0;
646  if (!getSubRegForClass(DstRC, TRI, SubReg)) {
647  LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
648  return false;
649  }
650 
651  // Now, insert a subregister copy using the new register class.
652  selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
653  return CheckCopy();
654  }
655 
656  else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
657  SrcSize == 16) {
658  // Special case for FPR16 to GPR32.
659  // FIXME: This can probably be generalized like the above case.
660  unsigned PromoteReg =
661  MRI.createVirtualRegister(&AArch64::FPR32RegClass);
662  BuildMI(*I.getParent(), I, I.getDebugLoc(),
663  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
664  .addImm(0)
665  .addUse(SrcReg)
666  .addImm(AArch64::hsub);
667  MachineOperand &RegOp = I.getOperand(1);
668  RegOp.setReg(PromoteReg);
669 
670  // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
671  KnownValid = true;
672  }
673  }
674 
675  // If the destination is a physical register, then there's nothing to
676  // change, so we're done.
677  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
678  return CheckCopy();
679  }
680 
681  // No need to constrain SrcReg. It will get constrained when we hit another
682  // of its uses or defs. Copies do not have constraints.
683  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
684  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
685  << " operand\n");
686  return false;
687  }
688  I.setDesc(TII.get(AArch64::COPY));
689  return CheckCopy();
690 }
691 
692 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
693  if (!DstTy.isScalar() || !SrcTy.isScalar())
694  return GenericOpc;
695 
696  const unsigned DstSize = DstTy.getSizeInBits();
697  const unsigned SrcSize = SrcTy.getSizeInBits();
698 
699  switch (DstSize) {
700  case 32:
701  switch (SrcSize) {
702  case 32:
703  switch (GenericOpc) {
704  case TargetOpcode::G_SITOFP:
705  return AArch64::SCVTFUWSri;
706  case TargetOpcode::G_UITOFP:
707  return AArch64::UCVTFUWSri;
708  case TargetOpcode::G_FPTOSI:
709  return AArch64::FCVTZSUWSr;
710  case TargetOpcode::G_FPTOUI:
711  return AArch64::FCVTZUUWSr;
712  default:
713  return GenericOpc;
714  }
715  case 64:
716  switch (GenericOpc) {
717  case TargetOpcode::G_SITOFP:
718  return AArch64::SCVTFUXSri;
719  case TargetOpcode::G_UITOFP:
720  return AArch64::UCVTFUXSri;
721  case TargetOpcode::G_FPTOSI:
722  return AArch64::FCVTZSUWDr;
723  case TargetOpcode::G_FPTOUI:
724  return AArch64::FCVTZUUWDr;
725  default:
726  return GenericOpc;
727  }
728  default:
729  return GenericOpc;
730  }
731  case 64:
732  switch (SrcSize) {
733  case 32:
734  switch (GenericOpc) {
735  case TargetOpcode::G_SITOFP:
736  return AArch64::SCVTFUWDri;
737  case TargetOpcode::G_UITOFP:
738  return AArch64::UCVTFUWDri;
739  case TargetOpcode::G_FPTOSI:
740  return AArch64::FCVTZSUXSr;
741  case TargetOpcode::G_FPTOUI:
742  return AArch64::FCVTZUUXSr;
743  default:
744  return GenericOpc;
745  }
746  case 64:
747  switch (GenericOpc) {
748  case TargetOpcode::G_SITOFP:
749  return AArch64::SCVTFUXDri;
750  case TargetOpcode::G_UITOFP:
751  return AArch64::UCVTFUXDri;
752  case TargetOpcode::G_FPTOSI:
753  return AArch64::FCVTZSUXDr;
754  case TargetOpcode::G_FPTOUI:
755  return AArch64::FCVTZUUXDr;
756  default:
757  return GenericOpc;
758  }
759  default:
760  return GenericOpc;
761  }
762  default:
763  return GenericOpc;
764  };
765  return GenericOpc;
766 }
767 
768 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
769  const RegisterBankInfo &RBI) {
770  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
771  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
772  AArch64::GPRRegBankID);
773  LLT Ty = MRI.getType(I.getOperand(0).getReg());
774  if (Ty == LLT::scalar(32))
775  return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
776  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
777  return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
778  return 0;
779 }
780 
781 /// Helper function to select the opcode for a G_FCMP.
782 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
783  // If this is a compare against +0.0, then we don't have to explicitly
784  // materialize a constant.
785  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
786  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
787  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
788  if (OpSize != 32 && OpSize != 64)
789  return 0;
790  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
791  {AArch64::FCMPSri, AArch64::FCMPDri}};
792  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
793 }
794 
795 /// Returns true if \p P is an unsigned integer comparison predicate.
796 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
797  switch (P) {
798  default:
799  return false;
800  case CmpInst::ICMP_UGT:
801  case CmpInst::ICMP_UGE:
802  case CmpInst::ICMP_ULT:
803  case CmpInst::ICMP_ULE:
804  return true;
805  }
806 }
807 
808 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
809  switch (P) {
810  default:
811  llvm_unreachable("Unknown condition code!");
812  case CmpInst::ICMP_NE:
813  return AArch64CC::NE;
814  case CmpInst::ICMP_EQ:
815  return AArch64CC::EQ;
816  case CmpInst::ICMP_SGT:
817  return AArch64CC::GT;
818  case CmpInst::ICMP_SGE:
819  return AArch64CC::GE;
820  case CmpInst::ICMP_SLT:
821  return AArch64CC::LT;
822  case CmpInst::ICMP_SLE:
823  return AArch64CC::LE;
824  case CmpInst::ICMP_UGT:
825  return AArch64CC::HI;
826  case CmpInst::ICMP_UGE:
827  return AArch64CC::HS;
828  case CmpInst::ICMP_ULT:
829  return AArch64CC::LO;
830  case CmpInst::ICMP_ULE:
831  return AArch64CC::LS;
832  }
833 }
834 
835 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
836  AArch64CC::CondCode &CondCode,
837  AArch64CC::CondCode &CondCode2) {
838  CondCode2 = AArch64CC::AL;
839  switch (P) {
840  default:
841  llvm_unreachable("Unknown FP condition!");
842  case CmpInst::FCMP_OEQ:
843  CondCode = AArch64CC::EQ;
844  break;
845  case CmpInst::FCMP_OGT:
846  CondCode = AArch64CC::GT;
847  break;
848  case CmpInst::FCMP_OGE:
849  CondCode = AArch64CC::GE;
850  break;
851  case CmpInst::FCMP_OLT:
852  CondCode = AArch64CC::MI;
853  break;
854  case CmpInst::FCMP_OLE:
855  CondCode = AArch64CC::LS;
856  break;
857  case CmpInst::FCMP_ONE:
858  CondCode = AArch64CC::MI;
859  CondCode2 = AArch64CC::GT;
860  break;
861  case CmpInst::FCMP_ORD:
862  CondCode = AArch64CC::VC;
863  break;
864  case CmpInst::FCMP_UNO:
865  CondCode = AArch64CC::VS;
866  break;
867  case CmpInst::FCMP_UEQ:
868  CondCode = AArch64CC::EQ;
869  CondCode2 = AArch64CC::VS;
870  break;
871  case CmpInst::FCMP_UGT:
872  CondCode = AArch64CC::HI;
873  break;
874  case CmpInst::FCMP_UGE:
875  CondCode = AArch64CC::PL;
876  break;
877  case CmpInst::FCMP_ULT:
878  CondCode = AArch64CC::LT;
879  break;
880  case CmpInst::FCMP_ULE:
881  CondCode = AArch64CC::LE;
882  break;
883  case CmpInst::FCMP_UNE:
884  CondCode = AArch64CC::NE;
885  break;
886  }
887 }
888 
889 bool AArch64InstructionSelector::selectCompareBranch(
890  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
891 
892  const Register CondReg = I.getOperand(0).getReg();
893  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
894  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
895  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
896  CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
897  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
898  return false;
899 
900  Register LHS = CCMI->getOperand(2).getReg();
901  Register RHS = CCMI->getOperand(3).getReg();
902  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
903  if (!VRegAndVal)
904  std::swap(RHS, LHS);
905 
906  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
907  if (!VRegAndVal || VRegAndVal->Value != 0) {
908  MachineIRBuilder MIB(I);
909  // If we can't select a CBZ then emit a cmp + Bcc.
910  if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
911  CCMI->getOperand(1), MIB))
912  return false;
913  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
914  (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
915  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
916  I.eraseFromParent();
917  return true;
918  }
919 
920  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
921  if (RB.getID() != AArch64::GPRRegBankID)
922  return false;
923 
924  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
925  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
926  return false;
927 
928  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
929  unsigned CBOpc = 0;
930  if (CmpWidth <= 32)
931  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
932  else if (CmpWidth == 64)
933  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
934  else
935  return false;
936 
937  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
938  .addUse(LHS)
939  .addMBB(DestMBB)
940  .constrainAllUses(TII, TRI, RBI);
941 
942  I.eraseFromParent();
943  return true;
944 }
945 
946 bool AArch64InstructionSelector::selectVectorSHL(
947  MachineInstr &I, MachineRegisterInfo &MRI) const {
948  assert(I.getOpcode() == TargetOpcode::G_SHL);
949  Register DstReg = I.getOperand(0).getReg();
950  const LLT Ty = MRI.getType(DstReg);
951  Register Src1Reg = I.getOperand(1).getReg();
952  Register Src2Reg = I.getOperand(2).getReg();
953 
954  if (!Ty.isVector())
955  return false;
956 
957  unsigned Opc = 0;
958  if (Ty == LLT::vector(4, 32)) {
959  Opc = AArch64::USHLv4i32;
960  } else if (Ty == LLT::vector(2, 32)) {
961  Opc = AArch64::USHLv2i32;
962  } else {
963  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
964  return false;
965  }
966 
967  MachineIRBuilder MIB(I);
968  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
969  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
970  I.eraseFromParent();
971  return true;
972 }
973 
974 bool AArch64InstructionSelector::selectVectorASHR(
975  MachineInstr &I, MachineRegisterInfo &MRI) const {
976  assert(I.getOpcode() == TargetOpcode::G_ASHR);
977  Register DstReg = I.getOperand(0).getReg();
978  const LLT Ty = MRI.getType(DstReg);
979  Register Src1Reg = I.getOperand(1).getReg();
980  Register Src2Reg = I.getOperand(2).getReg();
981 
982  if (!Ty.isVector())
983  return false;
984 
985  // There is no vector shift-right-by-register instruction, but the vector
986  // shift-left-by-register instruction takes a signed shift amount, where
987  // negative values specify a right shift.
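//
// So, roughly, the selection below turns a v4s32 G_ASHR into (register names
// illustrative):
//   %neg:fpr128 = NEGv4i32 %shift_amount
//   %dst:fpr128 = SSHLv4i32 %src, %neg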
988 
989  unsigned Opc = 0;
990  unsigned NegOpc = 0;
991  const TargetRegisterClass *RC = nullptr;
992  if (Ty == LLT::vector(4, 32)) {
993  Opc = AArch64::SSHLv4i32;
994  NegOpc = AArch64::NEGv4i32;
995  RC = &AArch64::FPR128RegClass;
996  } else if (Ty == LLT::vector(2, 32)) {
997  Opc = AArch64::SSHLv2i32;
998  NegOpc = AArch64::NEGv2i32;
999  RC = &AArch64::FPR64RegClass;
1000  } else {
1001  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1002  return false;
1003  }
1004 
1005  MachineIRBuilder MIB(I);
1006  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1007  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1008  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1009  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1010  I.eraseFromParent();
1011  return true;
1012 }
1013 
1014 bool AArch64InstructionSelector::selectVaStartAAPCS(
1015  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1016  return false;
1017 }
1018 
1019 bool AArch64InstructionSelector::selectVaStartDarwin(
1020  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1021  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1022  Register ListReg = I.getOperand(0).getReg();
1023 
1024  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1025 
1026  auto MIB =
1027  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1028  .addDef(ArgsAddrReg)
1029  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1030  .addImm(0)
1031  .addImm(0);
1032 
1032 
1033  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1034 
1035  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1036  .addUse(ArgsAddrReg)
1037  .addUse(ListReg)
1038  .addImm(0)
1039  .addMemOperand(*I.memoperands_begin());
1040 
1041  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1042  I.eraseFromParent();
1043  return true;
1044 }
1045 
1046 void AArch64InstructionSelector::materializeLargeCMVal(
1047  MachineInstr &I, const Value *V, unsigned char OpFlags) const {
1048  MachineBasicBlock &MBB = *I.getParent();
1049  MachineFunction &MF = *MBB.getParent();
1050  MachineRegisterInfo &MRI = MF.getRegInfo();
1051  MachineIRBuilder MIB(I);
1052 
1053  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1054  MovZ->addOperand(MF, I.getOperand(1));
1055  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1056  AArch64II::MO_NC);
1057  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1058  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1059 
1060  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1061  Register ForceDstReg) {
1062  Register DstReg = ForceDstReg
1063  ? ForceDstReg
1064  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1065  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1066  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1067  MovI->addOperand(MF, MachineOperand::CreateGA(
1068  GV, MovZ->getOperand(1).getOffset(), Flags));
1069  } else {
1070  MovI->addOperand(
1071  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1072  MovZ->getOperand(1).getOffset(), Flags));
1073  }
1074  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1075  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1076  return DstReg;
1077  };
1078  Register DstReg = BuildMovK(MovZ.getReg(0),
1079  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1080  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1081  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1082  return;
1083 }
1084 
1085 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1086  MachineBasicBlock &MBB = *I.getParent();
1087  MachineFunction &MF = *MBB.getParent();
1088  MachineRegisterInfo &MRI = MF.getRegInfo();
1089 
1090  switch (I.getOpcode()) {
1091  case TargetOpcode::G_SHL:
1092  case TargetOpcode::G_ASHR:
1093  case TargetOpcode::G_LSHR: {
1094  // These shifts are legalized to have 64 bit shift amounts because we want
1095  // to take advantage of the existing imported selection patterns that assume
1096  // the immediates are s64s. However, if the shifted type is 32 bits and for
1097  // some reason we receive input GMIR that has an s64 shift amount that's not
1098  // a G_CONSTANT, insert a truncate so that we can still select the s32
1099  // register-register variant.
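// A rough before/after sketch of that rewrite (register names illustrative):
//   %amt:gpr(s64) = ...
//   %dst:gpr(s32) = G_SHL %src:gpr(s32), %amt(s64)
// becomes
//   %trunc:gpr(s32) = COPY %amt.sub_32
//   %dst:gpr(s32) = G_SHL %src(s32), %trunc(s32)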
1100  unsigned SrcReg = I.getOperand(1).getReg();
1101  unsigned ShiftReg = I.getOperand(2).getReg();
1102  const LLT ShiftTy = MRI.getType(ShiftReg);
1103  const LLT SrcTy = MRI.getType(SrcReg);
1104  if (SrcTy.isVector())
1105  return;
1106  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1107  if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1108  return;
1109  auto *AmtMI = MRI.getVRegDef(ShiftReg);
1110  assert(AmtMI && "could not find a vreg definition for shift amount");
1111  if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1112  // Insert a subregister copy to implement a 64->32 trunc
1113  MachineIRBuilder MIB(I);
1114  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1115  .addReg(ShiftReg, 0, AArch64::sub_32);
1116  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1117  I.getOperand(2).setReg(Trunc.getReg(0));
1118  }
1119  return;
1120  }
1121  default:
1122  return;
1123  }
1124 }
1125 
1126 bool AArch64InstructionSelector::earlySelectSHL(
1127  MachineInstr &I, MachineRegisterInfo &MRI) const {
1128  // We try to match the immediate variant of LSL, which is actually an alias
1129  // for a special case of UBFM. Otherwise, we fall back to the imported
1130  // selector which will match the register variant.
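// For example, "lsl w0, w1, #4" is the alias of "ubfm w0, w1, #28, #27"; the
// two immediates come from the selectShiftA_*/selectShiftB_* renderers applied
// below.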
1131  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1132  const auto &MO = I.getOperand(2);
1133  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1134  if (!VRegAndVal)
1135  return false;
1136 
1137  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1138  if (DstTy.isVector())
1139  return false;
1140  bool Is64Bit = DstTy.getSizeInBits() == 64;
1141  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1142  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1143  MachineIRBuilder MIB(I);
1144 
1145  if (!Imm1Fn || !Imm2Fn)
1146  return false;
1147 
1148  auto NewI =
1149  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1150  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1151 
1152  for (auto &RenderFn : *Imm1Fn)
1153  RenderFn(NewI);
1154  for (auto &RenderFn : *Imm2Fn)
1155  RenderFn(NewI);
1156 
1157  I.eraseFromParent();
1158  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1159 }
1160 
1161 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1162  assert(I.getParent() && "Instruction should be in a basic block!");
1163  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1164 
1165  MachineBasicBlock &MBB = *I.getParent();
1166  MachineFunction &MF = *MBB.getParent();
1167  MachineRegisterInfo &MRI = MF.getRegInfo();
1168 
1169  switch (I.getOpcode()) {
1170  case TargetOpcode::G_SHL:
1171  return earlySelectSHL(I, MRI);
1172  default:
1173  return false;
1174  }
1175 }
1176 
1177 bool AArch64InstructionSelector::select(MachineInstr &I,
1178  CodeGenCoverage &CoverageInfo) const {
1179  assert(I.getParent() && "Instruction should be in a basic block!");
1180  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1181 
1182  MachineBasicBlock &MBB = *I.getParent();
1183  MachineFunction &MF = *MBB.getParent();
1184  MachineRegisterInfo &MRI = MF.getRegInfo();
1185 
1186  unsigned Opcode = I.getOpcode();
1187  // G_PHI requires same handling as PHI
1188  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1189  // Certain non-generic instructions also need some special handling.
1190 
1191  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1192  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1193 
1194  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1195  const Register DefReg = I.getOperand(0).getReg();
1196  const LLT DefTy = MRI.getType(DefReg);
1197 
1198  const RegClassOrRegBank &RegClassOrBank =
1199  MRI.getRegClassOrRegBank(DefReg);
1200 
1201  const TargetRegisterClass *DefRC
1202  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1203  if (!DefRC) {
1204  if (!DefTy.isValid()) {
1205  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1206  return false;
1207  }
1208  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1209  DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1210  if (!DefRC) {
1211  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1212  return false;
1213  }
1214  }
1215 
1216  I.setDesc(TII.get(TargetOpcode::PHI));
1217 
1218  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1219  }
1220 
1221  if (I.isCopy())
1222  return selectCopy(I, TII, MRI, TRI, RBI);
1223 
1224  return true;
1225  }
1226 
1227 
1228  if (I.getNumOperands() != I.getNumExplicitOperands()) {
1229  LLVM_DEBUG(
1230  dbgs() << "Generic instruction has unexpected implicit operands\n");
1231  return false;
1232  }
1233 
1234  // Try to do some lowering before we start instruction selecting. These
1235  // lowerings are purely transformations on the input G_MIR and so selection
1236  // must continue after any modification of the instruction.
1237  preISelLower(I);
1238 
1239  // There may be patterns that the importer can't handle optimally: it still
1240  // selects them, but to a suboptimal sequence, so our custom C++ selection
1241  // code later never gets a chance to work on them. Therefore, we have an
1242  // early selection attempt here to give priority to certain selection
1243  // routines over the imported ones.
1244  if (earlySelect(I))
1245  return true;
1246 
1247  if (selectImpl(I, CoverageInfo))
1248  return true;
1249 
1250  LLT Ty =
1251  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1252 
1253  MachineIRBuilder MIB(I);
1254 
1255  switch (Opcode) {
1256  case TargetOpcode::G_BRCOND: {
1257  if (Ty.getSizeInBits() > 32) {
1258  // We shouldn't need this on AArch64, but it would be implemented as an
1259  // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1260  // bit being tested is < 32.
1261  LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1262  << ", expected at most 32-bits");
1263  return false;
1264  }
1265 
1266  const Register CondReg = I.getOperand(0).getReg();
1267  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1268 
1269  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1270  // instructions will not be produced, as they are conditional branch
1271  // instructions that do not set flags.
1272  bool ProduceNonFlagSettingCondBr =
1273  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1274  if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1275  return true;
1276 
1277  if (ProduceNonFlagSettingCondBr) {
1278  auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1279  .addUse(CondReg)
1280  .addImm(/*bit offset=*/0)
1281  .addMBB(DestMBB);
1282 
1283  I.eraseFromParent();
1284  return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1285  } else {
1286  auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1287  .addDef(AArch64::WZR)
1288  .addUse(CondReg)
1289  .addImm(1);
1290  constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1291  auto Bcc =
1292  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1293  .addImm(AArch64CC::EQ)
1294  .addMBB(DestMBB);
1295 
1296  I.eraseFromParent();
1297  return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1298  }
1299  }
1300 
1301  case TargetOpcode::G_BRINDIRECT: {
1302  I.setDesc(TII.get(AArch64::BR));
1303  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1304  }
1305 
1306  case TargetOpcode::G_BRJT:
1307  return selectBrJT(I, MRI);
1308 
1309  case TargetOpcode::G_BSWAP: {
1310  // Handle vector types for G_BSWAP directly.
1311  Register DstReg = I.getOperand(0).getReg();
1312  LLT DstTy = MRI.getType(DstReg);
1313 
1314  // We should only get vector types here; everything else is handled by the
1315  // importer right now.
1316  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1317  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1318  return false;
1319  }
1320 
1321  // Only handle 4 and 2 element vectors for now.
1322  // TODO: 16-bit elements.
1323  unsigned NumElts = DstTy.getNumElements();
1324  if (NumElts != 4 && NumElts != 2) {
1325  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1326  return false;
1327  }
1328 
1329  // Choose the correct opcode for the supported types. Right now, that's
1330  // v2s32, v4s32, and v2s64.
1331  unsigned Opc = 0;
1332  unsigned EltSize = DstTy.getElementType().getSizeInBits();
1333  if (EltSize == 32)
1334  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1335  : AArch64::REV32v16i8;
1336  else if (EltSize == 64)
1337  Opc = AArch64::REV64v16i8;
1338 
1339  // We should always get something by the time we get here...
1340  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1341 
1342  I.setDesc(TII.get(Opc));
1343  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1344  }
1345 
1346  case TargetOpcode::G_FCONSTANT:
1347  case TargetOpcode::G_CONSTANT: {
1348  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1349 
1350  const LLT s8 = LLT::scalar(8);
1351  const LLT s16 = LLT::scalar(16);
1352  const LLT s32 = LLT::scalar(32);
1353  const LLT s64 = LLT::scalar(64);
1354  const LLT p0 = LLT::pointer(0, 64);
1355 
1356  const Register DefReg = I.getOperand(0).getReg();
1357  const LLT DefTy = MRI.getType(DefReg);
1358  const unsigned DefSize = DefTy.getSizeInBits();
1359  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1360 
1361  // FIXME: Redundant check, but even less readable when factored out.
1362  if (isFP) {
1363  if (Ty != s32 && Ty != s64) {
1364  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1365  << " constant, expected: " << s32 << " or " << s64
1366  << '\n');
1367  return false;
1368  }
1369 
1370  if (RB.getID() != AArch64::FPRRegBankID) {
1371  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1372  << " constant on bank: " << RB
1373  << ", expected: FPR\n");
1374  return false;
1375  }
1376 
1377  // The case when we have 0.0 is covered by tablegen. Reject it here so we
1378  // can be sure tablegen works correctly and isn't rescued by this code.
1379  if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1380  return false;
1381  } else {
1382  // s32 and s64 are covered by tablegen.
1383  if (Ty != p0 && Ty != s8 && Ty != s16) {
1384  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1385  << " constant, expected: " << s32 << ", " << s64
1386  << ", or " << p0 << '\n');
1387  return false;
1388  }
1389 
1390  if (RB.getID() != AArch64::GPRRegBankID) {
1391  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1392  << " constant on bank: " << RB
1393  << ", expected: GPR\n");
1394  return false;
1395  }
1396  }
1397 
1398  // We allow G_CONSTANT of types < 32b.
1399  const unsigned MovOpc =
1400  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1401 
1402  if (isFP) {
1403  // Either emit a FMOV, or emit a copy to emit a normal mov.
1404  const TargetRegisterClass &GPRRC =
1405  DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1406  const TargetRegisterClass &FPRRC =
1407  DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1408 
1409  // Can we use a FMOV instruction to represent the immediate?
1410  if (emitFMovForFConstant(I, MRI))
1411  return true;
1412 
1413  // Nope. Emit a copy and use a normal mov instead.
1414  const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1415  MachineOperand &RegOp = I.getOperand(0);
1416  RegOp.setReg(DefGPRReg);
1417  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1418  MIB.buildCopy({DefReg}, {DefGPRReg});
1419 
1420  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1421  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1422  return false;
1423  }
1424 
1425  MachineOperand &ImmOp = I.getOperand(1);
1426  // FIXME: Is going through int64_t always correct?
1427  ImmOp.ChangeToImmediate(
1428  ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1429  } else if (I.getOperand(1).isCImm()) {
1430  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1431  I.getOperand(1).ChangeToImmediate(Val);
1432  } else if (I.getOperand(1).isImm()) {
1433  uint64_t Val = I.getOperand(1).getImm();
1434  I.getOperand(1).ChangeToImmediate(Val);
1435  }
1436 
1437  I.setDesc(TII.get(MovOpc));
1438  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1439  return true;
1440  }
1441  case TargetOpcode::G_EXTRACT: {
1442  LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1443  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1444  (void)DstTy;
1445  unsigned SrcSize = SrcTy.getSizeInBits();
1446  // Larger extracts are vectors, same-size extracts should be something else
1447  // by now (either split up or simplified to a COPY).
1448  if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1449  return false;
1450 
1451  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1452  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1453  Ty.getSizeInBits() - 1);
1454 
1455  if (SrcSize < 64) {
1456  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1457  "unexpected G_EXTRACT types");
1458  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1459  }
1460 
1461  Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1462  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1463  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1464  .addReg(DstReg, 0, AArch64::sub_32);
1465  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1466  AArch64::GPR32RegClass, MRI);
1467  I.getOperand(0).setReg(DstReg);
1468 
1469  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1470  }
1471 
1472  case TargetOpcode::G_INSERT: {
1473  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1474  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1475  unsigned DstSize = DstTy.getSizeInBits();
1476  // Larger inserts are vectors, same-size ones should be something else by
1477  // now (split up or turned into COPYs).
1478  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1479  return false;
1480 
1481  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1482  unsigned LSB = I.getOperand(3).getImm();
1483  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1484  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1485  MachineInstrBuilder(MF, I).addImm(Width - 1);
1486 
1487  if (DstSize < 64) {
1488  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1489  "unexpected G_INSERT types");
1490  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1491  }
1492 
1493  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1494  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1495  TII.get(AArch64::SUBREG_TO_REG))
1496  .addDef(SrcReg)
1497  .addImm(0)
1498  .addUse(I.getOperand(2).getReg())
1499  .addImm(AArch64::sub_32);
1500  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1501  AArch64::GPR32RegClass, MRI);
1502  I.getOperand(2).setReg(SrcReg);
1503 
1504  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1505  }
1506  case TargetOpcode::G_FRAME_INDEX: {
1507  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1508  if (Ty != LLT::pointer(0, 64)) {
1509  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1510  << ", expected: " << LLT::pointer(0, 64) << '\n');
1511  return false;
1512  }
1513  I.setDesc(TII.get(AArch64::ADDXri));
1514 
1515  // MOs for a #0 shifted immediate.
1516  I.addOperand(MachineOperand::CreateImm(0));
1517  I.addOperand(MachineOperand::CreateImm(0));
1518 
1519  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1520  }
1521 
1522  case TargetOpcode::G_GLOBAL_VALUE: {
1523  auto GV = I.getOperand(1).getGlobal();
1524  if (GV->isThreadLocal()) {
1525  // FIXME: we don't support TLS yet.
1526  return false;
1527  }
1528  unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
1529  if (OpFlags & AArch64II::MO_GOT) {
1530  I.setDesc(TII.get(AArch64::LOADgot));
1531  I.getOperand(1).setTargetFlags(OpFlags);
1532  } else if (TM.getCodeModel() == CodeModel::Large) {
1533  // Materialize the global using movz/movk instructions.
1534  materializeLargeCMVal(I, GV, OpFlags);
1535  I.eraseFromParent();
1536  return true;
1537  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1538  I.setDesc(TII.get(AArch64::ADR));
1539  I.getOperand(1).setTargetFlags(OpFlags);
1540  } else {
1541  I.setDesc(TII.get(AArch64::MOVaddr));
1542  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1543  MachineInstrBuilder MIB(MF, I);
1544  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1545  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1546  }
1547  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1548  }
1549 
1550  case TargetOpcode::G_ZEXTLOAD:
1551  case TargetOpcode::G_LOAD:
1552  case TargetOpcode::G_STORE: {
1553  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1554  MachineIRBuilder MIB(I);
1555 
1556  LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1557 
1558  if (PtrTy != LLT::pointer(0, 64)) {
1559  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1560  << ", expected: " << LLT::pointer(0, 64) << '\n');
1561  return false;
1562  }
1563 
1564  auto &MemOp = **I.memoperands_begin();
1565  if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
1566  LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1567  return false;
1568  }
1569  unsigned MemSizeInBits = MemOp.getSize() * 8;
1570 
1571  const Register PtrReg = I.getOperand(1).getReg();
1572 #ifndef NDEBUG
1573  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1574  // Sanity-check the pointer register.
1575  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1576  "Load/Store pointer operand isn't a GPR");
1577  assert(MRI.getType(PtrReg).isPointer() &&
1578  "Load/Store pointer operand isn't a pointer");
1579 #endif
1580 
1581  const Register ValReg = I.getOperand(0).getReg();
1582  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1583 
1584  const unsigned NewOpc =
1585  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1586  if (NewOpc == I.getOpcode())
1587  return false;
1588 
1589  I.setDesc(TII.get(NewOpc));
1590 
1591  uint64_t Offset = 0;
1592  auto *PtrMI = MRI.getVRegDef(PtrReg);
1593 
1594  // Try to fold a GEP into our unsigned immediate addressing mode.
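// Sketch: for an 8-byte access, "%ptr = G_GEP %base, 16" can become
// "LDRXui %base, 2", because the unsigned-immediate forms scale the offset by
// the access size (hence Imm / Size below) and only accept offsets that are
// suitably aligned and fit in 12 bits after scaling.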
1595  if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1596  if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1597  int64_t Imm = *COff;
1598  const unsigned Size = MemSizeInBits / 8;
1599  const unsigned Scale = Log2_32(Size);
1600  if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1601  unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1602  I.getOperand(1).setReg(Ptr2Reg);
1603  PtrMI = MRI.getVRegDef(Ptr2Reg);
1604  Offset = Imm / Size;
1605  }
1606  }
1607  }
1608 
1609  // If we haven't folded anything into our addressing mode yet, try to fold
1610  // a frame index into the base+offset.
1611  if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1612  I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1613 
1614  I.addOperand(MachineOperand::CreateImm(Offset));
1615 
1616  // If we're storing a 0, use WZR/XZR.
1617  if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1618  if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1619  if (I.getOpcode() == AArch64::STRWui)
1620  I.getOperand(0).setReg(AArch64::WZR);
1621  else if (I.getOpcode() == AArch64::STRXui)
1622  I.getOperand(0).setReg(AArch64::XZR);
1623  }
1624  }
1625 
1626  if (IsZExtLoad) {
1627  // The zextload from a smaller type to i32 should be handled by the importer.
1628  if (MRI.getType(ValReg).getSizeInBits() != 64)
1629  return false;
1630  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1631  // and zero_extend with SUBREG_TO_REG.
1632  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1633  Register DstReg = I.getOperand(0).getReg();
1634  I.getOperand(0).setReg(LdReg);
1635 
1636  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1637  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1638  .addImm(0)
1639  .addUse(LdReg)
1640  .addImm(AArch64::sub_32);
1641  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1642  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1643  MRI);
1644  }
1645  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1646  }
1647 
1648  case TargetOpcode::G_SMULH:
1649  case TargetOpcode::G_UMULH: {
1650  // Reject the various things we don't support yet.
1651  if (unsupportedBinOp(I, RBI, MRI, TRI))
1652  return false;
1653 
1654  const Register DefReg = I.getOperand(0).getReg();
1655  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1656 
1657  if (RB.getID() != AArch64::GPRRegBankID) {
1658  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1659  return false;
1660  }
1661 
1662  if (Ty != LLT::scalar(64)) {
1663  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1664  << ", expected: " << LLT::scalar(64) << '\n');
1665  return false;
1666  }
1667 
1668  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1669  : AArch64::UMULHrr;
1670  I.setDesc(TII.get(NewOpc));
1671 
1672  // Now that we selected an opcode, we need to constrain the register
1673  // operands to use appropriate classes.
1674  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1675  }
1676  case TargetOpcode::G_FADD:
1677  case TargetOpcode::G_FSUB:
1678  case TargetOpcode::G_FMUL:
1679  case TargetOpcode::G_FDIV:
1680 
1681  case TargetOpcode::G_ASHR:
1682  if (MRI.getType(I.getOperand(0).getReg()).isVector())
1683  return selectVectorASHR(I, MRI);
1684  LLVM_FALLTHROUGH;
1685  case TargetOpcode::G_SHL:
1686  if (Opcode == TargetOpcode::G_SHL &&
1687  MRI.getType(I.getOperand(0).getReg()).isVector())
1688  return selectVectorSHL(I, MRI);
1689  LLVM_FALLTHROUGH;
1690  case TargetOpcode::G_OR:
1691  case TargetOpcode::G_LSHR:
1692  case TargetOpcode::G_GEP: {
1693  // Reject the various things we don't support yet.
1694  if (unsupportedBinOp(I, RBI, MRI, TRI))
1695  return false;
1696 
1697  const unsigned OpSize = Ty.getSizeInBits();
1698 
1699  const Register DefReg = I.getOperand(0).getReg();
1700  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1701 
1702  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1703  if (NewOpc == I.getOpcode())
1704  return false;
1705 
1706  I.setDesc(TII.get(NewOpc));
1707  // FIXME: Should the type be always reset in setDesc?
1708 
1709  // Now that we selected an opcode, we need to constrain the register
1710  // operands to use appropriate classes.
1711  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1712  }
1713 
1714  case TargetOpcode::G_UADDO: {
1715  // TODO: Support other types.
1716  unsigned OpSize = Ty.getSizeInBits();
1717  if (OpSize != 32 && OpSize != 64) {
1718  LLVM_DEBUG(
1719  dbgs()
1720  << "G_UADDO currently only supported for 32 and 64 b types.\n");
1721  return false;
1722  }
1723 
1724  // TODO: Support vectors.
1725  if (Ty.isVector()) {
1726  LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1727  return false;
1728  }
1729 
1730  // Add and set the condition flags.
1731  unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1732  MachineIRBuilder MIRBuilder(I);
1733  auto AddsMI = MIRBuilder.buildInstr(
1734  AddsOpc, {I.getOperand(0).getReg()},
1735  {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1736  constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1737 
1738  // Now, put the overflow result in the register given by the first operand
1739  // to the G_UADDO. CSINC increments the result when the predicate is false,
1740  // so to get the increment when it's true, we need to use the inverse. In
1741  // this case, we want to increment when carry is set.
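  // This builds csinc w<dst>, wzr, wzr, lo, which is equivalent to
  // cset w<dst>, hs (set to 1 when the carry flag is set).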
1742  auto CsetMI = MIRBuilder
1743  .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1744  {Register(AArch64::WZR), Register(AArch64::WZR)})
1745  .addImm(getInvertedCondCode(AArch64CC::HS));
1746  constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1747  I.eraseFromParent();
1748  return true;
1749  }
1750 
1751  case TargetOpcode::G_PTR_MASK: {
1752  uint64_t Align = I.getOperand(2).getImm();
1753  if (Align >= 64 || Align == 0)
1754  return false;
1755 
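  // Build a mask that clears the low Align bits; ANDXri takes it as an
  // encoded logical immediate.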
1756  uint64_t Mask = ~((1ULL << Align) - 1);
1757  I.setDesc(TII.get(AArch64::ANDXri));
1758  I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1759 
1760  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1761  }
1762  case TargetOpcode::G_PTRTOINT:
1763  case TargetOpcode::G_TRUNC: {
1764  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1765  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1766 
1767  const Register DstReg = I.getOperand(0).getReg();
1768  const Register SrcReg = I.getOperand(1).getReg();
1769 
1770  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1771  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1772 
1773  if (DstRB.getID() != SrcRB.getID()) {
1774  LLVM_DEBUG(
1775  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1776  return false;
1777  }
1778 
1779  if (DstRB.getID() == AArch64::GPRRegBankID) {
1780  const TargetRegisterClass *DstRC =
1781  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1782  if (!DstRC)
1783  return false;
1784 
1785  const TargetRegisterClass *SrcRC =
1786  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1787  if (!SrcRC)
1788  return false;
1789 
1790  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1791  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1792  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1793  return false;
1794  }
1795 
1796  if (DstRC == SrcRC) {
1797  // Nothing to be done
1798  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1799  SrcTy == LLT::scalar(64)) {
1800  llvm_unreachable("TableGen can import this case");
1801  return false;
1802  } else if (DstRC == &AArch64::GPR32RegClass &&
1803  SrcRC == &AArch64::GPR64RegClass) {
1804  I.getOperand(1).setSubReg(AArch64::sub_32);
1805  } else {
1806  LLVM_DEBUG(
1807  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1808  return false;
1809  }
1810 
1811  I.setDesc(TII.get(TargetOpcode::COPY));
1812  return true;
1813  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1814  if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1815  I.setDesc(TII.get(AArch64::XTNv4i16));
1816  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1817  return true;
1818  }
1819  }
1820 
1821  return false;
1822  }
1823 
1824  case TargetOpcode::G_ANYEXT: {
1825  const Register DstReg = I.getOperand(0).getReg();
1826  const Register SrcReg = I.getOperand(1).getReg();
1827 
1828  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1829  if (RBDst.getID() != AArch64::GPRRegBankID) {
1830  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1831  << ", expected: GPR\n");
1832  return false;
1833  }
1834 
1835  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1836  if (RBSrc.getID() != AArch64::GPRRegBankID) {
1837  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1838  << ", expected: GPR\n");
1839  return false;
1840  }
1841 
1842  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1843 
1844  if (DstSize == 0) {
1845  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
1846  return false;
1847  }
1848 
1849  if (DstSize != 64 && DstSize > 32) {
1850  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1851  << ", expected: 32 or 64\n");
1852  return false;
1853  }
1854  // At this point G_ANYEXT is just like a plain COPY, but we need
1855  // to explicitly form the 64-bit value if any.
1856  if (DstSize > 32) {
1857  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1858  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1859  .addDef(ExtSrc)
1860  .addImm(0)
1861  .addUse(SrcReg)
1862  .addImm(AArch64::sub_32);
1863  I.getOperand(1).setReg(ExtSrc);
1864  }
1865  return selectCopy(I, TII, MRI, TRI, RBI);
1866  }
1867 
1868  case TargetOpcode::G_ZEXT:
1869  case TargetOpcode::G_SEXT: {
1870  unsigned Opcode = I.getOpcode();
1871  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1872  SrcTy = MRI.getType(I.getOperand(1).getReg());
1873  const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1874  const Register DefReg = I.getOperand(0).getReg();
1875  const Register SrcReg = I.getOperand(1).getReg();
1876  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1877 
1878  if (RB.getID() != AArch64::GPRRegBankID) {
1879  LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1880  << ", expected: GPR\n");
1881  return false;
1882  }
1883 
1884  MachineInstr *ExtI;
1885  if (DstTy == LLT::scalar(64)) {
1886  // FIXME: Can we avoid manually doing this?
1887  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
1888  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1889  << " operand\n");
1890  return false;
1891  }
1892 
1893  const Register SrcXReg =
1894  MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1895  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1896  .addDef(SrcXReg)
1897  .addImm(0)
1898  .addUse(SrcReg)
1899  .addImm(AArch64::sub_32);
1900 
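  // With immr = 0 and imms = SrcSize - 1, SBFM/UBFM extracts the low SrcSize
  // bits and sign/zero-extends them into the destination.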
1901  const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1902  ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1903  .addDef(DefReg)
1904  .addUse(SrcXReg)
1905  .addImm(0)
1906  .addImm(SrcTy.getSizeInBits() - 1);
1907  } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
1908  const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1909  ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1910  .addDef(DefReg)
1911  .addUse(SrcReg)
1912  .addImm(0)
1913  .addImm(SrcTy.getSizeInBits() - 1);
1914  } else {
1915  return false;
1916  }
1917 
1918  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1919 
1920  I.eraseFromParent();
1921  return true;
1922  }
1923 
1924  case TargetOpcode::G_SITOFP:
1925  case TargetOpcode::G_UITOFP:
1926  case TargetOpcode::G_FPTOSI:
1927  case TargetOpcode::G_FPTOUI: {
1928  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1929  SrcTy = MRI.getType(I.getOperand(1).getReg());
1930  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1931  if (NewOpc == Opcode)
1932  return false;
1933 
1934  I.setDesc(TII.get(NewOpc));
1935  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1936 
1937  return true;
1938  }
1939 
1940 
1941  case TargetOpcode::G_INTTOPTR:
1942  // The importer is currently unable to import pointer types since they
1943  // didn't exist in SelectionDAG.
1944  return selectCopy(I, TII, MRI, TRI, RBI);
1945 
1946  case TargetOpcode::G_BITCAST:
1947  // Imported SelectionDAG rules can handle every bitcast except those that
1948  // bitcast from a type to the same type. Ideally, these shouldn't occur
1949  // but we might not run an optimizer that deletes them. The other exception
1950  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1951  // of them.
1952  return selectCopy(I, TII, MRI, TRI, RBI);
1953 
1954  case TargetOpcode::G_SELECT: {
1955  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
1956  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1957  << ", expected: " << LLT::scalar(1) << '\n');
1958  return false;
1959  }
1960 
1961  const Register CondReg = I.getOperand(1).getReg();
1962  const Register TReg = I.getOperand(2).getReg();
1963  const Register FReg = I.getOperand(3).getReg();
1964 
1965  if (tryOptSelect(I))
1966  return true;
1967 
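  // Otherwise, test bit 0 of the condition register and select on the NE flag.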
1968  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
1969  MachineInstr &TstMI =
1970  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1971  .addDef(AArch64::WZR)
1972  .addUse(CondReg)
1973  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1974 
1975  MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1976  .addDef(I.getOperand(0).getReg())
1977  .addUse(TReg)
1978  .addUse(FReg)
1979  .addImm(AArch64CC::NE);
1980 
1981  constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1982  constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1983 
1984  I.eraseFromParent();
1985  return true;
1986  }
1987  case TargetOpcode::G_ICMP: {
1988  if (Ty.isVector())
1989  return selectVectorICmp(I, MRI);
1990 
1991  if (Ty != LLT::scalar(32)) {
1992  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1993  << ", expected: " << LLT::scalar(32) << '\n');
1994  return false;
1995  }
1996 
1997  MachineIRBuilder MIRBuilder(I);
1998  if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
1999  MIRBuilder))
2000  return false;
2001  emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2002  MIRBuilder);
2003  I.eraseFromParent();
2004  return true;
2005  }
2006 
2007  case TargetOpcode::G_FCMP: {
2008  if (Ty != LLT::scalar(32)) {
2009  LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2010  << ", expected: " << LLT::scalar(32) << '\n');
2011  return false;
2012  }
2013 
2014  unsigned CmpOpc = selectFCMPOpc(I, MRI);
2015  if (!CmpOpc)
2016  return false;
2017 
2018  // FIXME: regbank
2019 
2020  AArch64CC::CondCode CC1, CC2;
2021  changeFCMPPredToAArch64CC(
2022  (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2023 
2024  // Partially build the compare. Decide if we need to add a use for the
2025  // third operand based off whether or not we're comparing against 0.0.
2026  auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2027  .addUse(I.getOperand(2).getReg());
2028 
2029  // If we don't have an immediate compare, then we need to add a use of the
2030  // register which wasn't used for the immediate.
2031  // Note that the immediate will always be the last operand.
2032  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2033  CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2034 
2035  const Register DefReg = I.getOperand(0).getReg();
2036  Register Def1Reg = DefReg;
2037  if (CC2 != AArch64CC::AL)
2038  Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2039 
2040  MachineInstr &CSetMI =
2041  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2042  .addDef(Def1Reg)
2043  .addUse(AArch64::WZR)
2044  .addUse(AArch64::WZR)
2045  .addImm(getInvertedCondCode(CC1));
2046 
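  // Some FP predicates need two condition codes; materialize each one with a
  // CSINC and OR the results together.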
2047  if (CC2 != AArch64CC::AL) {
2048  Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2049  MachineInstr &CSet2MI =
2050  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2051  .addDef(Def2Reg)
2052  .addUse(AArch64::WZR)
2053  .addUse(AArch64::WZR)
2054  .addImm(getInvertedCondCode(CC2));
2055  MachineInstr &OrMI =
2056  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2057  .addDef(DefReg)
2058  .addUse(Def1Reg)
2059  .addUse(Def2Reg);
2060  constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2061  constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2062  }
2063  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2064  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2065 
2066  I.eraseFromParent();
2067  return true;
2068  }
2069  case TargetOpcode::G_VASTART:
2070  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2071  : selectVaStartAAPCS(I, MF, MRI);
2072  case TargetOpcode::G_INTRINSIC:
2073  return selectIntrinsic(I, MRI);
2074  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2075  return selectIntrinsicWithSideEffects(I, MRI);
2076  case TargetOpcode::G_IMPLICIT_DEF: {
2077  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2078  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2079  const Register DstReg = I.getOperand(0).getReg();
2080  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2081  const TargetRegisterClass *DstRC =
2082  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2083  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2084  return true;
2085  }
2086  case TargetOpcode::G_BLOCK_ADDR: {
2087  if (TM.getCodeModel() == CodeModel::Large) {
2088  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2089  I.eraseFromParent();
2090  return true;
2091  } else {
2092  I.setDesc(TII.get(AArch64::MOVaddrBA));
2093  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2094  I.getOperand(0).getReg())
2095  .addBlockAddress(I.getOperand(1).getBlockAddress(),
2096  /* Offset */ 0, AArch64II::MO_PAGE)
2097  .addBlockAddress(
2098  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2099  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2100  I.eraseFromParent();
2101  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2102  }
2103  }
2104  case TargetOpcode::G_INTRINSIC_TRUNC:
2105  return selectIntrinsicTrunc(I, MRI);
2106  case TargetOpcode::G_INTRINSIC_ROUND:
2107  return selectIntrinsicRound(I, MRI);
2108  case TargetOpcode::G_BUILD_VECTOR:
2109  return selectBuildVector(I, MRI);
2110  case TargetOpcode::G_MERGE_VALUES:
2111  return selectMergeValues(I, MRI);
2112  case TargetOpcode::G_UNMERGE_VALUES:
2113  return selectUnmergeValues(I, MRI);
2114  case TargetOpcode::G_SHUFFLE_VECTOR:
2115  return selectShuffleVector(I, MRI);
2116  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2117  return selectExtractElt(I, MRI);
2118  case TargetOpcode::G_INSERT_VECTOR_ELT:
2119  return selectInsertElt(I, MRI);
2120  case TargetOpcode::G_CONCAT_VECTORS:
2121  return selectConcatVectors(I, MRI);
2122  case TargetOpcode::G_JUMP_TABLE:
2123  return selectJumpTable(I, MRI);
2124  }
2125 
2126  return false;
2127 }
2128 
2129 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2130  MachineRegisterInfo &MRI) const {
2131  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2132  Register JTAddr = I.getOperand(0).getReg();
2133  unsigned JTI = I.getOperand(1).getIndex();
2134  Register Index = I.getOperand(2).getReg();
2135  MachineIRBuilder MIB(I);
2136 
2137  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2138  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
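  // JumpTableDest32 computes the branch target from the jump table address
  // plus the selected 32-bit entry, using the scratch register for the
  // intermediate value.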
2139  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2140  {JTAddr, Index})
2141  .addJumpTableIndex(JTI);
2142 
2143  // Build the indirect branch.
2144  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2145  I.eraseFromParent();
2146  return true;
2147 }
2148 
2149 bool AArch64InstructionSelector::selectJumpTable(
2150  MachineInstr &I, MachineRegisterInfo &MRI) const {
2151  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2152  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2153 
2154  Register DstReg = I.getOperand(0).getReg();
2155  unsigned JTI = I.getOperand(1).getIndex();
2156  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2157  MachineIRBuilder MIB(I);
2158  auto MovMI =
2159  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2160  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2161  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2162  I.eraseFromParent();
2163  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2164 }
2165 
2166 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2167  MachineInstr &I, MachineRegisterInfo &MRI) const {
2168  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2169 
2170  // Select the correct opcode.
2171  unsigned Opc = 0;
2172  if (!SrcTy.isVector()) {
2173  switch (SrcTy.getSizeInBits()) {
2174  default:
2175  case 16:
2176  Opc = AArch64::FRINTZHr;
2177  break;
2178  case 32:
2179  Opc = AArch64::FRINTZSr;
2180  break;
2181  case 64:
2182  Opc = AArch64::FRINTZDr;
2183  break;
2184  }
2185  } else {
2186  unsigned NumElts = SrcTy.getNumElements();
2187  switch (SrcTy.getElementType().getSizeInBits()) {
2188  default:
2189  break;
2190  case 16:
2191  if (NumElts == 4)
2192  Opc = AArch64::FRINTZv4f16;
2193  else if (NumElts == 8)
2194  Opc = AArch64::FRINTZv8f16;
2195  break;
2196  case 32:
2197  if (NumElts == 2)
2198  Opc = AArch64::FRINTZv2f32;
2199  else if (NumElts == 4)
2200  Opc = AArch64::FRINTZv4f32;
2201  break;
2202  case 64:
2203  if (NumElts == 2)
2204  Opc = AArch64::FRINTZv2f64;
2205  break;
2206  }
2207  }
2208 
2209  if (!Opc) {
2210  // Didn't get an opcode above, bail.
2211  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2212  return false;
2213  }
2214 
2215  // Legalization would have set us up perfectly for this; we just need to
2216  // set the opcode and move on.
2217  I.setDesc(TII.get(Opc));
2218  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2219 }
2220 
2221 bool AArch64InstructionSelector::selectIntrinsicRound(
2222  MachineInstr &I, MachineRegisterInfo &MRI) const {
2223  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2224 
2225  // Select the correct opcode.
2226  unsigned Opc = 0;
2227  if (!SrcTy.isVector()) {
2228  switch (SrcTy.getSizeInBits()) {
2229  default:
2230  case 16:
2231  Opc = AArch64::FRINTAHr;
2232  break;
2233  case 32:
2234  Opc = AArch64::FRINTASr;
2235  break;
2236  case 64:
2237  Opc = AArch64::FRINTADr;
2238  break;
2239  }
2240  } else {
2241  unsigned NumElts = SrcTy.getNumElements();
2242  switch (SrcTy.getElementType().getSizeInBits()) {
2243  default:
2244  break;
2245  case 16:
2246  if (NumElts == 4)
2247  Opc = AArch64::FRINTAv4f16;
2248  else if (NumElts == 8)
2249  Opc = AArch64::FRINTAv8f16;
2250  break;
2251  case 32:
2252  if (NumElts == 2)
2253  Opc = AArch64::FRINTAv2f32;
2254  else if (NumElts == 4)
2255  Opc = AArch64::FRINTAv4f32;
2256  break;
2257  case 64:
2258  if (NumElts == 2)
2259  Opc = AArch64::FRINTAv2f64;
2260  break;
2261  }
2262  }
2263 
2264  if (!Opc) {
2265  // Didn't get an opcode above, bail.
2266  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2267  return false;
2268  }
2269 
2270  // Legalization would have set us up perfectly for this; we just need to
2271  // set the opcode and move on.
2272  I.setDesc(TII.get(Opc));
2273  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2274 }
2275 
2276 bool AArch64InstructionSelector::selectVectorICmp(
2277  MachineInstr &I, MachineRegisterInfo &MRI) const {
2278  Register DstReg = I.getOperand(0).getReg();
2279  LLT DstTy = MRI.getType(DstReg);
2280  Register SrcReg = I.getOperand(2).getReg();
2281  Register Src2Reg = I.getOperand(3).getReg();
2282  LLT SrcTy = MRI.getType(SrcReg);
2283 
2284  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2285  unsigned NumElts = DstTy.getNumElements();
2286 
2287  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2288  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2289  // Third index is cc opcode:
2290  // 0 == eq
2291  // 1 == ugt
2292  // 2 == uge
2293  // 3 == ult
2294  // 4 == ule
2295  // 5 == sgt
2296  // 6 == sge
2297  // 7 == slt
2298  // 8 == sle
2299  // ne is done by negating 'eq' result.
2300 
2301  // This table below assumes that for some comparisons the operands will be
2302  // commuted.
2303  // ult op == commute + ugt op
2304  // ule op == commute + uge op
2305  // slt op == commute + sgt op
2306  // sle op == commute + sge op
2307  unsigned PredIdx = 0;
2308  bool SwapOperands = false;
2309  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2310  switch (Pred) {
2311  case CmpInst::ICMP_NE:
2312  case CmpInst::ICMP_EQ:
2313  PredIdx = 0;
2314  break;
2315  case CmpInst::ICMP_UGT:
2316  PredIdx = 1;
2317  break;
2318  case CmpInst::ICMP_UGE:
2319  PredIdx = 2;
2320  break;
2321  case CmpInst::ICMP_ULT:
2322  PredIdx = 3;
2323  SwapOperands = true;
2324  break;
2325  case CmpInst::ICMP_ULE:
2326  PredIdx = 4;
2327  SwapOperands = true;
2328  break;
2329  case CmpInst::ICMP_SGT:
2330  PredIdx = 5;
2331  break;
2332  case CmpInst::ICMP_SGE:
2333  PredIdx = 6;
2334  break;
2335  case CmpInst::ICMP_SLT:
2336  PredIdx = 7;
2337  SwapOperands = true;
2338  break;
2339  case CmpInst::ICMP_SLE:
2340  PredIdx = 8;
2341  SwapOperands = true;
2342  break;
2343  default:
2344  llvm_unreachable("Unhandled icmp predicate");
2345  return false;
2346  }
2347 
2348  // This table obviously should be tablegen'd when we have our GISel native
2349  // tablegen selector.
2350 
2351  static const unsigned OpcTable[4][4][9] = {
2352  {
2353  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2354  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2355  0 /* invalid */},
2356  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2357  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2358  0 /* invalid */},
2359  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2360  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2361  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2362  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2363  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2364  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2365  },
2366  {
2367  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2368  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2369  0 /* invalid */},
2370  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2371  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2372  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2373  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2374  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2375  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2376  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2377  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2378  0 /* invalid */}
2379  },
2380  {
2381  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2382  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2383  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2384  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2385  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2386  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2387  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2388  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2389  0 /* invalid */},
2390  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2391  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2392  0 /* invalid */}
2393  },
2394  {
2395  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2396  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2397  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2398  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2399  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2400  0 /* invalid */},
2401  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2402  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2403  0 /* invalid */},
2404  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2405  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2406  0 /* invalid */}
2407  },
2408  };
2409  unsigned EltIdx = Log2_32(SrcEltSize / 8);
2410  unsigned NumEltsIdx = Log2_32(NumElts / 2);
2411  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2412  if (!Opc) {
2413  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2414  return false;
2415  }
2416 
2417  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2418  const TargetRegisterClass *SrcRC =
2419  getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2420  if (!SrcRC) {
2421  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2422  return false;
2423  }
2424 
2425  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2426  if (SrcTy.getSizeInBits() == 128)
2427  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2428 
2429  if (SwapOperands)
2430  std::swap(SrcReg, Src2Reg);
2431 
2432  MachineIRBuilder MIB(I);
2433  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2434  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2435 
2436  // Invert if we had a 'ne' cc.
2437  if (NotOpc) {
2438  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2439  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2440  } else {
2441  MIB.buildCopy(DstReg, Cmp.getReg(0));
2442  }
2443  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2444  I.eraseFromParent();
2445  return true;
2446 }
2447 
2448 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2449  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2450  MachineIRBuilder &MIRBuilder) const {
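  // Insert the scalar into the low subregister of an IMPLICIT_DEF of the
  // destination class to form the vector value.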
2451  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2452 
2453  auto BuildFn = [&](unsigned SubregIndex) {
2454  auto Ins =
2455  MIRBuilder
2456  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2457  .addImm(SubregIndex);
2458  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2459  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2460  return &*Ins;
2461  };
2462 
2463  switch (EltSize) {
2464  case 16:
2465  return BuildFn(AArch64::hsub);
2466  case 32:
2467  return BuildFn(AArch64::ssub);
2468  case 64:
2469  return BuildFn(AArch64::dsub);
2470  default:
2471  return nullptr;
2472  }
2473 }
2474 
2475 bool AArch64InstructionSelector::selectMergeValues(
2476  MachineInstr &I, MachineRegisterInfo &MRI) const {
2477  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2478  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2479  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2480  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2481 
2482  // At the moment we only support merging two s32s into an s64.
2483  if (I.getNumOperands() != 3)
2484  return false;
2485  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2486  return false;
2487  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2488  if (RB.getID() != AArch64::GPRRegBankID)
2489  return false;
2490 
2491  auto *DstRC = &AArch64::GPR64RegClass;
2492  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2493  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2494  TII.get(TargetOpcode::SUBREG_TO_REG))
2495  .addDef(SubToRegDef)
2496  .addImm(0)
2497  .addUse(I.getOperand(1).getReg())
2498  .addImm(AArch64::sub_32);
2499  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2500  // Need to anyext the second scalar before we can use bfm
2501  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2502  TII.get(TargetOpcode::SUBREG_TO_REG))
2503  .addDef(SubToRegDef2)
2504  .addImm(0)
2505  .addUse(I.getOperand(2).getReg())
2506  .addImm(AArch64::sub_32);
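  // BFMXri with immr = 32 and imms = 31 inserts the low 32 bits of the second
  // value into bits [63:32] of the first, producing the merged 64-bit result.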
2507  MachineInstr &BFM =
2508  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2509  .addDef(I.getOperand(0).getReg())
2510  .addUse(SubToRegDef)
2511  .addUse(SubToRegDef2)
2512  .addImm(32)
2513  .addImm(31);
2514  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2515  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2516  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2517  I.eraseFromParent();
2518  return true;
2519 }
2520 
2521 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2522  const unsigned EltSize) {
2523  // Choose a lane copy opcode and subregister based off of the size of the
2524  // vector's elements.
2525  switch (EltSize) {
2526  case 16:
2527  CopyOpc = AArch64::CPYi16;
2528  ExtractSubReg = AArch64::hsub;
2529  break;
2530  case 32:
2531  CopyOpc = AArch64::CPYi32;
2532  ExtractSubReg = AArch64::ssub;
2533  break;
2534  case 64:
2535  CopyOpc = AArch64::CPYi64;
2536  ExtractSubReg = AArch64::dsub;
2537  break;
2538  default:
2539  // Unknown size, bail out.
2540  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2541  return false;
2542  }
2543  return true;
2544 }
2545 
2546 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2547  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2548  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2549  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2550  unsigned CopyOpc = 0;
2551  unsigned ExtractSubReg = 0;
2552  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2553  LLVM_DEBUG(
2554  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2555  return nullptr;
2556  }
2557 
2558  const TargetRegisterClass *DstRC =
2559  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2560  if (!DstRC) {
2561  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2562  return nullptr;
2563  }
2564 
2565  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2566  const LLT &VecTy = MRI.getType(VecReg);
2567  const TargetRegisterClass *VecRC =
2568  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2569  if (!VecRC) {
2570  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2571  return nullptr;
2572  }
2573 
2574  // The register that we're going to copy into.
2575  Register InsertReg = VecReg;
2576  if (!DstReg)
2577  DstReg = MRI.createVirtualRegister(DstRC);
2578  // If the lane index is 0, we just use a subregister COPY.
2579  if (LaneIdx == 0) {
2580  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2581  .addReg(VecReg, 0, ExtractSubReg);
2582  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2583  return &*Copy;
2584  }
2585 
2586  // Lane copies require 128-bit wide registers. If we're dealing with an
2587  // unpacked vector, then we need to move up to that width. Insert an implicit
2588  // def and a subregister insert to get us there.
2589  if (VecTy.getSizeInBits() != 128) {
2590  MachineInstr *ScalarToVector = emitScalarToVector(
2591  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2592  if (!ScalarToVector)
2593  return nullptr;
2594  InsertReg = ScalarToVector->getOperand(0).getReg();
2595  }
2596 
2597  MachineInstr *LaneCopyMI =
2598  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2599  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2600 
2601  // Make sure that we actually constrain the initial copy.
2602  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2603  return LaneCopyMI;
2604 }
2605 
2606 bool AArch64InstructionSelector::selectExtractElt(
2607  MachineInstr &I, MachineRegisterInfo &MRI) const {
2608  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2609  "unexpected opcode!");
2610  Register DstReg = I.getOperand(0).getReg();
2611  const LLT NarrowTy = MRI.getType(DstReg);
2612  const Register SrcReg = I.getOperand(1).getReg();
2613  const LLT WideTy = MRI.getType(SrcReg);
2614  (void)WideTy;
2615  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2616  "source register size too small!");
2617  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2618 
2619  // Need the lane index to determine the correct copy opcode.
2620  MachineOperand &LaneIdxOp = I.getOperand(2);
2621  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2622 
2623  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2624  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2625  return false;
2626  }
2627 
2628  // Find the index to extract from.
2629  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2630  if (!VRegAndVal)
2631  return false;
2632  unsigned LaneIdx = VRegAndVal->Value;
2633 
2634  MachineIRBuilder MIRBuilder(I);
2635 
2636  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2637  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2638  LaneIdx, MIRBuilder);
2639  if (!Extract)
2640  return false;
2641 
2642  I.eraseFromParent();
2643  return true;
2644 }
2645 
2646 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2647  MachineInstr &I, MachineRegisterInfo &MRI) const {
2648  unsigned NumElts = I.getNumOperands() - 1;
2649  Register SrcReg = I.getOperand(NumElts).getReg();
2650  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2651  const LLT SrcTy = MRI.getType(SrcReg);
2652 
2653  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2654  if (SrcTy.getSizeInBits() > 128) {
2655  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2656  return false;
2657  }
2658 
2659  MachineIRBuilder MIB(I);
2660 
2661  // We implement a split vector operation by treating the sub-vectors as
2662  // scalars and extracting them.
2663  const RegisterBank &DstRB =
2664  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2665  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2666  Register Dst = I.getOperand(OpIdx).getReg();
2667  MachineInstr *Extract =
2668  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2669  if (!Extract)
2670  return false;
2671  }
2672  I.eraseFromParent();
2673  return true;
2674 }
2675 
2676 bool AArch64InstructionSelector::selectUnmergeValues(
2677  MachineInstr &I, MachineRegisterInfo &MRI) const {
2678  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2679  "unexpected opcode");
2680 
2681  // TODO: Handle unmerging into GPRs and from scalars to scalars.
2682  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2683  AArch64::FPRRegBankID ||
2684  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2685  AArch64::FPRRegBankID) {
2686  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2687  "currently unsupported.\n");
2688  return false;
2689  }
2690 
2691  // The last operand is the vector source register, and every other operand is
2692  // a register to unpack into.
2693  unsigned NumElts = I.getNumOperands() - 1;
2694  Register SrcReg = I.getOperand(NumElts).getReg();
2695  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2696  const LLT WideTy = MRI.getType(SrcReg);
2697  (void)WideTy;
2698  assert(WideTy.isVector() && "can only unmerge from vector types!");
2699  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2700  "source register size too small!");
2701 
2702  if (!NarrowTy.isScalar())
2703  return selectSplitVectorUnmerge(I, MRI);
2704 
2705  MachineIRBuilder MIB(I);
2706 
2707  // Choose a lane copy opcode and subregister based off of the size of the
2708  // vector's elements.
2709  unsigned CopyOpc = 0;
2710  unsigned ExtractSubReg = 0;
2711  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2712  return false;
2713 
2714  // Set up for the lane copies.
2715  MachineBasicBlock &MBB = *I.getParent();
2716 
2717  // Stores the registers we'll be copying from.
2718  SmallVector<Register, 4> InsertRegs;
2719 
2720  // We'll use the first register twice, so we only need NumElts-1 registers.
2721  unsigned NumInsertRegs = NumElts - 1;
2722 
2723  // If our elements fit into exactly 128 bits, then we can copy from the source
2724  // directly. Otherwise, we need to do a bit of setup with some subregister
2725  // inserts.
2726  if (NarrowTy.getSizeInBits() * NumElts == 128) {
2727  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2728  } else {
2729  // No. We have to perform subregister inserts. For each insert, create an
2730  // implicit def and a subregister insert, and save the register we create.
2731  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2732  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2733  MachineInstr &ImpDefMI =
2734  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2735  ImpDefReg);
2736 
2737  // Now, create the subregister insert from SrcReg.
2738  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2739  MachineInstr &InsMI =
2740  *BuildMI(MBB, I, I.getDebugLoc(),
2741  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2742  .addUse(ImpDefReg)
2743  .addUse(SrcReg)
2744  .addImm(AArch64::dsub);
2745 
2746  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2747  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2748 
2749  // Save the register so that we can copy from it after.
2750  InsertRegs.push_back(InsertReg);
2751  }
2752  }
2753 
2754  // Now that we've created any necessary subregister inserts, we can
2755  // create the copies.
2756  //
2757  // Perform the first copy separately as a subregister copy.
2758  Register CopyTo = I.getOperand(0).getReg();
2759  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2760  .addReg(InsertRegs[0], 0, ExtractSubReg);
2761  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
2762 
2763  // Now, perform the remaining copies as vector lane copies.
2764  unsigned LaneIdx = 1;
2765  for (Register InsReg : InsertRegs) {
2766  Register CopyTo = I.getOperand(LaneIdx).getReg();
2767  MachineInstr &CopyInst =
2768  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2769  .addUse(InsReg)
2770  .addImm(LaneIdx);
2771  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2772  ++LaneIdx;
2773  }
2774 
2775  // Separately constrain the first copy's destination. Because of the
2776  // limitation in constrainOperandRegClass, we can't guarantee that this will
2777  // actually be constrained. So, do it ourselves using the second operand.
2778  const TargetRegisterClass *RC =
2779  MRI.getRegClassOrNull(I.getOperand(1).getReg());
2780  if (!RC) {
2781  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2782  return false;
2783  }
2784 
2785  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2786  I.eraseFromParent();
2787  return true;
2788 }
2789 
2790 bool AArch64InstructionSelector::selectConcatVectors(
2791  MachineInstr &I, MachineRegisterInfo &MRI) const {
2792  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2793  "Unexpected opcode");
2794  Register Dst = I.getOperand(0).getReg();
2795  Register Op1 = I.getOperand(1).getReg();
2796  Register Op2 = I.getOperand(2).getReg();
2797  MachineIRBuilder MIRBuilder(I);
2798  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2799  if (!ConcatMI)
2800  return false;
2801  I.eraseFromParent();
2802  return true;
2803 }
2804 
2805 void AArch64InstructionSelector::collectShuffleMaskIndices(
2806  MachineInstr &I, MachineRegisterInfo &MRI,
2807  SmallVectorImpl<Optional<int>> &Idxs) const {
2808  MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2809  assert(
2810  MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2811  "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2812  // Find the constant indices.
2813  for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2814  // Look through copies.
2815  MachineInstr *ScalarDef =
2816  getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
2817  assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2818  if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2819  // This must be an undef if it's not a constant.
2820  assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2821  Idxs.push_back(None);
2822  } else {
2823  Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2824  }
2825  }
2826 }
2827 
2828 unsigned
2829 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2830  MachineFunction &MF) const {
2831  Type *CPTy = CPVal->getType();
2832  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2833  if (Align == 0)
2834  Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2835 
2836  MachineConstantPool *MCP = MF.getConstantPool();
2837  return MCP->getConstantPoolIndex(CPVal, Align);
2838 }
2839 
2840 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2841  Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2842  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2843 
2844  auto Adrp =
2845  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2846  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
2847 
2848  MachineInstr *LoadMI = nullptr;
2849  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2850  case 16:
2851  LoadMI =
2852  &*MIRBuilder
2853  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2854  .addConstantPoolIndex(CPIdx, 0,
2855  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2856  break;
2857  case 8:
2858  LoadMI = &*MIRBuilder
2859  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2860  .addConstantPoolIndex(
2861  CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2862  break;
2863  default:
2864  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2865  << *CPVal->getType());
2866  return nullptr;
2867  }
2868  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
2869  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2870  return LoadMI;
2871 }
2872 
2873 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2874 /// size and RB.
2875 static std::pair<unsigned, unsigned>
2876 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2877  unsigned Opc, SubregIdx;
2878  if (RB.getID() == AArch64::GPRRegBankID) {
2879  if (EltSize == 32) {
2880  Opc = AArch64::INSvi32gpr;
2881  SubregIdx = AArch64::ssub;
2882  } else if (EltSize == 64) {
2883  Opc = AArch64::INSvi64gpr;
2884  SubregIdx = AArch64::dsub;
2885  } else {
2886  llvm_unreachable("invalid elt size!");
2887  }
2888  } else {
2889  if (EltSize == 8) {
2890  Opc = AArch64::INSvi8lane;
2891  SubregIdx = AArch64::bsub;
2892  } else if (EltSize == 16) {
2893  Opc = AArch64::INSvi16lane;
2894  SubregIdx = AArch64::hsub;
2895  } else if (EltSize == 32) {
2896  Opc = AArch64::INSvi32lane;
2897  SubregIdx = AArch64::ssub;
2898  } else if (EltSize == 64) {
2899  Opc = AArch64::INSvi64lane;
2900  SubregIdx = AArch64::dsub;
2901  } else {
2902  llvm_unreachable("invalid elt size!");
2903  }
2904  }
2905  return std::make_pair(Opc, SubregIdx);
2906 }
2907 
2908 MachineInstr *
2909 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
2910  MachineIRBuilder &MIRBuilder) const {
2911  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
2912  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
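  // CMN is ADDS with the zero register as destination; use the immediate form
  // when the RHS matches an arithmetic immediate.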
2913  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
2914  {AArch64::ADDSWrr, AArch64::ADDSWri}};
2915  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
2916  auto ImmFns = selectArithImmed(RHS);
2917  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
2918  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
2919 
2920  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
2921 
2922  // If we matched a valid constant immediate, add those operands.
2923  if (ImmFns) {
2924  for (auto &RenderFn : *ImmFns)
2925  RenderFn(CmpMI);
2926  } else {
2927  CmpMI.addUse(RHS.getReg());
2928  }
2929 
2930  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2931  return &*CmpMI;
2932 }
2933 
2934 MachineInstr *
2935 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
2936  MachineIRBuilder &MIRBuilder) const {
2937  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2938  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
2939  bool Is32Bit = (RegSize == 32);
2940  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
2941  {AArch64::ANDSWrr, AArch64::ANDSWri}};
2942  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
2943 
2944  // We might be able to fold an immediate into the TST. We need to make sure
2945  // it's a logical immediate though, since ANDS requires that.
2946  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
2947  bool IsImmForm = ValAndVReg.hasValue() &&
2948  AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
2949  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
2950  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
2951 
2952  if (IsImmForm)
2953  TstMI.addImm(
2954  AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
2955  else
2956  TstMI.addUse(RHS);
2957 
2958  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
2959  return &*TstMI;
2960 }
2961 
2962 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
2963  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
2964  MachineIRBuilder &MIRBuilder) const {
2965  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
2966  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2967 
2968  // Fold the compare if possible.
2969  MachineInstr *FoldCmp =
2970  tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
2971  if (FoldCmp)
2972  return FoldCmp;
2973 
2974  // Can't fold into a CMN. Just emit a normal compare.
2975  unsigned CmpOpc = 0;
2976  Register ZReg;
2977 
2978  LLT CmpTy = MRI.getType(LHS.getReg());
2979  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
2980  "Expected scalar or pointer");
2981  if (CmpTy == LLT::scalar(32)) {
2982  CmpOpc = AArch64::SUBSWrr;
2983  ZReg = AArch64::WZR;
2984  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
2985  CmpOpc = AArch64::SUBSXrr;
2986  ZReg = AArch64::XZR;
2987  } else {
2988  return nullptr;
2989  }
2990 
2991  // Try to match immediate forms.
2992  auto ImmFns = selectArithImmed(RHS);
2993  if (ImmFns)
2994  CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
2995 
2996  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
2997  // If we matched a valid constant immediate, add those operands.
2998  if (ImmFns) {
2999  for (auto &RenderFn : *ImmFns)
3000  RenderFn(CmpMI);
3001  } else {
3002  CmpMI.addUse(RHS.getReg());
3003  }
3004 
3005  // Make sure that we can constrain the compare that we emitted.
3006  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3007  return &*CmpMI;
3008 }
3009 
3010 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3011  Optional<Register> Dst, Register Op1, Register Op2,
3012  MachineIRBuilder &MIRBuilder) const {
3013  // We implement a vector concat by:
3014  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3015  // 2. Insert the upper vector into the destination's upper element
3016  // TODO: some of this code is common with G_BUILD_VECTOR handling.
3017  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3018 
3019  const LLT Op1Ty = MRI.getType(Op1);
3020  const LLT Op2Ty = MRI.getType(Op2);
3021 
3022  if (Op1Ty != Op2Ty) {
3023  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3024  return nullptr;
3025  }
3026  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3027 
3028  if (Op1Ty.getSizeInBits() >= 128) {
3029  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3030  return nullptr;
3031  }
3032 
3033  // At the moment we just support 64 bit vector concats.
3034  if (Op1Ty.getSizeInBits() != 64) {
3035  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3036  return nullptr;
3037  }
3038 
3039  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3040  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3041  const TargetRegisterClass *DstRC =
3042  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3043 
3044  MachineInstr *WidenedOp1 =
3045  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3046  MachineInstr *WidenedOp2 =
3047  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3048  if (!WidenedOp1 || !WidenedOp2) {
3049  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3050  return nullptr;
3051  }
3052 
3053  // Now do the insert of the upper element.
3054  unsigned InsertOpc, InsSubRegIdx;
3055  std::tie(InsertOpc, InsSubRegIdx) =
3056  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3057 
3058  if (!Dst)
3059  Dst = MRI.createVirtualRegister(DstRC);
3060  auto InsElt =
3061  MIRBuilder
3062  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3063  .addImm(1) /* Lane index */
3064  .addUse(WidenedOp2->getOperand(0).getReg())
3065  .addImm(0);
3066  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3067  return &*InsElt;
3068 }
3069 
3070 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3071  MachineInstr &I, MachineRegisterInfo &MRI) const {
3072  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3073  "Expected a G_FCONSTANT!");
3074  MachineOperand &ImmOp = I.getOperand(1);
3075  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3076 
3077  // Only handle 32 and 64 bit defs for now.
3078  if (DefSize != 32 && DefSize != 64)
3079  return nullptr;
3080 
3081  // Don't handle null values using FMOV.
3082  if (ImmOp.getFPImm()->isNullValue())
3083  return nullptr;
3084 
3085  // Get the immediate representation for the FMOV.
3086  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3087  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3088  : AArch64_AM::getFP64Imm(ImmValAPF);
3089 
3090  // If this is -1, it means the immediate can't be represented as the requested
3091  // floating point value. Bail.
3092  if (Imm == -1)
3093  return nullptr;
3094 
3095  // Update MI to represent the new FMOV instruction, constrain it, and return.
3096  ImmOp.ChangeToImmediate(Imm);
3097  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3098  I.setDesc(TII.get(MovOpc));
3099  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3100  return &I;
3101 }
3102 
3103 MachineInstr *
3104 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3105  MachineIRBuilder &MIRBuilder) const {
3106  // CSINC increments the result when the predicate is false. Invert it.
3107  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3108  CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3109  auto I =
3110  MIRBuilder
3111  .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3112  .addImm(InvCC);
3113  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3114  return &*I;
3115 }
3116 
3117 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3118  MachineIRBuilder MIB(I);
3119  MachineRegisterInfo &MRI = *MIB.getMRI();
3121 
3122  // We want to recognize this pattern:
3123  //
3124  // $z = G_FCMP pred, $x, $y
3125  // ...
3126  // $w = G_SELECT $z, $a, $b
3127  //
3128  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3129  // some copies/truncs in between.)
3130  //
3131  // If we see this, then we can emit something like this:
3132  //
3133  // fcmp $x, $y
3134  // fcsel $w, $a, $b, pred
3135  //
3136  // Rather than emitting both of the rather long sequences in the standard
3137  // G_FCMP/G_SELECT select methods.
3138 
3139  // First, check if the condition is defined by a compare.
3140  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3141  while (CondDef) {
3142  // We can only fold if all of the defs have one use.
3143  if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3144  return false;
3145 
3146  // We can skip over G_TRUNC since the condition is 1-bit.
3147  // Truncating/extending can have no impact on the value.
3148  unsigned Opc = CondDef->getOpcode();
3149  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3150  break;
3151 
3152  // Can't see past copies from physregs.
3153  if (Opc == TargetOpcode::COPY &&
3154  TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3155  return false;
3156 
3157  CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3158  }
3159 
3160  // Is the condition defined by a compare?
3161  if (!CondDef)
3162  return false;
3163 
3164  unsigned CondOpc = CondDef->getOpcode();
3165  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3166  return false;
3167 
3168  AArch64CC::CondCode CondCode;
3169  if (CondOpc == TargetOpcode::G_ICMP) {
3170  CondCode = changeICMPPredToAArch64CC(
3171  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3172  if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3173  CondDef->getOperand(1), MIB)) {
3174  LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3175  return false;
3176  }
3177  } else {
3178  // Get the condition code for the select.
3179  AArch64CC::CondCode CondCode2;
3180  changeFCMPPredToAArch64CC(
3181  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3182  CondCode2);
3183 
3184  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3185  // instructions to emit the comparison.
3186  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3187  // unnecessary.
3188  if (CondCode2 != AArch64CC::AL)
3189  return false;
3190 
3191  // Make sure we'll be able to select the compare.
3192  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3193  if (!CmpOpc)
3194  return false;
3195 
3196  // Emit a new compare.
3197  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3198  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3199  Cmp.addUse(CondDef->getOperand(3).getReg());
3200  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3201  }
3202 
3203  // Emit the select.
3204  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3205  auto CSel =
3206  MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3207  {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3208  .addImm(CondCode);
3209  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3210  I.eraseFromParent();
3211  return true;
3212 }
3213 
3214 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3215  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3216  MachineIRBuilder &MIRBuilder) const {
3217  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3218  "Unexpected MachineOperand");
3219  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3220  // We want to find this sort of thing:
3221  // x = G_SUB 0, y
3222  // G_ICMP z, x
3223  //
3224  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3225  // e.g:
3226  //
3227  // cmn z, y
3228 
3229  // Helper lambda to detect the subtract followed by the compare.
3230  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3231  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3232  if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3233  return false;
3234 
3235  // Need to make sure NZCV is the same at the end of the transformation.
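  // (CMN sets flags from an addition, so only the Z-flag-based predicates EQ
  // and NE are guaranteed to observe the same result as the original
  // subtraction; the carry and overflow flags may differ.)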
3236  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3237  return false;
3238 
3239  // We want to match against SUBs.
3240  if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3241  return false;
3242 
3243  // Make sure that we're getting
3244  // x = G_SUB 0, y
3245  auto ValAndVReg =
3246  getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3247  if (!ValAndVReg || ValAndVReg->Value != 0)
3248  return false;
3249 
3250  // This can safely be represented as a CMN.
3251  return true;
3252  };
3253 
3254  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3255  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3256  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3257  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3258  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3259 
3260  // Given this:
3261  //
3262  // x = G_SUB 0, y
3263  // G_ICMP x, z
3264  //
3265  // Produce this:
3266  //
3267  // cmn y, z
3268  if (IsCMN(LHSDef, CC))
3269  return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3270 
3271  // Same idea here, but with the RHS of the compare instead:
3272  //
3273  // Given this:
3274  //
3275  // x = G_SUB 0, y
3276  // G_ICMP z, x
3277  //
3278  // Produce this:
3279  //
3280  // cmn z, y
3281  if (IsCMN(RHSDef, CC))
3282  return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3283 
3284  // Given this:
3285  //
3286  // z = G_AND x, y
3287  // G_ICMP z, 0
3288  //
3289  // Produce this if the compare is signed:
3290  //
3291  // tst x, y
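  // (TST is an ANDS with a discarded result: it sets N and Z from x & y and
  // clears C and V. A compare against zero also leaves V clear, so any
  // predicate that doesn't read the carry flag, i.e. the signed and equality
  // ones checked below, still sees the right answer.)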
3292  if (!isUnsignedICMPPred(P) && LHSDef &&
3293  LHSDef->getOpcode() == TargetOpcode::G_AND) {
3294  // Make sure that the RHS is 0.
3295  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3296  if (!ValAndVReg || ValAndVReg->Value != 0)
3297  return nullptr;
3298 
3299  return emitTST(LHSDef->getOperand(1).getReg(),
3300  LHSDef->getOperand(2).getReg(), MIRBuilder);
3301  }
3302 
3303  return nullptr;
3304 }
3305 
3306 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3307  // Try to match a vector splat operation into a dup instruction.
3308  // We're looking for this pattern:
3309  // %scalar:gpr(s64) = COPY $x0
3310  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3311  // %cst0:gpr(s32) = G_CONSTANT i32 0
3312  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3313  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3314  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3315  // %zerovec(<2 x s32>)
3316  //
3317  // ...into:
3318  // %splat = DUP %scalar
3319  // We use the regbank of the scalar to determine which kind of dup to use.
3320  MachineIRBuilder MIB(I);
3321  MachineRegisterInfo &MRI = *MIB.getMRI();
3323  using namespace TargetOpcode;
3324  using namespace MIPatternMatch;
3325 
3326  // Begin matching the insert.
3327  auto *InsMI =
3328  getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3329  if (!InsMI)
3330  return false;
3331  // Match the undef vector operand.
3332  auto *UndefMI =
3333  getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3334  if (!UndefMI)
3335  return false;
3336  // Match the scalar being splatted.
3337  Register ScalarReg = InsMI->getOperand(2).getReg();
3338  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3339  // Match the index constant 0.
3340  int64_t Index = 0;
3341  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3342  return false;
3343 
3344  // The shuffle's second operand doesn't matter if the mask is all zero.
3345  auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
3346  if (!ZeroVec)
3347  return false;
3348  int64_t Zero = 0;
3349  if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
3350  return false;
3351  for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
3352  if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
3353  return false; // This wasn't an all zeros vector.
3354  }
3355 
3356  // We're done, now find out what kind of splat we need.
3357  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3358  LLT EltTy = VecTy.getElementType();
3359  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3360  LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3361  return false;
3362  }
3363  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3364  static const unsigned OpcTable[2][2] = {
3365  {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3366  {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3367  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
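  // OpcTable is indexed as [IsFP][EltSize == 64]: GPR-sourced splats use the
  // DUP (general register) forms, while FPR-sourced splats use the DUP
  // (element) forms, which is why the scalar is widened into a vector below.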
3368 
3369  // For FP splats, we need to widen the scalar reg via undef too.
3370  if (IsFP) {
3371  MachineInstr *Widen = emitScalarToVector(
3372  EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3373  if (!Widen)
3374  return false;
3375  ScalarReg = Widen->getOperand(0).getReg();
3376  }
3377  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3378  if (IsFP)
3379  Dup.addImm(0);
3380  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3381  I.eraseFromParent();
3382  return true;
3383 }
3384 
3385 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3386  if (TM.getOptLevel() == CodeGenOpt::None)
3387  return false;
3388  if (tryOptVectorDup(I))
3389  return true;
3390  return false;
3391 }
3392 
3393 bool AArch64InstructionSelector::selectShuffleVector(
3394  MachineInstr &I, MachineRegisterInfo &MRI) const {
3395  if (tryOptVectorShuffle(I))
3396  return true;
3397  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3398  Register Src1Reg = I.getOperand(1).getReg();
3399  const LLT Src1Ty = MRI.getType(Src1Reg);
3400  Register Src2Reg = I.getOperand(2).getReg();
3401  const LLT Src2Ty = MRI.getType(Src2Reg);
3402 
3403  MachineBasicBlock &MBB = *I.getParent();
3404  MachineFunction &MF = *MBB.getParent();
3405  LLVMContext &Ctx = MF.getFunction().getContext();
3406 
3407  // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3408  // operand, it comes in as a normal vector value which we have to analyze to
3409  // find the mask indices. If the mask element is undef, then
3410  // collectShuffleMaskIndices() will add a None entry for that index into
3411  // the list.
3412  SmallVector<Optional<int>, 8> Mask;
3413  collectShuffleMaskIndices(I, MRI, Mask);
3414  assert(!Mask.empty() && "Expected to find mask indices");
3415 
3416  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars if
3417  // they originated from a <1 x T> type. Those should have been lowered into
3418  // G_BUILD_VECTOR earlier.
3419  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3420  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3421  return false;
3422  }
3423 
3424  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3425 
3426  SmallVector<Constant *, 64> CstIdxs;
3427  for (auto &MaybeVal : Mask) {
3428  // For now, we'll just treat any undef index as 0. This should be
3429  // optimized in the future, e.g. to select DUP etc.
3430  int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
3431  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3432  unsigned Offset = Byte + Val * BytesPerElt;
3433  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3434  }
3435  }
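  // For example, a <4 x s32> shuffle with mask <1, 0, 3, 2> (BytesPerElt == 4)
  // produces the byte indices 4..7, 0..3, 12..15, 8..11 for the TBL lookup.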
3436 
3437  MachineIRBuilder MIRBuilder(I);
3438 
3439  // Use a constant pool to load the index vector for TBL.
3440  Constant *CPVal = ConstantVector::get(CstIdxs);
3441  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3442  if (!IndexLoad) {
3443  LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3444  return false;
3445  }
3446 
3447  if (DstTy.getSizeInBits() != 128) {
3448  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3449  // This case can be done with TBL1.
3450  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3451  if (!Concat) {
3452  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3453  return false;
3454  }
3455 
3456  // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
3457  IndexLoad =
3458  emitScalarToVector(64, &AArch64::FPR128RegClass,
3459  IndexLoad->getOperand(0).getReg(), MIRBuilder);
3460 
3461  auto TBL1 = MIRBuilder.buildInstr(
3462  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3463  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3464  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3465 
3466  auto Copy =
3467  MIRBuilder
3468  .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3469  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3470  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3471  I.eraseFromParent();
3472  return true;
3473  }
3474 
3475  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3476  // Q registers for regalloc.
3477  auto RegSeq = MIRBuilder
3478  .buildInstr(TargetOpcode::REG_SEQUENCE,
3479  {&AArch64::QQRegClass}, {Src1Reg})
3480  .addImm(AArch64::qsub0)
3481  .addUse(Src2Reg)
3482  .addImm(AArch64::qsub1);
3483 
3484  auto TBL2 =
3485  MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3486  {RegSeq, IndexLoad->getOperand(0).getReg()});
3487  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3488  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3489  I.eraseFromParent();
3490  return true;
3491 }
3492 
3493 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3494  Optional<Register> DstReg, Register SrcReg, Register EltReg,
3495  unsigned LaneIdx, const RegisterBank &RB,
3496  MachineIRBuilder &MIRBuilder) const {
3497  MachineInstr *InsElt = nullptr;
3498  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3499  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3500 
3501  // Create a register to define with the insert if one wasn't passed in.
3502  if (!DstReg)
3503  DstReg = MRI.createVirtualRegister(DstRC);
3504 
3505  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3506  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3507 
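  // The INS (element) form takes the source element from another vector's
  // lane, so an FPR scalar is first widened into a vector; the INS (general)
  // form can take the element straight from a GPR.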
3508  if (RB.getID() == AArch64::FPRRegBankID) {
3509  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3510  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3511  .addImm(LaneIdx)
3512  .addUse(InsSub->getOperand(0).getReg())
3513  .addImm(0);
3514  } else {
3515  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3516  .addImm(LaneIdx)
3517  .addUse(EltReg);
3518  }
3519 
3520  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3521  return InsElt;
3522 }
3523 
3524 bool AArch64InstructionSelector::selectInsertElt(
3525  MachineInstr &I, MachineRegisterInfo &MRI) const {
3526  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3527 
3528  // Get information on the destination.
3529  Register DstReg = I.getOperand(0).getReg();
3530  const LLT DstTy = MRI.getType(DstReg);
3531  unsigned VecSize = DstTy.getSizeInBits();
3532 
3533  // Get information on the element we want to insert into the destination.
3534  Register EltReg = I.getOperand(2).getReg();
3535  const LLT EltTy = MRI.getType(EltReg);
3536  unsigned EltSize = EltTy.getSizeInBits();
3537  if (EltSize < 16 || EltSize > 64)
3538  return false; // Don't support all element types yet.
3539 
3540  // Find the definition of the index. Bail out if it's not defined by a
3541  // G_CONSTANT.
3542  Register IdxReg = I.getOperand(3).getReg();
3543  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3544  if (!VRegAndVal)
3545  return false;
3546  unsigned LaneIdx = VRegAndVal->Value;
3547 
3548  // Perform the lane insert.
3549  Register SrcReg = I.getOperand(1).getReg();
3550  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3551  MachineIRBuilder MIRBuilder(I);
3552 
3553  if (VecSize < 128) {
3554  // If the vector we're inserting into is smaller than 128 bits, widen it
3555  // to 128 to do the insert.
3556  MachineInstr *ScalarToVec = emitScalarToVector(
3557  VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3558  if (!ScalarToVec)
3559  return false;
3560  SrcReg = ScalarToVec->getOperand(0).getReg();
3561  }
3562 
3563  // Create an insert into a new FPR128 register.
3564  // Note that if our vector is already 128 bits, we end up emitting an extra
3565  // register.
3566  MachineInstr *InsMI =
3567  emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3568 
3569  if (VecSize < 128) {
3570  // If we had to widen to perform the insert, then we have to demote back to
3571  // the original size to get the result we want.
3572  Register DemoteVec = InsMI->getOperand(0).getReg();
3573  const TargetRegisterClass *RC =
3574  getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3575  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3576  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3577  return false;
3578  }
3579  unsigned SubReg = 0;
3580  if (!getSubRegForClass(RC, TRI, SubReg))
3581  return false;
3582  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3583  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3584  << ")\n");
3585  return false;
3586  }
3587  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3588  .addReg(DemoteVec, 0, SubReg);
3589  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3590  } else {
3591  // No widening needed.
3592  InsMI->getOperand(0).setReg(DstReg);
3593  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3594  }
3595 
3596  I.eraseFromParent();
3597  return true;
3598 }
3599 
3600 bool AArch64InstructionSelector::selectBuildVector(
3601  MachineInstr &I, MachineRegisterInfo &MRI) const {
3602  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3603  // Until we port more of the optimized selections, for now just use a vector
3604  // insert sequence.
3605  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3606  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3607  unsigned EltSize = EltTy.getSizeInBits();
3608  if (EltSize < 16 || EltSize > 64)
3609  return false; // Don't support all element types yet.
3610  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3611  MachineIRBuilder MIRBuilder(I);
3612 
3613  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3614  MachineInstr *ScalarToVec =
3615  emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3616  I.getOperand(1).getReg(), MIRBuilder);
3617  if (!ScalarToVec)
3618  return false;
3619 
3620  Register DstVec = ScalarToVec->getOperand(0).getReg();
3621  unsigned DstSize = DstTy.getSizeInBits();
3622 
3623  // Keep track of the last MI we inserted. Later on, we might be able to save
3624  // a copy using it.
3625  MachineInstr *PrevMI = nullptr;
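  // Lane 0 was already seeded by the scalar_to_vector above; insert the
  // remaining operands into lanes 1 onwards. For a <4 x s32> build vector,
  // operands 2..4 land in lanes 1..3.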
3626  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3627  // Note that if we don't do a subregister copy, we can end up making an
3628  // extra register.
3629  PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3630  MIRBuilder);
3631  DstVec = PrevMI->getOperand(0).getReg();
3632  }
3633 
3634  // If DstTy's size in bits is less than 128, then emit a subregister copy
3635  // from DstVec to the last register we've defined.
3636  if (DstSize < 128) {
3637  // Force this to be FPR using the destination vector.
3638  const TargetRegisterClass *RC =
3639  getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3640  if (!RC)
3641  return false;
3642  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3643  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3644  return false;
3645  }
3646 
3647  unsigned SubReg = 0;
3648  if (!getSubRegForClass(RC, TRI, SubReg))
3649  return false;
3650  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3651  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3652  << ")\n");
3653  return false;
3654  }
3655 
3656  Register Reg = MRI.createVirtualRegister(RC);
3657  Register DstReg = I.getOperand(0).getReg();
3658 
3659  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3660  .addReg(DstVec, 0, SubReg);
3661  MachineOperand &RegOp = I.getOperand(1);
3662  RegOp.setReg(Reg);
3663  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3664  } else {
3665  // We don't need a subregister copy. Save a copy by re-using the
3666  // destination register on the final insert.
3667  assert(PrevMI && "PrevMI was null?");
3668  PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3669  constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3670  }
3671 
3672  I.eraseFromParent();
3673  return true;
3674 }
3675 
3676 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3677 /// ID if it exists, and 0 otherwise.
3678 static unsigned findIntrinsicID(MachineInstr &I) {
3679  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3680  return Op.isIntrinsicID();
3681  });
3682  if (IntrinOp == I.operands_end())
3683  return 0;
3684  return IntrinOp->getIntrinsicID();
3685 }
3686 
3687 /// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3688 /// intrinsic.
3689 static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3690  switch (NumBytesToStore) {
3691  // TODO: 1, 2, and 4 byte stores.
3692  case 8:
3693  return AArch64::STLXRX;
3694  default:
3695  LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3696  << NumBytesToStore << ")\n");
3697  break;
3698  }
3699  return 0;
3700 }
3701 
3702 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3703  MachineInstr &I, MachineRegisterInfo &MRI) const {
3704  // Find the intrinsic ID.
3705  unsigned IntrinID = findIntrinsicID(I);
3706  if (!IntrinID)
3707  return false;
3708  MachineIRBuilder MIRBuilder(I);
3709 
3710  // Select the instruction.
3711  switch (IntrinID) {
3712  default:
3713  return false;
3714  case Intrinsic::trap:
3715  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3716  break;
3717  case Intrinsic::debugtrap:
3718  if (!STI.isTargetWindows())
3719  return false;
3720  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3721  break;
3722  case Intrinsic::aarch64_stlxr:
3723  Register StatReg = I.getOperand(0).getReg();
3724  assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3725  "Status register must be 32 bits!");
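  // STLXR is a store-release exclusive; the 32-bit status register receives 0
  // if the store succeeded and 1 if the exclusive monitor was lost.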
3726  Register SrcReg = I.getOperand(2).getReg();
3727 
3728  if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3729  LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3730  return false;
3731  }
3732 
3733  Register PtrReg = I.getOperand(3).getReg();
3734  assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3735 
3736  // Expect only one memory operand.
3737  if (!I.hasOneMemOperand())
3738  return false;
3739 
3740  const MachineMemOperand *MemOp = *I.memoperands_begin();
3741  unsigned NumBytesToStore = MemOp->getSize();
3742  unsigned Opc = getStlxrOpcode(NumBytesToStore);
3743  if (!Opc)
3744  return false;
3745 
3746  auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3747  constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3748  }
3749 
3750  I.eraseFromParent();
3751  return true;
3752 }
3753 
3754 bool AArch64InstructionSelector::selectIntrinsic(
3755  MachineInstr &I, MachineRegisterInfo &MRI) const {
3756  unsigned IntrinID = findIntrinsicID(I);
3757  if (!IntrinID)
3758  return false;
3759  MachineIRBuilder MIRBuilder(I);
3760 
3761  switch (IntrinID) {
3762  default:
3763  break;
3764  case Intrinsic::aarch64_crypto_sha1h:
3765  Register DstReg = I.getOperand(0).getReg();
3766  Register SrcReg = I.getOperand(2).getReg();
3767 
3768  // FIXME: Should this be an assert?
3769  if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3770  MRI.getType(SrcReg).getSizeInBits() != 32)
3771  return false;
3772 
3773  // The operation has to happen on FPRs. Set up some new FPR registers for
3774  // the source and destination if they are on GPRs.
3775  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3776  SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3777  MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3778 
3779  // Make sure the copy ends up getting constrained properly.
3780  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3781  AArch64::GPR32RegClass, MRI);
3782  }
3783 
3784  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3785  DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3786 
3787  // Actually insert the instruction.
3788  auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3789  constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3790 
3791  // Did we create a new register for the destination?
3792  if (DstReg != I.getOperand(0).getReg()) {
3793  // Yep. Copy the result of the instruction back into the original
3794  // destination.
3795  MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3796  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3797  AArch64::GPR32RegClass, MRI);
3798  }
3799 
3800  I.eraseFromParent();
3801  return true;
3802  }
3803  return false;
3804 }
3805 
3806 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
3807  auto &MI = *Root.getParent();
3808  auto &MBB = *MI.getParent();
3809  auto &MF = *MBB.getParent();
3810  auto &MRI = MF.getRegInfo();
3811  uint64_t Immed;
3812  if (Root.isImm())
3813  Immed = Root.getImm();
3814  else if (Root.isCImm())
3815  Immed = Root.getCImm()->getZExtValue();
3816  else if (Root.isReg()) {
3817  auto ValAndVReg =
3818  getConstantVRegValWithLookThrough(Root.getReg(), MRI);
3819  if (!ValAndVReg)
3820  return None;
3821  Immed = ValAndVReg->Value;
3822  } else
3823  return None;
3824  return Immed;
3825 }
3826 
3827 InstructionSelector::ComplexRendererFns
3828 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
3829  auto MaybeImmed = getImmedFromMO(Root);
3830  if (MaybeImmed == None || *MaybeImmed > 31)
3831  return None;
3832  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
3833  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3834 }
3835 
3836 InstructionSelector::ComplexRendererFns
3837 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
3838  auto MaybeImmed = getImmedFromMO(Root);
3839  if (MaybeImmed == None || *MaybeImmed > 31)
3840  return None;
3841  uint64_t Enc = 31 - *MaybeImmed;
3842  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3843 }
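
// These renderers appear to supply the immr/imms immediates of a UBFM/SBFM
// style bitfield move used when selecting constant shifts: for a 32-bit shift
// amount of 4, A renders (32 - 4) & 0x1f = 28 and B renders 31 - 4 = 27,
// matching "UBFM Wd, Wn, #28, #27", the canonical encoding of LSL #4. The
// 64-bit variants below are analogous, with a 0x3f mask and 63 respectively.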
3844 
3845 InstructionSelector::ComplexRendererFns
3846 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
3847  auto MaybeImmed = getImmedFromMO(Root);
3848  if (MaybeImmed == None || *MaybeImmed > 63)
3849  return None;
3850  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
3851  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3852 }
3853 
3854 InstructionSelector::ComplexRendererFns
3855 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
3856  auto MaybeImmed = getImmedFromMO(Root);
3857  if (MaybeImmed == None || *MaybeImmed > 63)
3858  return None;
3859  uint64_t Enc = 63 - *MaybeImmed;
3860  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
3861 }
3862 
3863 /// SelectArithImmed - Select an immediate value that can be represented as
3864 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
3865 /// Val set to the 12-bit value and Shift set to the shifter operand.
3866 InstructionSelector::ComplexRendererFns
3867 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
3868  // This function is called from the addsub_shifted_imm ComplexPattern,
3869  // which lists [imm] as the list of opcodes it's interested in; however,
3870  // we still need to check whether the operand is actually an immediate
3871  // here because the ComplexPattern opcode list is only used in
3872  // root-level opcode matching.
3873  auto MaybeImmed = getImmedFromMO(Root);
3874  if (MaybeImmed == None)
3875  return None;
3876  uint64_t Immed = *MaybeImmed;
3877  unsigned ShiftAmt;
3878 
3879  if (Immed >> 12 == 0) {
3880  ShiftAmt = 0;
3881  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3882  ShiftAmt = 12;
3883  Immed = Immed >> 12;
3884  } else
3885  return None;
3886 
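  // For example, 0x123 is encoded as-is with shift 0, while 0x45000 is encoded
  // as 0x45 with shift 12; anything with bits set outside [23:0], or with bits
  // straddling the 12-bit boundary, is rejected above.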
3887  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
3888  return {{
3889  [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3890  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3891  }};
3892 }
3893 
3894 /// Select a "register plus unscaled signed 9-bit immediate" address. This
3895 /// should only match when there is an offset that is not valid for a scaled
3896 /// immediate addressing mode. The "Size" argument is the size in bytes of the
3897 /// memory reference, which is needed here to know what is valid for a scaled
3898 /// immediate.
3899 InstructionSelector::ComplexRendererFns
3900 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3901  unsigned Size) const {
3902  MachineRegisterInfo &MRI =
3903  Root.getParent()->getParent()->getParent()->getRegInfo();
3904 
3905  if (!Root.isReg())
3906  return None;
3907 
3908  if (!isBaseWithConstantOffset(Root, MRI))
3909  return None;
3910 
3911  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3912  if (!RootDef)
3913  return None;
3914 
3915  MachineOperand &OffImm = RootDef->getOperand(2);
3916  if (!OffImm.isReg())
3917  return None;
3918  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3919  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3920  return None;
3921  int64_t RHSC;
3922  MachineOperand &RHSOp1 = RHS->getOperand(1);
3923  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3924  return None;
3925  RHSC = RHSOp1.getCImm()->getSExtValue();
3926 
3927  // If the offset is valid as a scaled immediate, don't match here.
3928  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3929  return None;
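  // Otherwise accept anything in the LDUR/STUR range [-256, 255]. For example,
  // a 4-byte access at an offset of 3 can't use the scaled 12-bit form (3 is
  // not a multiple of 4), but it is representable here.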
3930  if (RHSC >= -256 && RHSC < 256) {
3931  MachineOperand &Base = RootDef->getOperand(1);
3932  return {{
3933  [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3934  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3935  }};
3936  }
3937  return None;
3938 }
3939 
3940 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
3941 /// "Size" argument is the size in bytes of the memory reference, which
3942 /// determines the scale.
3943 InstructionSelector::ComplexRendererFns
3944 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3945  unsigned Size) const {
3946  MachineRegisterInfo &MRI =
3947  Root.getParent()->getParent()->getParent()->getRegInfo();
3948 
3949  if (!Root.isReg())
3950  return None;
3951 
3952  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3953  if (!RootDef)
3954  return None;
3955 
3956  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3957  return {{
3958  [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3959  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3960  }};
3961  }
3962 
3963  if (isBaseWithConstantOffset(Root, MRI)) {
3964  MachineOperand &LHS = RootDef->getOperand(1);
3965  MachineOperand &RHS = RootDef->getOperand(2);
3966  MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3967  MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3968  if (LHSDef && RHSDef) {
3969  int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3970  unsigned Scale = Log2_32(Size);
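  // e.g. for an 8-byte access, offsets that are non-negative multiples of 8
  // below 0x1000 * 8 are encodable, and an offset of 48 becomes the scaled
  // immediate 6 (48 >> 3).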
3971  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3972  if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
3973  return {{
3974  [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3975  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3976  }};
3977 
3978  return {{
3979  [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3980  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3981  }};
3982  }
3983  }
3984  }
3985 
3986  // Before falling back to our general case, check if the unscaled
3987  // instructions can handle this. If so, that's preferable.
3988  if (selectAddrModeUnscaled(Root, Size).hasValue())
3989  return None;
3990 
3991  return {{
3992  [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3993  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3994  }};
3995 }
3996 
3997 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3998  const MachineInstr &MI) const {
3999  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4000  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4001  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4002  assert(CstVal && "Expected constant value");
4003  MIB.addImm(CstVal.getValue());
4004 }
4005 
4006 namespace llvm {
4007 InstructionSelector *
4008 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4009  AArch64Subtarget &Subtarget,
4010  AArch64RegisterBankInfo &RBI) {
4011  return new AArch64InstructionSelector(TM, Subtarget, RBI);
4012 }
4013 }