1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
21 #include "llvm/ADT/Optional.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/Support/Debug.h"
37 
38 #define DEBUG_TYPE "aarch64-isel"
39 
40 using namespace llvm;
41 
42 namespace {
43 
44 #define GET_GLOBALISEL_PREDICATE_BITSET
45 #include "AArch64GenGlobalISel.inc"
46 #undef GET_GLOBALISEL_PREDICATE_BITSET
47 
48 class AArch64InstructionSelector : public InstructionSelector {
49 public:
50  AArch64InstructionSelector(const AArch64TargetMachine &TM,
51  const AArch64Subtarget &STI,
52  const AArch64RegisterBankInfo &RBI);
53 
54  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
55  static const char *getName() { return DEBUG_TYPE; }
56 
57 private:
58  /// tblgen-erated 'select' implementation, used as the initial selector for
59  /// the patterns that don't require complex C++.
60  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
61 
62  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
63  MachineRegisterInfo &MRI) const;
64  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
65  MachineRegisterInfo &MRI) const;
66 
67  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
68  MachineRegisterInfo &MRI) const;
69 
70  // Helper to generate an equivalent of scalar_to_vector into a new register,
71  // returned via 'Dst'.
72  MachineInstr *emitScalarToVector(unsigned EltSize,
73  const TargetRegisterClass *DstRC,
74  unsigned Scalar,
75  MachineIRBuilder &MIRBuilder) const;
76 
77  /// Emit a lane insert into \p DstReg, or a new vector register if None is
78  /// provided.
79  ///
80  /// The lane inserted into is defined by \p LaneIdx. The vector source
81  /// register is given by \p SrcReg. The register containing the element is
82  /// given by \p EltReg.
83  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
84  unsigned EltReg, unsigned LaneIdx,
85  const RegisterBank &RB,
86  MachineIRBuilder &MIRBuilder) const;
87  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
88  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
89  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
90  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
91 
92  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
93  SmallVectorImpl<int> &Idxs) const;
94  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
95  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
96  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
97  bool selectSplitVectorUnmerge(MachineInstr &I,
98  MachineRegisterInfo &MRI) const;
99 
100  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
101  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
102  MachineIRBuilder &MIRBuilder) const;
103 
104  // Emit a vector concat operation.
105  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
106  unsigned Op2,
107  MachineIRBuilder &MIRBuilder) const;
108  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
109  const RegisterBank &DstRB, LLT ScalarTy,
110  unsigned VecReg, unsigned LaneIdx,
111  MachineIRBuilder &MIRBuilder) const;
112 
113  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
114 
115  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
116  unsigned Size) const;
117 
118  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
119  return selectAddrModeUnscaled(Root, 1);
120  }
121  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
122  return selectAddrModeUnscaled(Root, 2);
123  }
124  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
125  return selectAddrModeUnscaled(Root, 4);
126  }
127  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
128  return selectAddrModeUnscaled(Root, 8);
129  }
130  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
131  return selectAddrModeUnscaled(Root, 16);
132  }
133 
134  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
135  unsigned Size) const;
136  template <int Width>
137  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
138  return selectAddrModeIndexed(Root, Width / 8);
139  }
140 
141  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
142 
143  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
144  void materializeLargeCMVal(MachineInstr &I, const Value *V,
145  unsigned char OpFlags) const;
146 
147  // Optimization methods.
148 
149  // Helper function to check if a reg def is an MI with a given opcode and
150  // return it if so.
151  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
152  MachineIRBuilder &MIB) const {
153  auto *Def = MIB.getMRI()->getVRegDef(Reg);
154  if (!Def || Def->getOpcode() != Opc)
155  return nullptr;
156  return Def;
157  }
158 
159  bool tryOptVectorShuffle(MachineInstr &I) const;
160  bool tryOptVectorDup(MachineInstr &MI) const;
161 
162  const AArch64TargetMachine &TM;
163  const AArch64Subtarget &STI;
164  const AArch64InstrInfo &TII;
165  const AArch64RegisterInfo &TRI;
166  const AArch64RegisterBankInfo &RBI;
167 
168 #define GET_GLOBALISEL_PREDICATES_DECL
169 #include "AArch64GenGlobalISel.inc"
170 #undef GET_GLOBALISEL_PREDICATES_DECL
171 
172 // We declare the temporaries used by selectImpl() in the class to minimize the
173 // cost of constructing placeholder values.
174 #define GET_GLOBALISEL_TEMPORARIES_DECL
175 #include "AArch64GenGlobalISel.inc"
176 #undef GET_GLOBALISEL_TEMPORARIES_DECL
177 };
178 
179 } // end anonymous namespace
180 
181 #define GET_GLOBALISEL_IMPL
182 #include "AArch64GenGlobalISel.inc"
183 #undef GET_GLOBALISEL_IMPL
184 
185 AArch64InstructionSelector::AArch64InstructionSelector(
186  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
187  const AArch64RegisterBankInfo &RBI)
188  : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
189  TRI(*STI.getRegisterInfo()), RBI(RBI),
190 #define GET_GLOBALISEL_PREDICATES_INIT
191 #include "AArch64GenGlobalISel.inc"
192 #undef GET_GLOBALISEL_PREDICATES_INIT
193 #define GET_GLOBALISEL_TEMPORARIES_INIT
194 #include "AArch64GenGlobalISel.inc"
195 #undef GET_GLOBALISEL_TEMPORARIES_INIT
196 {
197 }
198 
199 // FIXME: This should be target-independent, inferred from the types declared
200 // for each class in the bank.
201 static const TargetRegisterClass *
202 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
203  const RegisterBankInfo &RBI,
204  bool GetAllRegSet = false) {
205  if (RB.getID() == AArch64::GPRRegBankID) {
206  if (Ty.getSizeInBits() <= 32)
207  return GetAllRegSet ? &AArch64::GPR32allRegClass
208  : &AArch64::GPR32RegClass;
209  if (Ty.getSizeInBits() == 64)
210  return GetAllRegSet ? &AArch64::GPR64allRegClass
211  : &AArch64::GPR64RegClass;
212  return nullptr;
213  }
214 
215  if (RB.getID() == AArch64::FPRRegBankID) {
216  if (Ty.getSizeInBits() <= 16)
217  return &AArch64::FPR16RegClass;
218  if (Ty.getSizeInBits() == 32)
219  return &AArch64::FPR32RegClass;
220  if (Ty.getSizeInBits() == 64)
221  return &AArch64::FPR64RegClass;
222  if (Ty.getSizeInBits() == 128)
223  return &AArch64::FPR128RegClass;
224  return nullptr;
225  }
226 
227  return nullptr;
228 }
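// As a rough illustration of the mapping above (GPRBank/FPRBank stand in for
// the corresponding AArch64 RegisterBank objects): a 64-bit scalar on the GPR
// bank selects GPR64 (GPR64all when GetAllRegSet is set), while the same size
// on the FPR bank selects FPR64.
//
//   getRegClassForTypeOnBank(LLT::scalar(64), GPRBank, RBI); // &GPR64RegClass
//   getRegClassForTypeOnBank(LLT::scalar(64), FPRBank, RBI); // &FPR64RegClass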
229 
230 /// Given a register bank, and size in bits, return the smallest register class
231 /// that can represent that combination.
232 static const TargetRegisterClass *
233 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
234  bool GetAllRegSet = false) {
235  unsigned RegBankID = RB.getID();
236 
237  if (RegBankID == AArch64::GPRRegBankID) {
238  if (SizeInBits <= 32)
239  return GetAllRegSet ? &AArch64::GPR32allRegClass
240  : &AArch64::GPR32RegClass;
241  if (SizeInBits == 64)
242  return GetAllRegSet ? &AArch64::GPR64allRegClass
243  : &AArch64::GPR64RegClass;
244  }
245 
246  if (RegBankID == AArch64::FPRRegBankID) {
247  switch (SizeInBits) {
248  default:
249  return nullptr;
250  case 8:
251  return &AArch64::FPR8RegClass;
252  case 16:
253  return &AArch64::FPR16RegClass;
254  case 32:
255  return &AArch64::FPR32RegClass;
256  case 64:
257  return &AArch64::FPR64RegClass;
258  case 128:
259  return &AArch64::FPR128RegClass;
260  }
261  }
262 
263  return nullptr;
264 }
265 
266 /// Returns the correct subregister to use for a given register class.
267 static bool getSubRegForClass(const TargetRegisterClass *RC,
268  const TargetRegisterInfo &TRI, unsigned &SubReg) {
269  switch (TRI.getRegSizeInBits(*RC)) {
270  case 8:
271  SubReg = AArch64::bsub;
272  break;
273  case 16:
274  SubReg = AArch64::hsub;
275  break;
276  case 32:
277  if (RC == &AArch64::GPR32RegClass)
278  SubReg = AArch64::sub_32;
279  else
280  SubReg = AArch64::ssub;
281  break;
282  case 64:
283  SubReg = AArch64::dsub;
284  break;
285  default:
286  LLVM_DEBUG(
287  dbgs() << "Couldn't find appropriate subregister for register class.");
288  return false;
289  }
290 
291  return true;
292 }
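// For example, two register classes of the same width map to different
// subregister indices depending on their bank; sketching the calls:
//
//   unsigned SubReg;
//   getSubRegForClass(&AArch64::FPR32RegClass, TRI, SubReg); // SubReg == ssub
//   getSubRegForClass(&AArch64::GPR32RegClass, TRI, SubReg); // SubReg == sub_32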
293 
294 /// Check whether \p I is a currently unsupported binary operation:
295 /// - it has an unsized type
296 /// - an operand is not a vreg
297 /// - not all operands are in the same bank
298 /// These are checks that should someday live in the verifier, but right now,
299 /// these are mostly limitations of the AArch64 selector.
300 static bool unsupportedBinOp(const MachineInstr &I,
301  const AArch64RegisterBankInfo &RBI,
302  const MachineRegisterInfo &MRI,
303  const AArch64RegisterInfo &TRI) {
304  LLT Ty = MRI.getType(I.getOperand(0).getReg());
305  if (!Ty.isValid()) {
306  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
307  return true;
308  }
309 
310  const RegisterBank *PrevOpBank = nullptr;
311  for (auto &MO : I.operands()) {
312  // FIXME: Support non-register operands.
313  if (!MO.isReg()) {
314  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
315  return true;
316  }
317 
318  // FIXME: Can generic operations have physical registers operands? If
319  // so, this will need to be taught about that, and we'll need to get the
320  // bank out of the minimal class for the register.
321  // Either way, this needs to be documented (and possibly verified).
322  if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
323  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
324  return true;
325  }
326 
327  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
328  if (!OpBank) {
329  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
330  return true;
331  }
332 
333  if (PrevOpBank && OpBank != PrevOpBank) {
334  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
335  return true;
336  }
337  PrevOpBank = OpBank;
338  }
339  return false;
340 }
341 
342 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
343 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
344 /// and of size \p OpSize.
345 /// \returns \p GenericOpc if the combination is unsupported.
346 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
347  unsigned OpSize) {
348  switch (RegBankID) {
349  case AArch64::GPRRegBankID:
350  if (OpSize == 32) {
351  switch (GenericOpc) {
352  case TargetOpcode::G_SHL:
353  return AArch64::LSLVWr;
354  case TargetOpcode::G_LSHR:
355  return AArch64::LSRVWr;
356  case TargetOpcode::G_ASHR:
357  return AArch64::ASRVWr;
358  default:
359  return GenericOpc;
360  }
361  } else if (OpSize == 64) {
362  switch (GenericOpc) {
363  case TargetOpcode::G_GEP:
364  return AArch64::ADDXrr;
365  case TargetOpcode::G_SHL:
366  return AArch64::LSLVXr;
367  case TargetOpcode::G_LSHR:
368  return AArch64::LSRVXr;
369  case TargetOpcode::G_ASHR:
370  return AArch64::ASRVXr;
371  default:
372  return GenericOpc;
373  }
374  }
375  break;
376  case AArch64::FPRRegBankID:
377  switch (OpSize) {
378  case 32:
379  switch (GenericOpc) {
380  case TargetOpcode::G_FADD:
381  return AArch64::FADDSrr;
382  case TargetOpcode::G_FSUB:
383  return AArch64::FSUBSrr;
384  case TargetOpcode::G_FMUL:
385  return AArch64::FMULSrr;
386  case TargetOpcode::G_FDIV:
387  return AArch64::FDIVSrr;
388  default:
389  return GenericOpc;
390  }
391  case 64:
392  switch (GenericOpc) {
393  case TargetOpcode::G_FADD:
394  return AArch64::FADDDrr;
395  case TargetOpcode::G_FSUB:
396  return AArch64::FSUBDrr;
397  case TargetOpcode::G_FMUL:
398  return AArch64::FMULDrr;
399  case TargetOpcode::G_FDIV:
400  return AArch64::FDIVDrr;
401  case TargetOpcode::G_OR:
402  return AArch64::ORRv8i8;
403  default:
404  return GenericOpc;
405  }
406  }
407  break;
408  }
409  return GenericOpc;
410 }
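// A few illustrative mappings that follow from the switch above: a 32-bit
// G_SHL on the GPR bank becomes the variable shift LSLVWr, a 64-bit G_FADD on
// the FPR bank becomes FADDDrr, and an unsupported combination simply returns
// the generic opcode unchanged.
//
//   selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 32);  // LSLVWr
//   selectBinaryOp(TargetOpcode::G_FADD, AArch64::FPRRegBankID, 64); // FADDDrr
//   selectBinaryOp(TargetOpcode::G_FADD, AArch64::GPRRegBankID, 64); // G_FADD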
411 
412 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
413 /// appropriate for the (value) register bank \p RegBankID and of memory access
414 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
415 /// addressing mode (e.g., LDRXui).
416 /// \returns \p GenericOpc if the combination is unsupported.
417 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
418  unsigned OpSize) {
419  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
420  switch (RegBankID) {
421  case AArch64::GPRRegBankID:
422  switch (OpSize) {
423  case 8:
424  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
425  case 16:
426  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
427  case 32:
428  return isStore ? AArch64::STRWui : AArch64::LDRWui;
429  case 64:
430  return isStore ? AArch64::STRXui : AArch64::LDRXui;
431  }
432  break;
433  case AArch64::FPRRegBankID:
434  switch (OpSize) {
435  case 8:
436  return isStore ? AArch64::STRBui : AArch64::LDRBui;
437  case 16:
438  return isStore ? AArch64::STRHui : AArch64::LDRHui;
439  case 32:
440  return isStore ? AArch64::STRSui : AArch64::LDRSui;
441  case 64:
442  return isStore ? AArch64::STRDui : AArch64::LDRDui;
443  }
444  break;
445  }
446  return GenericOpc;
447 }
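// Example mappings, following the table above: a 32-bit access on the GPR bank
// selects the W-register form, the same access on the FPR bank selects the
// S-register form, and an 8-bit GPR store selects the byte variant.
//
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::GPRRegBankID, 32);  // LDRWui
//   selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::FPRRegBankID, 32);  // LDRSui
//   selectLoadStoreUIOp(TargetOpcode::G_STORE, AArch64::GPRRegBankID, 8);  // STRBBui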
448 
449 #ifndef NDEBUG
450 /// Helper function that verifies that we have a valid copy at the end of
451 /// selectCopy. Verifies that the source and dest have the expected sizes and
452 /// then returns true.
453 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
454  const MachineRegisterInfo &MRI,
455  const TargetRegisterInfo &TRI,
456  const RegisterBankInfo &RBI) {
457  const unsigned DstReg = I.getOperand(0).getReg();
458  const unsigned SrcReg = I.getOperand(1).getReg();
459  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
460  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
461 
462  // Make sure the size of the source and dest line up.
463  assert(
464  (DstSize == SrcSize ||
465  // Copies are a means to set up initial types; the number of
466  // bits may not exactly match.
467  (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
468  // Copies are a means to copy bits around; as long as we stay
469  // on the same register class, that's fine. Otherwise, we need
470  // a SUBREG_TO_REG, an AND, or similar.
471  (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
472  "Copy with different width?!");
473 
474  // Check the size of the destination.
475  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
476  "GPRs cannot get more than 64-bit width values");
477 
478  return true;
479 }
480 #endif
481 
482 /// Helper function for selectCopy. Inserts a subregister copy from
483 /// \p *From to \p *To, linking it up to \p I.
484 ///
485 /// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
486 ///
487 /// CopyReg (From class) = COPY SrcReg
488 /// SubRegCopy (To class) = COPY CopyReg:SubReg
489 /// Dst = COPY SubRegCopy
490 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
491  const RegisterBankInfo &RBI, unsigned SrcReg,
492  const TargetRegisterClass *From,
493  const TargetRegisterClass *To,
494  unsigned SubReg) {
495  MachineIRBuilder MIB(I);
496  auto Copy = MIB.buildCopy({From}, {SrcReg});
497  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
498  .addReg(Copy.getReg(0), 0, SubReg);
499  MachineOperand &RegOp = I.getOperand(1);
500  RegOp.setReg(SubRegCopy.getReg(0));
501 
502  // It's possible that the destination register won't be constrained. Make
503  // sure that happens.
504  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
505  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
506 
507  return true;
508 }
509 
510 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
511  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
512  const RegisterBankInfo &RBI) {
513 
514  unsigned DstReg = I.getOperand(0).getReg();
515  unsigned SrcReg = I.getOperand(1).getReg();
516  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
517  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
518  const TargetRegisterClass *DstRC = getMinClassForRegBank(
519  DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
520  if (!DstRC) {
521  LLVM_DEBUG(dbgs() << "Unexpected dest size "
522  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
523  return false;
524  }
525 
526  // A couple helpers below, for making sure that the copy we produce is valid.
527 
528  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
529  // to verify that the src and dst are the same size, since that's handled by
530  // the SUBREG_TO_REG.
531  bool KnownValid = false;
532 
533  // Returns true, or asserts if something we don't expect happens. Instead of
534  // returning true, we return isValidCopy() to ensure that we verify the
535  // result.
536  auto CheckCopy = [&]() {
537  // If we have a bitcast or something, we can't have physical registers.
538  assert(
539  (I.isCopy() ||
540  (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
541  !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
542  "No phys reg on generic operator!");
543  assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
544  (void)KnownValid;
545  return true;
546  };
547 
548  // Is this a copy? If so, then we may need to insert a subregister copy, or
549  // a SUBREG_TO_REG.
550  if (I.isCopy()) {
551  // Yes. Check if there's anything to fix up.
552  const TargetRegisterClass *SrcRC = getMinClassForRegBank(
553  SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
554  if (!SrcRC) {
555  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
556  return false;
557  }
558 
559  // Is this a cross-bank copy?
560  if (DstRegBank.getID() != SrcRegBank.getID()) {
561  // If we're doing a cross-bank copy on different-sized registers, we need
562  // to do a bit more work.
563  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
564  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
565 
566  if (SrcSize > DstSize) {
567  // We're doing a cross-bank copy into a smaller register. We need a
568  // subregister copy. First, get a register class that's on the same bank
569  // as the destination, but the same size as the source.
570  const TargetRegisterClass *SubregRC =
571  getMinClassForRegBank(DstRegBank, SrcSize, true);
572  assert(SubregRC && "Didn't get a register class for subreg?");
573 
574  // Get the appropriate subregister for the destination.
575  unsigned SubReg = 0;
576  if (!getSubRegForClass(DstRC, TRI, SubReg)) {
577  LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
578  return false;
579  }
580 
581  // Now, insert a subregister copy using the new register class.
582  selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
583  return CheckCopy();
584  }
585 
586  else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
587  SrcSize == 16) {
588  // Special case for FPR16 to GPR32.
589  // FIXME: This can probably be generalized like the above case.
590  unsigned PromoteReg =
591  MRI.createVirtualRegister(&AArch64::FPR32RegClass);
592  BuildMI(*I.getParent(), I, I.getDebugLoc(),
593  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
594  .addImm(0)
595  .addUse(SrcReg)
596  .addImm(AArch64::hsub);
597  MachineOperand &RegOp = I.getOperand(1);
598  RegOp.setReg(PromoteReg);
599 
600  // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
601  KnownValid = true;
602  }
603  }
604 
605  // If the destination is a physical register, then there's nothing to
606  // change, so we're done.
607  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
608  return CheckCopy();
609  }
610 
611  // No need to constrain SrcReg. It will get constrained when we hit another
612  // of its uses or defs. Copies do not have constraints.
613  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
614  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
615  << " operand\n");
616  return false;
617  }
618  I.setDesc(TII.get(AArch64::COPY));
619  return CheckCopy();
620 }
621 
622 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
623  if (!DstTy.isScalar() || !SrcTy.isScalar())
624  return GenericOpc;
625 
626  const unsigned DstSize = DstTy.getSizeInBits();
627  const unsigned SrcSize = SrcTy.getSizeInBits();
628 
629  switch (DstSize) {
630  case 32:
631  switch (SrcSize) {
632  case 32:
633  switch (GenericOpc) {
634  case TargetOpcode::G_SITOFP:
635  return AArch64::SCVTFUWSri;
636  case TargetOpcode::G_UITOFP:
637  return AArch64::UCVTFUWSri;
638  case TargetOpcode::G_FPTOSI:
639  return AArch64::FCVTZSUWSr;
640  case TargetOpcode::G_FPTOUI:
641  return AArch64::FCVTZUUWSr;
642  default:
643  return GenericOpc;
644  }
645  case 64:
646  switch (GenericOpc) {
647  case TargetOpcode::G_SITOFP:
648  return AArch64::SCVTFUXSri;
649  case TargetOpcode::G_UITOFP:
650  return AArch64::UCVTFUXSri;
651  case TargetOpcode::G_FPTOSI:
652  return AArch64::FCVTZSUWDr;
653  case TargetOpcode::G_FPTOUI:
654  return AArch64::FCVTZUUWDr;
655  default:
656  return GenericOpc;
657  }
658  default:
659  return GenericOpc;
660  }
661  case 64:
662  switch (SrcSize) {
663  case 32:
664  switch (GenericOpc) {
665  case TargetOpcode::G_SITOFP:
666  return AArch64::SCVTFUWDri;
667  case TargetOpcode::G_UITOFP:
668  return AArch64::UCVTFUWDri;
669  case TargetOpcode::G_FPTOSI:
670  return AArch64::FCVTZSUXSr;
671  case TargetOpcode::G_FPTOUI:
672  return AArch64::FCVTZUUXSr;
673  default:
674  return GenericOpc;
675  }
676  case 64:
677  switch (GenericOpc) {
678  case TargetOpcode::G_SITOFP:
679  return AArch64::SCVTFUXDri;
680  case TargetOpcode::G_UITOFP:
681  return AArch64::UCVTFUXDri;
682  case TargetOpcode::G_FPTOSI:
683  return AArch64::FCVTZSUXDr;
684  case TargetOpcode::G_FPTOUI:
685  return AArch64::FCVTZUUXDr;
686  default:
687  return GenericOpc;
688  }
689  default:
690  return GenericOpc;
691  }
692  default:
693  return GenericOpc;
694  };
695  return GenericOpc;
696 }
697 
698 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
699  switch (P) {
700  default:
701  llvm_unreachable("Unknown condition code!");
702  case CmpInst::ICMP_NE:
703  return AArch64CC::NE;
704  case CmpInst::ICMP_EQ:
705  return AArch64CC::EQ;
706  case CmpInst::ICMP_SGT:
707  return AArch64CC::GT;
708  case CmpInst::ICMP_SGE:
709  return AArch64CC::GE;
710  case CmpInst::ICMP_SLT:
711  return AArch64CC::LT;
712  case CmpInst::ICMP_SLE:
713  return AArch64CC::LE;
714  case CmpInst::ICMP_UGT:
715  return AArch64CC::HI;
716  case CmpInst::ICMP_UGE:
717  return AArch64CC::HS;
718  case CmpInst::ICMP_ULT:
719  return AArch64CC::LO;
720  case CmpInst::ICMP_ULE:
721  return AArch64CC::LS;
722  }
723 }
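// For instance, an unsigned "less than" predicate maps to the unsigned-lower
// condition code:
//
//   changeICMPPredToAArch64CC(CmpInst::ICMP_ULT); // AArch64CC::LO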
724 
725 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
726  AArch64CC::CondCode &CondCode,
727  AArch64CC::CondCode &CondCode2) {
728  CondCode2 = AArch64CC::AL;
729  switch (P) {
730  default:
731  llvm_unreachable("Unknown FP condition!");
732  case CmpInst::FCMP_OEQ:
733  CondCode = AArch64CC::EQ;
734  break;
735  case CmpInst::FCMP_OGT:
736  CondCode = AArch64CC::GT;
737  break;
738  case CmpInst::FCMP_OGE:
739  CondCode = AArch64CC::GE;
740  break;
741  case CmpInst::FCMP_OLT:
742  CondCode = AArch64CC::MI;
743  break;
744  case CmpInst::FCMP_OLE:
745  CondCode = AArch64CC::LS;
746  break;
747  case CmpInst::FCMP_ONE:
748  CondCode = AArch64CC::MI;
749  CondCode2 = AArch64CC::GT;
750  break;
751  case CmpInst::FCMP_ORD:
752  CondCode = AArch64CC::VC;
753  break;
754  case CmpInst::FCMP_UNO:
755  CondCode = AArch64CC::VS;
756  break;
757  case CmpInst::FCMP_UEQ:
758  CondCode = AArch64CC::EQ;
759  CondCode2 = AArch64CC::VS;
760  break;
761  case CmpInst::FCMP_UGT:
762  CondCode = AArch64CC::HI;
763  break;
764  case CmpInst::FCMP_UGE:
765  CondCode = AArch64CC::PL;
766  break;
767  case CmpInst::FCMP_ULT:
768  CondCode = AArch64CC::LT;
769  break;
770  case CmpInst::FCMP_ULE:
771  CondCode = AArch64CC::LE;
772  break;
773  case CmpInst::FCMP_UNE:
774  CondCode = AArch64CC::NE;
775  break;
776  }
777 }
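// Most FP predicates need a single condition code, but a few require two that
// the caller ORs together (see the G_FCMP handling below). For example,
// "ordered and not equal" is expressed as MI || GT:
//
//   AArch64CC::CondCode CC1, CC2;
//   changeFCMPPredToAArch64CC(CmpInst::FCMP_ONE, CC1, CC2);
//   // CC1 == AArch64CC::MI, CC2 == AArch64CC::GT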
778 
779 bool AArch64InstructionSelector::selectCompareBranch(
780  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
781 
782  const unsigned CondReg = I.getOperand(0).getReg();
783  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
784  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
785  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
786  CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
787  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
788  return false;
789 
790  unsigned LHS = CCMI->getOperand(2).getReg();
791  unsigned RHS = CCMI->getOperand(3).getReg();
792  if (!getConstantVRegVal(RHS, MRI))
793  std::swap(RHS, LHS);
794 
795  const auto RHSImm = getConstantVRegVal(RHS, MRI);
796  if (!RHSImm || *RHSImm != 0)
797  return false;
798 
799  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
800  if (RB.getID() != AArch64::GPRRegBankID)
801  return false;
802 
803  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
804  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
805  return false;
806 
807  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
808  unsigned CBOpc = 0;
809  if (CmpWidth <= 32)
810  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
811  else if (CmpWidth == 64)
812  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
813  else
814  return false;
815 
816  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
817  .addUse(LHS)
818  .addMBB(DestMBB)
819  .constrainAllUses(TII, TRI, RBI);
820 
821  I.eraseFromParent();
822  return true;
823 }
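// The net effect of selectCompareBranch is to fold an integer compare against
// zero into a single compare-and-branch instruction. As an illustrative MIR
// sketch (register names made up for the example), input of the form
//
//   %cmp:gpr(s32) = G_ICMP intpred(eq), %x(s32), %zero(s32)
//   G_BRCOND %cmp(s1), %bb.target
//
// where %zero is a constant 0, is selected to
//
//   CBZW %x, %bb.target
//
// with CBNZW/CBZX/CBNZX used for the ne and 64-bit variants.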
824 
825 bool AArch64InstructionSelector::selectVaStartAAPCS(
826  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
827  return false;
828 }
829 
830 bool AArch64InstructionSelector::selectVaStartDarwin(
831  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
832  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
833  unsigned ListReg = I.getOperand(0).getReg();
834 
835  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
836 
837  auto MIB =
838  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
839  .addDef(ArgsAddrReg)
840  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
841  .addImm(0)
842  .addImm(0);
843 
844  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
845 
846  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
847  .addUse(ArgsAddrReg)
848  .addUse(ListReg)
849  .addImm(0)
850  .addMemOperand(*I.memoperands_begin());
851 
852  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
853  I.eraseFromParent();
854  return true;
855 }
856 
857 void AArch64InstructionSelector::materializeLargeCMVal(
858  MachineInstr &I, const Value *V, unsigned char OpFlags) const {
859  MachineBasicBlock &MBB = *I.getParent();
860  MachineFunction &MF = *MBB.getParent();
861  MachineRegisterInfo &MRI = MF.getRegInfo();
862  MachineIRBuilder MIB(I);
863 
864  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
865  MovZ->addOperand(MF, I.getOperand(1));
866  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
867  AArch64II::MO_NC);
868  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
869  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
870 
871  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
872  unsigned ForceDstReg) {
873  unsigned DstReg = ForceDstReg
874  ? ForceDstReg
875  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
876  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
877  if (auto *GV = dyn_cast<GlobalValue>(V)) {
878  MovI->addOperand(MF, MachineOperand::CreateGA(
879  GV, MovZ->getOperand(1).getOffset(), Flags));
880  } else {
881  MovI->addOperand(
882  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
883  MovZ->getOperand(1).getOffset(), Flags));
884  }
885  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
886  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
887  return DstReg;
888  };
889  unsigned DstReg = BuildMovK(MovZ.getReg(0),
890  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
891  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
892  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
893  return;
894 }
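// For the large code model this expands an address into a MOVZ plus three
// MOVKs, building the 64-bit value 16 bits at a time; schematically (vreg
// names are placeholders for the registers created above):
//
//   %t0:gpr64 = MOVZXi @sym, 0         ; bits [15:0],  MO_G0
//   %t1:gpr64 = MOVKXi %t0, @sym, 16   ; bits [31:16], MO_G1
//   %t2:gpr64 = MOVKXi %t1, @sym, 32   ; bits [47:32], MO_G2
//   %dst:gpr64 = MOVKXi %t2, @sym, 48  ; bits [63:48], MO_G3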
895 
896 bool AArch64InstructionSelector::select(MachineInstr &I,
897  CodeGenCoverage &CoverageInfo) const {
898  assert(I.getParent() && "Instruction should be in a basic block!");
899  assert(I.getParent()->getParent() && "Instruction should be in a function!");
900 
901  MachineBasicBlock &MBB = *I.getParent();
902  MachineFunction &MF = *MBB.getParent();
903  MachineRegisterInfo &MRI = MF.getRegInfo();
904 
905  unsigned Opcode = I.getOpcode();
906  // G_PHI requires same handling as PHI
907  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
908  // Certain non-generic instructions also need some special handling.
909 
910  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
911  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
912 
913  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
914  const unsigned DefReg = I.getOperand(0).getReg();
915  const LLT DefTy = MRI.getType(DefReg);
916 
917  const TargetRegisterClass *DefRC = nullptr;
918  if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
919  DefRC = TRI.getRegClass(DefReg);
920  } else {
921  const RegClassOrRegBank &RegClassOrBank =
922  MRI.getRegClassOrRegBank(DefReg);
923 
924  DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
925  if (!DefRC) {
926  if (!DefTy.isValid()) {
927  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
928  return false;
929  }
930  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
931  DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
932  if (!DefRC) {
933  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
934  return false;
935  }
936  }
937  }
938  I.setDesc(TII.get(TargetOpcode::PHI));
939 
940  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
941  }
942 
943  if (I.isCopy())
944  return selectCopy(I, TII, MRI, TRI, RBI);
945 
946  return true;
947  }
948 
949 
950  if (I.getNumOperands() != I.getNumExplicitOperands()) {
951  LLVM_DEBUG(
952  dbgs() << "Generic instruction has unexpected implicit operands\n");
953  return false;
954  }
955 
956  if (selectImpl(I, CoverageInfo))
957  return true;
958 
959  LLT Ty =
960  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
961 
962  MachineIRBuilder MIB(I);
963 
964  switch (Opcode) {
965  case TargetOpcode::G_BRCOND: {
966  if (Ty.getSizeInBits() > 32) {
967  // We shouldn't need this on AArch64, but it would be implemented as an
968  // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
969  // bit being tested is < 32.
970  LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
971  << ", expected at most 32-bits");
972  return false;
973  }
974 
975  const unsigned CondReg = I.getOperand(0).getReg();
976  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
977 
978  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
979  // instructions will not be produced, as they are conditional branch
980  // instructions that do not set flags.
981  bool ProduceNonFlagSettingCondBr =
982  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
983  if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
984  return true;
985 
986  if (ProduceNonFlagSettingCondBr) {
987  auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
988  .addUse(CondReg)
989  .addImm(/*bit offset=*/0)
990  .addMBB(DestMBB);
991 
992  I.eraseFromParent();
993  return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
994  } else {
995  auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
996  .addDef(AArch64::WZR)
997  .addUse(CondReg)
998  .addImm(1);
999  constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1000  auto Bcc =
1001  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1002  .addImm(AArch64CC::EQ)
1003  .addMBB(DestMBB);
1004 
1005  I.eraseFromParent();
1006  return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1007  }
1008  }
1009 
1010  case TargetOpcode::G_BRINDIRECT: {
1011  I.setDesc(TII.get(AArch64::BR));
1012  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1013  }
1014 
1015  case TargetOpcode::G_FCONSTANT:
1016  case TargetOpcode::G_CONSTANT: {
1017  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1018 
1019  const LLT s32 = LLT::scalar(32);
1020  const LLT s64 = LLT::scalar(64);
1021  const LLT p0 = LLT::pointer(0, 64);
1022 
1023  const unsigned DefReg = I.getOperand(0).getReg();
1024  const LLT DefTy = MRI.getType(DefReg);
1025  const unsigned DefSize = DefTy.getSizeInBits();
1026  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1027 
1028  // FIXME: Redundant check, but even less readable when factored out.
1029  if (isFP) {
1030  if (Ty != s32 && Ty != s64) {
1031  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1032  << " constant, expected: " << s32 << " or " << s64
1033  << '\n');
1034  return false;
1035  }
1036 
1037  if (RB.getID() != AArch64::FPRRegBankID) {
1038  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1039  << " constant on bank: " << RB
1040  << ", expected: FPR\n");
1041  return false;
1042  }
1043 
1044  // The case when we have 0.0 is covered by tablegen. Reject it here so we
1045  // can be sure tablegen works correctly and isn't rescued by this code.
1046  if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1047  return false;
1048  } else {
1049  // s32 and s64 are covered by tablegen.
1050  if (Ty != p0) {
1051  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1052  << " constant, expected: " << s32 << ", " << s64
1053  << ", or " << p0 << '\n');
1054  return false;
1055  }
1056 
1057  if (RB.getID() != AArch64::GPRRegBankID) {
1058  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1059  << " constant on bank: " << RB
1060  << ", expected: GPR\n");
1061  return false;
1062  }
1063  }
1064 
1065  const unsigned MovOpc =
1066  DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
1067 
1068  I.setDesc(TII.get(MovOpc));
1069 
1070  if (isFP) {
1071  const TargetRegisterClass &GPRRC =
1072  DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1073  const TargetRegisterClass &FPRRC =
1074  DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1075 
1076  const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1077  MachineOperand &RegOp = I.getOperand(0);
1078  RegOp.setReg(DefGPRReg);
1079  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1080  MIB.buildCopy({DefReg}, {DefGPRReg});
1081 
1082  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1083  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1084  return false;
1085  }
1086 
1087  MachineOperand &ImmOp = I.getOperand(1);
1088  // FIXME: Is going through int64_t always correct?
1089  ImmOp.ChangeToImmediate(
1090  ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1091  } else if (I.getOperand(1).isCImm()) {
1092  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1093  I.getOperand(1).ChangeToImmediate(Val);
1094  } else if (I.getOperand(1).isImm()) {
1095  uint64_t Val = I.getOperand(1).getImm();
1096  I.getOperand(1).ChangeToImmediate(Val);
1097  }
1098 
1099  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1100  return true;
1101  }
1102  case TargetOpcode::G_EXTRACT: {
1103  LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1104  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1105  (void)DstTy;
1106  unsigned SrcSize = SrcTy.getSizeInBits();
1107  // Larger extracts are vectors, same-size extracts should be something else
1108  // by now (either split up or simplified to a COPY).
1109  if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1110  return false;
1111 
1112  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1113  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1114  Ty.getSizeInBits() - 1);
1115 
1116  if (SrcSize < 64) {
1117  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1118  "unexpected G_EXTRACT types");
1119  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1120  }
1121 
1122  unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1123  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1124  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1125  .addReg(DstReg, 0, AArch64::sub_32);
1126  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1127  AArch64::GPR32RegClass, MRI);
1128  I.getOperand(0).setReg(DstReg);
1129 
1130  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1131  }
1132 
1133  case TargetOpcode::G_INSERT: {
1134  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1135  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1136  unsigned DstSize = DstTy.getSizeInBits();
1137  // Larger inserts are vectors, same-size ones should be something else by
1138  // now (split up or turned into COPYs).
1139  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1140  return false;
1141 
1142  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1143  unsigned LSB = I.getOperand(3).getImm();
1144  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1145  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1146  MachineInstrBuilder(MF, I).addImm(Width - 1);
1147 
1148  if (DstSize < 64) {
1149  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1150  "unexpected G_INSERT types");
1151  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1152  }
1153 
1154  unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1155  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1156  TII.get(AArch64::SUBREG_TO_REG))
1157  .addDef(SrcReg)
1158  .addImm(0)
1159  .addUse(I.getOperand(2).getReg())
1160  .addImm(AArch64::sub_32);
1161  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1162  AArch64::GPR32RegClass, MRI);
1163  I.getOperand(2).setReg(SrcReg);
1164 
1165  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1166  }
1167  case TargetOpcode::G_FRAME_INDEX: {
1168  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1169  if (Ty != LLT::pointer(0, 64)) {
1170  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1171  << ", expected: " << LLT::pointer(0, 64) << '\n');
1172  return false;
1173  }
1174  I.setDesc(TII.get(AArch64::ADDXri));
1175 
1176  // MOs for a #0 shifted immediate.
1177  I.addOperand(MachineOperand::CreateImm(0));
1178  I.addOperand(MachineOperand::CreateImm(0));
1179 
1180  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1181  }
1182 
1183  case TargetOpcode::G_GLOBAL_VALUE: {
1184  auto GV = I.getOperand(1).getGlobal();
1185  if (GV->isThreadLocal()) {
1186  // FIXME: we don't support TLS yet.
1187  return false;
1188  }
1189  unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
1190  if (OpFlags & AArch64II::MO_GOT) {
1191  I.setDesc(TII.get(AArch64::LOADgot));
1192  I.getOperand(1).setTargetFlags(OpFlags);
1193  } else if (TM.getCodeModel() == CodeModel::Large) {
1194  // Materialize the global using movz/movk instructions.
1195  materializeLargeCMVal(I, GV, OpFlags);
1196  I.eraseFromParent();
1197  return true;
1198  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1199  I.setDesc(TII.get(AArch64::ADR));
1200  I.getOperand(1).setTargetFlags(OpFlags);
1201  } else {
1202  I.setDesc(TII.get(AArch64::MOVaddr));
1203  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1204  MachineInstrBuilder MIB(MF, I);
1205  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1206  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1207  }
1208  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1209  }
1210 
1211  case TargetOpcode::G_LOAD:
1212  case TargetOpcode::G_STORE: {
1213  LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1214 
1215  if (PtrTy != LLT::pointer(0, 64)) {
1216  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1217  << ", expected: " << LLT::pointer(0, 64) << '\n');
1218  return false;
1219  }
1220 
1221  auto &MemOp = **I.memoperands_begin();
1222  if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
1223  LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1224  return false;
1225  }
1226  unsigned MemSizeInBits = MemOp.getSize() * 8;
1227 
1228  const unsigned PtrReg = I.getOperand(1).getReg();
1229 #ifndef NDEBUG
1230  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1231  // Sanity-check the pointer register.
1232  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1233  "Load/Store pointer operand isn't a GPR");
1234  assert(MRI.getType(PtrReg).isPointer() &&
1235  "Load/Store pointer operand isn't a pointer");
1236 #endif
1237 
1238  const unsigned ValReg = I.getOperand(0).getReg();
1239  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1240 
1241  const unsigned NewOpc =
1242  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1243  if (NewOpc == I.getOpcode())
1244  return false;
1245 
1246  I.setDesc(TII.get(NewOpc));
1247 
1248  uint64_t Offset = 0;
1249  auto *PtrMI = MRI.getVRegDef(PtrReg);
1250 
1251  // Try to fold a GEP into our unsigned immediate addressing mode.
1252  if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1253  if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1254  int64_t Imm = *COff;
1255  const unsigned Size = MemSizeInBits / 8;
1256  const unsigned Scale = Log2_32(Size);
1257  if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1258  unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1259  I.getOperand(1).setReg(Ptr2Reg);
1260  PtrMI = MRI.getVRegDef(Ptr2Reg);
1261  Offset = Imm / Size;
1262  }
1263  }
1264  }
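    // To illustrate the folding above: for a 32-bit load the scale is 4, so a
    // G_GEP whose offset is a constant such as 8 (non-negative, a multiple of
    // the access size, and small enough for the 12-bit unsigned field) lets
    //
    //   %addr:gpr(p0) = G_GEP %base, %cst8
    //   %val:gpr(s32) = G_LOAD %addr
    //
    // be selected as a single "LDRWui %base, 2", i.e. offset 8 divided by the
    // access size. The vreg names are illustrative only.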
1265 
1266  // If we haven't folded anything into our addressing mode yet, try to fold
1267  // a frame index into the base+offset.
1268  if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1269  I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1270 
1271  I.addOperand(MachineOperand::CreateImm(Offset));
1272 
1273  // If we're storing a 0, use WZR/XZR.
1274  if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1275  if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1276  if (I.getOpcode() == AArch64::STRWui)
1277  I.getOperand(0).setReg(AArch64::WZR);
1278  else if (I.getOpcode() == AArch64::STRXui)
1279  I.getOperand(0).setReg(AArch64::XZR);
1280  }
1281  }
1282 
1283  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1284  }
1285 
1286  case TargetOpcode::G_SMULH:
1287  case TargetOpcode::G_UMULH: {
1288  // Reject the various things we don't support yet.
1289  if (unsupportedBinOp(I, RBI, MRI, TRI))
1290  return false;
1291 
1292  const unsigned DefReg = I.getOperand(0).getReg();
1293  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1294 
1295  if (RB.getID() != AArch64::GPRRegBankID) {
1296  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1297  return false;
1298  }
1299 
1300  if (Ty != LLT::scalar(64)) {
1301  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1302  << ", expected: " << LLT::scalar(64) << '\n');
1303  return false;
1304  }
1305 
1306  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1307  : AArch64::UMULHrr;
1308  I.setDesc(TII.get(NewOpc));
1309 
1310  // Now that we selected an opcode, we need to constrain the register
1311  // operands to use appropriate classes.
1312  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1313  }
1314  case TargetOpcode::G_FADD:
1315  case TargetOpcode::G_FSUB:
1316  case TargetOpcode::G_FMUL:
1317  case TargetOpcode::G_FDIV:
1318 
1319  case TargetOpcode::G_OR:
1320  case TargetOpcode::G_SHL:
1321  case TargetOpcode::G_LSHR:
1322  case TargetOpcode::G_ASHR:
1323  case TargetOpcode::G_GEP: {
1324  // Reject the various things we don't support yet.
1325  if (unsupportedBinOp(I, RBI, MRI, TRI))
1326  return false;
1327 
1328  const unsigned OpSize = Ty.getSizeInBits();
1329 
1330  const unsigned DefReg = I.getOperand(0).getReg();
1331  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1332 
1333  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1334  if (NewOpc == I.getOpcode())
1335  return false;
1336 
1337  I.setDesc(TII.get(NewOpc));
1338  // FIXME: Should the type be always reset in setDesc?
1339 
1340  // Now that we selected an opcode, we need to constrain the register
1341  // operands to use appropriate classes.
1342  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1343  }
1344 
1345  case TargetOpcode::G_UADDO: {
1346  // TODO: Support other types.
1347  unsigned OpSize = Ty.getSizeInBits();
1348  if (OpSize != 32 && OpSize != 64) {
1349  LLVM_DEBUG(
1350  dbgs()
1351  << "G_UADDO currently only supported for 32 and 64 b types.\n");
1352  return false;
1353  }
1354 
1355  // TODO: Support vectors.
1356  if (Ty.isVector()) {
1357  LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1358  return false;
1359  }
1360 
1361  // Add and set the set condition flag.
1362  unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1363  MachineIRBuilder MIRBuilder(I);
1364  auto AddsMI = MIRBuilder.buildInstr(
1365  AddsOpc, {I.getOperand(0).getReg()},
1366  {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1367  constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1368 
1369  // Now, put the overflow result in the register given by the first operand
1370  // to the G_UADDO. CSINC increments the result when the predicate is false,
1371  // so to get the increment when it's true, we need to use the inverse. In
1372  // this case, we want to increment when carry is set.
1373  auto CsetMI = MIRBuilder
1374  .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1375  {AArch64::WZR, AArch64::WZR})
1376  .addImm(getInvertedCondCode(AArch64CC::HS));
1377  constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1378  I.eraseFromParent();
1379  return true;
1380  }
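  // The resulting sequence for a 32-bit G_UADDO is therefore, schematically
  // (vreg names are illustrative):
  //
  //   %sum:gpr32 = ADDSWrr %lhs, %rhs       ; sets NZCV
  //   %carry:gpr32 = CSINCWr wzr, wzr, lo   ; 1 exactly when the carry was set
  //
  // Passing the inverted predicate to CSINC is what makes the "+1" path fire
  // when unsigned overflow actually occurred.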
1381 
1382  case TargetOpcode::G_PTR_MASK: {
1383  uint64_t Align = I.getOperand(2).getImm();
1384  if (Align >= 64 || Align == 0)
1385  return false;
1386 
1387  uint64_t Mask = ~((1ULL << Align) - 1);
1388  I.setDesc(TII.get(AArch64::ANDXri));
1389  I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1390 
1391  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1392  }
1393  case TargetOpcode::G_PTRTOINT:
1394  case TargetOpcode::G_TRUNC: {
1395  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1396  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1397 
1398  const unsigned DstReg = I.getOperand(0).getReg();
1399  const unsigned SrcReg = I.getOperand(1).getReg();
1400 
1401  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1402  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1403 
1404  if (DstRB.getID() != SrcRB.getID()) {
1405  LLVM_DEBUG(
1406  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1407  return false;
1408  }
1409 
1410  if (DstRB.getID() == AArch64::GPRRegBankID) {
1411  const TargetRegisterClass *DstRC =
1412  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1413  if (!DstRC)
1414  return false;
1415 
1416  const TargetRegisterClass *SrcRC =
1417  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1418  if (!SrcRC)
1419  return false;
1420 
1421  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1422  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1423  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1424  return false;
1425  }
1426 
1427  if (DstRC == SrcRC) {
1428  // Nothing to be done
1429  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1430  SrcTy == LLT::scalar(64)) {
1431  llvm_unreachable("TableGen can import this case");
1432  return false;
1433  } else if (DstRC == &AArch64::GPR32RegClass &&
1434  SrcRC == &AArch64::GPR64RegClass) {
1435  I.getOperand(1).setSubReg(AArch64::sub_32);
1436  } else {
1437  LLVM_DEBUG(
1438  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1439  return false;
1440  }
1441 
1442  I.setDesc(TII.get(TargetOpcode::COPY));
1443  return true;
1444  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1445  if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1446  I.setDesc(TII.get(AArch64::XTNv4i16));
1447  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1448  return true;
1449  }
1450  }
1451 
1452  return false;
1453  }
1454 
1455  case TargetOpcode::G_ANYEXT: {
1456  const unsigned DstReg = I.getOperand(0).getReg();
1457  const unsigned SrcReg = I.getOperand(1).getReg();
1458 
1459  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1460  if (RBDst.getID() != AArch64::GPRRegBankID) {
1461  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1462  << ", expected: GPR\n");
1463  return false;
1464  }
1465 
1466  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1467  if (RBSrc.getID() != AArch64::GPRRegBankID) {
1468  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1469  << ", expected: GPR\n");
1470  return false;
1471  }
1472 
1473  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1474 
1475  if (DstSize == 0) {
1476  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
1477  return false;
1478  }
1479 
1480  if (DstSize != 64 && DstSize > 32) {
1481  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1482  << ", expected: 32 or 64\n");
1483  return false;
1484  }
1485  // At this point G_ANYEXT is just like a plain COPY, but we need
1486  // to explicitly form the 64-bit value if any.
1487  if (DstSize > 32) {
1488  unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1489  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1490  .addDef(ExtSrc)
1491  .addImm(0)
1492  .addUse(SrcReg)
1493  .addImm(AArch64::sub_32);
1494  I.getOperand(1).setReg(ExtSrc);
1495  }
1496  return selectCopy(I, TII, MRI, TRI, RBI);
1497  }
1498 
1499  case TargetOpcode::G_ZEXT:
1500  case TargetOpcode::G_SEXT: {
1501  unsigned Opcode = I.getOpcode();
1502  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1503  SrcTy = MRI.getType(I.getOperand(1).getReg());
1504  const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1505  const unsigned DefReg = I.getOperand(0).getReg();
1506  const unsigned SrcReg = I.getOperand(1).getReg();
1507  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1508 
1509  if (RB.getID() != AArch64::GPRRegBankID) {
1510  LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1511  << ", expected: GPR\n");
1512  return false;
1513  }
1514 
1515  MachineInstr *ExtI;
1516  if (DstTy == LLT::scalar(64)) {
1517  // FIXME: Can we avoid manually doing this?
1518  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
1519  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1520  << " operand\n");
1521  return false;
1522  }
1523 
1524  const unsigned SrcXReg =
1525  MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1526  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1527  .addDef(SrcXReg)
1528  .addImm(0)
1529  .addUse(SrcReg)
1530  .addImm(AArch64::sub_32);
1531 
1532  const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1533  ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1534  .addDef(DefReg)
1535  .addUse(SrcXReg)
1536  .addImm(0)
1537  .addImm(SrcTy.getSizeInBits() - 1);
1538  } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
1539  const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1540  ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1541  .addDef(DefReg)
1542  .addUse(SrcReg)
1543  .addImm(0)
1544  .addImm(SrcTy.getSizeInBits() - 1);
1545  } else {
1546  return false;
1547  }
1548 
1549  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1550 
1551  I.eraseFromParent();
1552  return true;
1553  }
1554 
1555  case TargetOpcode::G_SITOFP:
1556  case TargetOpcode::G_UITOFP:
1557  case TargetOpcode::G_FPTOSI:
1558  case TargetOpcode::G_FPTOUI: {
1559  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1560  SrcTy = MRI.getType(I.getOperand(1).getReg());
1561  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1562  if (NewOpc == Opcode)
1563  return false;
1564 
1565  I.setDesc(TII.get(NewOpc));
1566  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1567 
1568  return true;
1569  }
1570 
1571 
1572  case TargetOpcode::G_INTTOPTR:
1573  // The importer is currently unable to import pointer types since they
1574  // didn't exist in SelectionDAG.
1575  return selectCopy(I, TII, MRI, TRI, RBI);
1576 
1577  case TargetOpcode::G_BITCAST:
1578  // Imported SelectionDAG rules can handle every bitcast except those that
1579  // bitcast from a type to the same type. Ideally, these shouldn't occur
1580  // but we might not run an optimizer that deletes them.
1581  if (MRI.getType(I.getOperand(0).getReg()) ==
1582  MRI.getType(I.getOperand(1).getReg()))
1583  return selectCopy(I, TII, MRI, TRI, RBI);
1584  return false;
1585 
1586  case TargetOpcode::G_SELECT: {
1587  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
1588  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1589  << ", expected: " << LLT::scalar(1) << '\n');
1590  return false;
1591  }
1592 
1593  const unsigned CondReg = I.getOperand(1).getReg();
1594  const unsigned TReg = I.getOperand(2).getReg();
1595  const unsigned FReg = I.getOperand(3).getReg();
1596 
1597  unsigned CSelOpc = 0;
1598 
1599  if (Ty == LLT::scalar(32)) {
1600  CSelOpc = AArch64::CSELWr;
1601  } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
1602  CSelOpc = AArch64::CSELXr;
1603  } else {
1604  return false;
1605  }
1606 
1607  MachineInstr &TstMI =
1608  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1609  .addDef(AArch64::WZR)
1610  .addUse(CondReg)
1611  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1612 
1613  MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1614  .addDef(I.getOperand(0).getReg())
1615  .addUse(TReg)
1616  .addUse(FReg)
1617  .addImm(AArch64CC::NE);
1618 
1619  constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1620  constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1621 
1622  I.eraseFromParent();
1623  return true;
1624  }
1625  case TargetOpcode::G_ICMP: {
1626  if (Ty != LLT::scalar(32)) {
1627  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1628  << ", expected: " << LLT::scalar(32) << '\n');
1629  return false;
1630  }
1631 
1632  unsigned CmpOpc = 0;
1633  unsigned ZReg = 0;
1634 
1635  LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1636  if (CmpTy == LLT::scalar(32)) {
1637  CmpOpc = AArch64::SUBSWrr;
1638  ZReg = AArch64::WZR;
1639  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1640  CmpOpc = AArch64::SUBSXrr;
1641  ZReg = AArch64::XZR;
1642  } else {
1643  return false;
1644  }
1645 
1646  // CSINC increments the result by one when the condition code is false.
1647  // Therefore, we have to invert the predicate to get an increment by 1 when
1648  // the predicate is true.
1649  const AArch64CC::CondCode invCC =
1650  changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
1651  (CmpInst::Predicate)I.getOperand(1).getPredicate()));
1652 
1653  MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1654  .addDef(ZReg)
1655  .addUse(I.getOperand(2).getReg())
1656  .addUse(I.getOperand(3).getReg());
1657 
1658  MachineInstr &CSetMI =
1659  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1660  .addDef(I.getOperand(0).getReg())
1661  .addUse(AArch64::WZR)
1662  .addUse(AArch64::WZR)
1663  .addImm(invCC);
1664 
1665  constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1666  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1667 
1668  I.eraseFromParent();
1669  return true;
1670  }
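  // Putting the two instructions together, a 32-bit "eq" compare roughly
  // selects to (register names are illustrative):
  //
  //   SUBSWrr wzr, %lhs, %rhs             ; compare, result discarded
  //   %res:gpr32 = CSINCWr wzr, wzr, ne   ; 1 when EQ held, 0 otherwise
  //
  // CSINC receives the inverted predicate so that its increment path is taken
  // exactly when the original predicate is true.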
1671 
1672  case TargetOpcode::G_FCMP: {
1673  if (Ty != LLT::scalar(32)) {
1674  LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1675  << ", expected: " << LLT::scalar(32) << '\n');
1676  return false;
1677  }
1678 
1679  unsigned CmpOpc = 0;
1680  LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1681  if (CmpTy == LLT::scalar(32)) {
1682  CmpOpc = AArch64::FCMPSrr;
1683  } else if (CmpTy == LLT::scalar(64)) {
1684  CmpOpc = AArch64::FCMPDrr;
1685  } else {
1686  return false;
1687  }
1688 
1689  // FIXME: regbank
1690 
1691  AArch64CC::CondCode CC1, CC2;
1692  changeFCMPPredToAArch64CC(
1693  (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
1694 
1695  MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1696  .addUse(I.getOperand(2).getReg())
1697  .addUse(I.getOperand(3).getReg());
1698 
1699  const unsigned DefReg = I.getOperand(0).getReg();
1700  unsigned Def1Reg = DefReg;
1701  if (CC2 != AArch64CC::AL)
1702  Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1703 
1704  MachineInstr &CSetMI =
1705  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1706  .addDef(Def1Reg)
1707  .addUse(AArch64::WZR)
1708  .addUse(AArch64::WZR)
1709  .addImm(getInvertedCondCode(CC1));
1710 
1711  if (CC2 != AArch64CC::AL) {
1712  unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1713  MachineInstr &CSet2MI =
1714  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1715  .addDef(Def2Reg)
1716  .addUse(AArch64::WZR)
1717  .addUse(AArch64::WZR)
1718  .addImm(getInvertedCondCode(CC2));
1719  MachineInstr &OrMI =
1720  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1721  .addDef(DefReg)
1722  .addUse(Def1Reg)
1723  .addUse(Def2Reg);
1724  constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1725  constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1726  }
1727 
1728  constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1729  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1730 
1731  I.eraseFromParent();
1732  return true;
1733  }
1734  case TargetOpcode::G_VASTART:
1735  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1736  : selectVaStartAAPCS(I, MF, MRI);
1737  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1738  if (!I.getOperand(0).isIntrinsicID())
1739  return false;
1740  if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
1741  return false;
1742  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK))
1743  .addImm(1);
1744  I.eraseFromParent();
1745  return true;
1746  case TargetOpcode::G_IMPLICIT_DEF: {
1747  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
1748  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1749  const unsigned DstReg = I.getOperand(0).getReg();
1750  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1751  const TargetRegisterClass *DstRC =
1752  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1753  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
1754  return true;
1755  }
1756  case TargetOpcode::G_BLOCK_ADDR: {
1757  if (TM.getCodeModel() == CodeModel::Large) {
1758  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1759  I.eraseFromParent();
1760  return true;
1761  } else {
1762  I.setDesc(TII.get(AArch64::MOVaddrBA));
1763  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1764  I.getOperand(0).getReg())
1765  .addBlockAddress(I.getOperand(1).getBlockAddress(),
1766  /* Offset */ 0, AArch64II::MO_PAGE)
1767  .addBlockAddress(
1768  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1769  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1770  I.eraseFromParent();
1771  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1772  }
1773  }
1774  case TargetOpcode::G_BUILD_VECTOR:
1775  return selectBuildVector(I, MRI);
1776  case TargetOpcode::G_MERGE_VALUES:
1777  return selectMergeValues(I, MRI);
1778  case TargetOpcode::G_UNMERGE_VALUES:
1779  return selectUnmergeValues(I, MRI);
1780  case TargetOpcode::G_SHUFFLE_VECTOR:
1781  return selectShuffleVector(I, MRI);
1782  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1783  return selectExtractElt(I, MRI);
1784  case TargetOpcode::G_INSERT_VECTOR_ELT:
1785  return selectInsertElt(I, MRI);
1786  case TargetOpcode::G_CONCAT_VECTORS:
1787  return selectConcatVectors(I, MRI);
1788  }
1789 
1790  return false;
1791 }
1792 
1793 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
1794  unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
1795  MachineIRBuilder &MIRBuilder) const {
1796  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
1797 
1798  auto BuildFn = [&](unsigned SubregIndex) {
1799  auto Ins =
1800  MIRBuilder
1801  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
1802  .addImm(SubregIndex);
1803  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
1804  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
1805  return &*Ins;
1806  };
1807 
1808  switch (EltSize) {
1809  case 16:
1810  return BuildFn(AArch64::hsub);
1811  case 32:
1812  return BuildFn(AArch64::ssub);
1813  case 64:
1814  return BuildFn(AArch64::dsub);
1815  default:
1816  return nullptr;
1817  }
1818 }
1819 
1820 bool AArch64InstructionSelector::selectMergeValues(
1821  MachineInstr &I, MachineRegisterInfo &MRI) const {
1822  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
1823  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1824  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1825  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
1826 
1827  // At the moment we only support merging two s32s into an s64.
1828  if (I.getNumOperands() != 3)
1829  return false;
1830  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
1831  return false;
1832  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
1833  if (RB.getID() != AArch64::GPRRegBankID)
1834  return false;
1835 
1836  auto *DstRC = &AArch64::GPR64RegClass;
1837  unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
1838  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1839  TII.get(TargetOpcode::SUBREG_TO_REG))
1840  .addDef(SubToRegDef)
1841  .addImm(0)
1842  .addUse(I.getOperand(1).getReg())
1843  .addImm(AArch64::sub_32);
1844  unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
1845  // Need to anyext the second scalar before we can use bfm
1846  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1847  TII.get(TargetOpcode::SUBREG_TO_REG))
1848  .addDef(SubToRegDef2)
1849  .addImm(0)
1850  .addUse(I.getOperand(2).getReg())
1851  .addImm(AArch64::sub_32);
1852  MachineInstr &BFM =
1853  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
1854  .addDef(I.getOperand(0).getReg())
1855  .addUse(SubToRegDef)
1856  .addUse(SubToRegDef2)
1857  .addImm(32)
1858  .addImm(31);
1859  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
1860  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
1861  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
1862  I.eraseFromParent();
1863  return true;
1864 }
1865 
1866 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
1867  const unsigned EltSize) {
1868  // Choose a lane copy opcode and subregister based off of the size of the
1869  // vector's elements.
1870  switch (EltSize) {
1871  case 16:
1872  CopyOpc = AArch64::CPYi16;
1873  ExtractSubReg = AArch64::hsub;
1874  break;
1875  case 32:
1876  CopyOpc = AArch64::CPYi32;
1877  ExtractSubReg = AArch64::ssub;
1878  break;
1879  case 64:
1880  CopyOpc = AArch64::CPYi64;
1881  ExtractSubReg = AArch64::dsub;
1882  break;
1883  default:
1884  // Unknown size, bail out.
1885  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
1886  return false;
1887  }
1888  return true;
1889 }
1890 
1891 /// Given a register \p Reg, find the value of a constant defining \p Reg.
1892 /// Return true if one could be found, and store it in \p Val. Return false
1893 /// otherwise.
1894 static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI,
1895  unsigned &Val) {
1896  // Look at the def of the register.
1897  MachineInstr *Def = MRI.getVRegDef(Reg);
1898  if (!Def)
1899  return false;
1900 
1901  // Find the first definition which isn't a copy.
1902  if (Def->isCopy()) {
1903  Reg = Def->getOperand(1).getReg();
1904  auto It = find_if_not(MRI.reg_nodbg_instructions(Reg),
1905  [](const MachineInstr &MI) { return MI.isCopy(); });
1906  if (It == MRI.reg_instr_nodbg_end()) {
1907  LLVM_DEBUG(dbgs() << "Couldn't find non-copy def for register\n");
1908  return false;
1909  }
1910  Def = &*It;
1911  }
1912 
1913  // TODO: Handle opcodes other than G_CONSTANT.
1914  if (Def->getOpcode() != TargetOpcode::G_CONSTANT) {
1915  LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
1916  "currently unsupported.\n");
1917  return false;
1918  }
1919 
1920  // Return the constant value associated with the operand.
1921  Val = Def->getOperand(1).getCImm()->getLimitedValue();
1922  return true;
1923 }
1924 
1925 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
1926  Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
1927  unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
1928  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1929  unsigned CopyOpc = 0;
1930  unsigned ExtractSubReg = 0;
1931  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
1932  LLVM_DEBUG(
1933  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
1934  return nullptr;
1935  }
1936 
1937  const TargetRegisterClass *DstRC =
1938  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
1939  if (!DstRC) {
1940  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
1941  return nullptr;
1942  }
1943 
1944  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
1945  const LLT &VecTy = MRI.getType(VecReg);
1946  const TargetRegisterClass *VecRC =
1947  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
1948  if (!VecRC) {
1949  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
1950  return nullptr;
1951  }
1952 
1953  // The register that we're going to copy into.
1954  unsigned InsertReg = VecReg;
1955  if (!DstReg)
1956  DstReg = MRI.createVirtualRegister(DstRC);
1957  // If the lane index is 0, we just use a subregister COPY.
1958  if (LaneIdx == 0) {
1959  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
1960  .addReg(VecReg, 0, ExtractSubReg);
1961  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
1962  return &*Copy;
1963  }
1964 
1965  // Lane copies require 128-bit wide registers. If we're dealing with an
1966  // unpacked vector, then we need to move up to that width. Insert an implicit
1967  // def and a subregister insert to get us there.
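  // For example, extracting lane 1 from a 64-bit <2 x s32> source first widens
  // the source into an FPR128 register (IMPLICIT_DEF + INSERT_SUBREG via
  // emitScalarToVector below) and then does the CPYi32 lane copy from the
  // widened register.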
1968  if (VecTy.getSizeInBits() != 128) {
1969  MachineInstr *ScalarToVector = emitScalarToVector(
1970  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
1971  if (!ScalarToVector)
1972  return nullptr;
1973  InsertReg = ScalarToVector->getOperand(0).getReg();
1974  }
1975 
1976  MachineInstr *LaneCopyMI =
1977  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
1978  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
1979 
1980  // Make sure that we actually constrain the initial copy.
1981  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
1982  return LaneCopyMI;
1983 }
1984 
1985 bool AArch64InstructionSelector::selectExtractElt(
1986  MachineInstr &I, MachineRegisterInfo &MRI) const {
1987  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
1988  "unexpected opcode!");
1989  unsigned DstReg = I.getOperand(0).getReg();
1990  const LLT NarrowTy = MRI.getType(DstReg);
1991  const unsigned SrcReg = I.getOperand(1).getReg();
1992  const LLT WideTy = MRI.getType(SrcReg);
1993  (void)WideTy;
1994  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
1995  "source register size too small!");
1996  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
1997 
1998  // Need the lane index to determine the correct copy opcode.
1999  MachineOperand &LaneIdxOp = I.getOperand(2);
2000  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2001 
2002  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2003  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2004  return false;
2005  }
2006 
2007  // Find the index to extract from.
2008  unsigned LaneIdx = 0;
2009  if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx))
2010  return false;
2011 
2012  MachineIRBuilder MIRBuilder(I);
2013 
2014  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2015  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2016  LaneIdx, MIRBuilder);
2017  if (!Extract)
2018  return false;
2019 
2020  I.eraseFromParent();
2021  return true;
2022 }
2023 
2024 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2025  MachineInstr &I, MachineRegisterInfo &MRI) const {
2026  unsigned NumElts = I.getNumOperands() - 1;
2027  unsigned SrcReg = I.getOperand(NumElts).getReg();
2028  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2029  const LLT SrcTy = MRI.getType(SrcReg);
2030 
2031  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2032  if (SrcTy.getSizeInBits() > 128) {
2033  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2034  return false;
2035  }
2036 
2037  MachineIRBuilder MIB(I);
2038 
2039  // We implement a split vector operation by treating the sub-vectors as
2040  // scalars and extracting them.
2041  const RegisterBank &DstRB =
2042  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2043  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2044  unsigned Dst = I.getOperand(OpIdx).getReg();
2045  MachineInstr *Extract =
2046  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2047  if (!Extract)
2048  return false;
2049  }
2050  I.eraseFromParent();
2051  return true;
2052 }
2053 
2054 bool AArch64InstructionSelector::selectUnmergeValues(
2055  MachineInstr &I, MachineRegisterInfo &MRI) const {
2056  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2057  "unexpected opcode");
2058 
2059  // TODO: Handle unmerging into GPRs and from scalars to scalars.
2060  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2061  AArch64::FPRRegBankID ||
2062  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2063  AArch64::FPRRegBankID) {
2064  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2065  "currently unsupported.\n");
2066  return false;
2067  }
2068 
2069  // The last operand is the vector source register, and every other operand is
2070  // a register to unpack into.
2071  unsigned NumElts = I.getNumOperands() - 1;
2072  unsigned SrcReg = I.getOperand(NumElts).getReg();
2073  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2074  const LLT WideTy = MRI.getType(SrcReg);
2075  (void)WideTy;
2076  assert(WideTy.isVector() && "can only unmerge from vector types!");
2077  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2078  "source register size too small!");
2079 
2080  if (!NarrowTy.isScalar())
2081  return selectSplitVectorUnmerge(I, MRI);
2082 
2083  MachineIRBuilder MIB(I);
2084 
2085  // Choose a lane copy opcode and subregister based off of the size of the
2086  // vector's elements.
2087  unsigned CopyOpc = 0;
2088  unsigned ExtractSubReg = 0;
2089  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2090  return false;
2091 
2092  // Set up for the lane copies.
2093  MachineBasicBlock &MBB = *I.getParent();
2094 
2095  // Stores the registers we'll be copying from.
2096  SmallVector<unsigned, 4> InsertRegs;
2097 
2098  // We'll use the first register twice, so we only need NumElts-1 registers.
2099  unsigned NumInsertRegs = NumElts - 1;
2100 
2101  // If our elements fit into exactly 128 bits, then we can copy from the source
2102  // directly. Otherwise, we need to do a bit of setup with some subregister
2103  // inserts.
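  // For example, unmerging a 128-bit <4 x s32> can copy each lane directly
  // from SrcReg, while unmerging a 64-bit <2 x s32> first builds an FPR128
  // value with IMPLICIT_DEF + INSERT_SUBREG (dsub) and copies lanes from that.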
2104  if (NarrowTy.getSizeInBits() * NumElts == 128) {
2105  InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2106  } else {
2107  // No. We have to perform subregister inserts. For each insert, create an
2108  // implicit def and a subregister insert, and save the register we create.
2109  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2110  unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2111  MachineInstr &ImpDefMI =
2112  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2113  ImpDefReg);
2114 
2115  // Now, create the subregister insert from SrcReg.
2116  unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2117  MachineInstr &InsMI =
2118  *BuildMI(MBB, I, I.getDebugLoc(),
2119  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2120  .addUse(ImpDefReg)
2121  .addUse(SrcReg)
2122  .addImm(AArch64::dsub);
2123 
2124  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2125  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2126 
2127  // Save the register so that we can copy from it after.
2128  InsertRegs.push_back(InsertReg);
2129  }
2130  }
2131 
2132  // Now that we've created any necessary subregister inserts, we can
2133  // create the copies.
2134  //
2135  // Perform the first copy separately as a subregister copy.
2136  unsigned CopyTo = I.getOperand(0).getReg();
2137  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2138  .addReg(InsertRegs[0], 0, ExtractSubReg);
2139  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
2140 
2141  // Now, perform the remaining copies as vector lane copies.
2142  unsigned LaneIdx = 1;
2143  for (unsigned InsReg : InsertRegs) {
2144  unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2145  MachineInstr &CopyInst =
2146  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2147  .addUse(InsReg)
2148  .addImm(LaneIdx);
2149  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2150  ++LaneIdx;
2151  }
2152 
2153  // Separately constrain the first copy's destination. Because of the
2154  // limitation in constrainOperandRegClass, we can't guarantee that this will
2155  // actually be constrained. So, do it ourselves using the second operand.
2156  const TargetRegisterClass *RC =
2157  MRI.getRegClassOrNull(I.getOperand(1).getReg());
2158  if (!RC) {
2159  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2160  return false;
2161  }
2162 
2163  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2164  I.eraseFromParent();
2165  return true;
2166 }
2167 
2168 bool AArch64InstructionSelector::selectConcatVectors(
2169  MachineInstr &I, MachineRegisterInfo &MRI) const {
2170  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2171  "Unexpected opcode");
2172  unsigned Dst = I.getOperand(0).getReg();
2173  unsigned Op1 = I.getOperand(1).getReg();
2174  unsigned Op2 = I.getOperand(2).getReg();
2175  MachineIRBuilder MIRBuilder(I);
2176  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2177  if (!ConcatMI)
2178  return false;
2179  I.eraseFromParent();
2180  return true;
2181 }
2182 
2183 void AArch64InstructionSelector::collectShuffleMaskIndices(
2184  MachineInstr &I, MachineRegisterInfo &MRI,
2185  SmallVectorImpl<int> &Idxs) const {
2186  MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2187  assert(
2188  MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2189  "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2190  // Find the constant indices.
2191  for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2192  MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2193  assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2194  // Look through copies.
2195  while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2196  ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2197  assert(ScalarDef && "Could not find def of copy operand");
2198  }
2199  assert(ScalarDef->getOpcode() == TargetOpcode::G_CONSTANT);
2200  Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2201  }
2202 }
2203 
2204 unsigned
2205 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2206  MachineFunction &MF) const {
2207  Type *CPTy = CPVal->getType()->getPointerTo();
2208  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2209  if (Align == 0)
2210  Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2211 
2212  MachineConstantPool *MCP = MF.getConstantPool();
2213  return MCP->getConstantPoolIndex(CPVal, Align);
2214 }
2215 
2216 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2217  Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2218  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2219 
2220  auto Adrp =
2221  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2222  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
2223 
2224  MachineInstr *LoadMI = nullptr;
2225  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2226  case 16:
2227  LoadMI =
2228  &*MIRBuilder
2229  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2230  .addConstantPoolIndex(CPIdx, 0,
2231  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2232  break;
2233  case 8:
2234  LoadMI = &*MIRBuilder
2235  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2236  .addConstantPoolIndex(
2237  CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2238  break;
2239  default:
2240  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2241  << *CPVal->getType());
2242  return nullptr;
2243  }
2244  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
2245  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2246  return LoadMI;
2247 }
2248 
2249 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2250 /// size and RB.
2251 static std::pair<unsigned, unsigned>
2252 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2253  unsigned Opc, SubregIdx;
2254  if (RB.getID() == AArch64::GPRRegBankID) {
2255  if (EltSize == 32) {
2256  Opc = AArch64::INSvi32gpr;
2257  SubregIdx = AArch64::ssub;
2258  } else if (EltSize == 64) {
2259  Opc = AArch64::INSvi64gpr;
2260  SubregIdx = AArch64::dsub;
2261  } else {
2262  llvm_unreachable("invalid elt size!");
2263  }
2264  } else {
2265  if (EltSize == 8) {
2266  Opc = AArch64::INSvi8lane;
2267  SubregIdx = AArch64::bsub;
2268  } else if (EltSize == 16) {
2269  Opc = AArch64::INSvi16lane;
2270  SubregIdx = AArch64::hsub;
2271  } else if (EltSize == 32) {
2272  Opc = AArch64::INSvi32lane;
2273  SubregIdx = AArch64::ssub;
2274  } else if (EltSize == 64) {
2275  Opc = AArch64::INSvi64lane;
2276  SubregIdx = AArch64::dsub;
2277  } else {
2278  llvm_unreachable("invalid elt size!");
2279  }
2280  }
2281  return std::make_pair(Opc, SubregIdx);
2282 }
2283 
2284 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
2285  Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2286  MachineIRBuilder &MIRBuilder) const {
2287  // We implement a vector concat by:
2288  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2289  // 2. Insert the upper vector into the destination's upper element
2290  // TODO: some of this code is common with G_BUILD_VECTOR handling.
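  // For example (illustrative), concatenating two <2 x s32> values into a
  // <4 x s32> widens each 64-bit operand into an FPR128 register with
  // emitScalarToVector and then inserts the second operand into lane 1 of
  // the first with INSvi64lane.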
2291  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2292 
2293  const LLT Op1Ty = MRI.getType(Op1);
2294  const LLT Op2Ty = MRI.getType(Op2);
2295 
2296  if (Op1Ty != Op2Ty) {
2297  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2298  return nullptr;
2299  }
2300  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2301 
2302  if (Op1Ty.getSizeInBits() >= 128) {
2303  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2304  return nullptr;
2305  }
2306 
2307  // At the moment we just support 64 bit vector concats.
2308  if (Op1Ty.getSizeInBits() != 64) {
2309  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2310  return nullptr;
2311  }
2312 
2313  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2314  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2315  const TargetRegisterClass *DstRC =
2316  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2317 
2318  MachineInstr *WidenedOp1 =
2319  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2320  MachineInstr *WidenedOp2 =
2321  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2322  if (!WidenedOp1 || !WidenedOp2) {
2323  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2324  return nullptr;
2325  }
2326 
2327  // Now do the insert of the upper element.
2328  unsigned InsertOpc, InsSubRegIdx;
2329  std::tie(InsertOpc, InsSubRegIdx) =
2330  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2331 
2332  if (!Dst)
2333  Dst = MRI.createVirtualRegister(DstRC);
2334  auto InsElt =
2335  MIRBuilder
2336  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
2337  .addImm(1) /* Lane index */
2338  .addUse(WidenedOp2->getOperand(0).getReg())
2339  .addImm(0);
2340  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2341  return &*InsElt;
2342 }
2343 
2344 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
2345  // Try to match a vector splat operation into a dup instruction.
2346  // We're looking for this pattern:
2347  // %scalar:gpr(s64) = COPY $x0
2348  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
2349  // %cst0:gpr(s32) = G_CONSTANT i32 0
2350  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
2351  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
2352  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
2353  // %zerovec(<2 x s32>)
2354  //
2355  // ...into:
2356  // %splat = DUP %scalar
2357  // We use the regbank of the scalar to determine which kind of dup to use.
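  // For instance, splatting a gpr(s64) scalar into a <2 x s64> result selects
  // DUPv2i64gpr, while an fpr scalar of the same size is first widened with
  // emitScalarToVector and then selects DUPv2i64lane (see OpcTable below).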
2358  MachineIRBuilder MIB(I);
2359  MachineRegisterInfo &MRI = *MIB.getMRI();
2360  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2361  using namespace TargetOpcode;
2362  using namespace MIPatternMatch;
2363 
2364  // Begin matching the insert.
2365  auto *InsMI =
2366  findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
2367  if (!InsMI)
2368  return false;
2369  // Match the undef vector operand.
2370  auto *UndefMI =
2371  findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
2372  if (!UndefMI)
2373  return false;
2374  // Match the scalar being splatted.
2375  unsigned ScalarReg = InsMI->getOperand(2).getReg();
2376  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
2377  // Match the index constant 0.
2378  int64_t Index = 0;
2379  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
2380  return false;
2381 
2382  // The shuffle's second operand doesn't matter if the mask is all zero.
2383  auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
2384  if (!ZeroVec)
2385  return false;
2386  int64_t Zero = 0;
2387  if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
2388  return false;
2389  for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
2390  if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
2391  return false; // This wasn't an all zeros vector.
2392  }
2393 
2394  // We're done, now find out what kind of splat we need.
2395  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
2396  LLT EltTy = VecTy.getElementType();
2397  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
2398  LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
2399  return false;
2400  }
2401  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
2402  static const unsigned OpcTable[2][2] = {
2403  {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
2404  {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
2405  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
2406 
2407  // For FP splats, we need to widen the scalar reg via undef too.
2408  if (IsFP) {
2409  MachineInstr *Widen = emitScalarToVector(
2410  EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
2411  if (!Widen)
2412  return false;
2413  ScalarReg = Widen->getOperand(0).getReg();
2414  }
2415  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
2416  if (IsFP)
2417  Dup.addImm(0);
2418  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
2419  I.eraseFromParent();
2420  return true;
2421 }
2422 
2423 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
2424  if (TM.getOptLevel() == CodeGenOpt::None)
2425  return false;
2426  if (tryOptVectorDup(I))
2427  return true;
2428  return false;
2429 }
2430 
2431 bool AArch64InstructionSelector::selectShuffleVector(
2432  MachineInstr &I, MachineRegisterInfo &MRI) const {
2433  if (tryOptVectorShuffle(I))
2434  return true;
2435  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2436  unsigned Src1Reg = I.getOperand(1).getReg();
2437  const LLT Src1Ty = MRI.getType(Src1Reg);
2438  unsigned Src2Reg = I.getOperand(2).getReg();
2439  const LLT Src2Ty = MRI.getType(Src2Reg);
2440 
2441  MachineBasicBlock &MBB = *I.getParent();
2442  MachineFunction &MF = *MBB.getParent();
2443  LLVMContext &Ctx = MF.getFunction().getContext();
2444 
2445  // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
2446  // operand, it comes in as a normal vector value which we have to analyze to
2447  // find the mask indices.
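  // As an illustrative example, a <4 x s32> shuffle with mask <0, 4, 1, 5>
  // expands below into the byte indices 0-3, 16-19, 4-7 and 20-23, which
  // index into the concatenated source bytes for the TBL lookup.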
2448  SmallVector<int, 8> Mask;
2449  collectShuffleMaskIndices(I, MRI, Mask);
2450  assert(!Mask.empty() && "Expected to find mask indices");
2451 
2452  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
2453  // it originated from a <1 x T> type. Those should have been lowered into
2454  // G_BUILD_VECTOR earlier.
2455  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
2456  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
2457  return false;
2458  }
2459 
2460  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
2461 
2462  SmallVector<Constant *, 64> CstIdxs;
2463  for (int Val : Mask) {
2464  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
2465  unsigned Offset = Byte + Val * BytesPerElt;
2466  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
2467  }
2468  }
2469 
2470  MachineIRBuilder MIRBuilder(I);
2471 
2472  // Use a constant pool to load the index vector for TBL.
2473  Constant *CPVal = ConstantVector::get(CstIdxs);
2474  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
2475  if (!IndexLoad) {
2476  LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
2477  return false;
2478  }
2479 
2480  if (DstTy.getSizeInBits() != 128) {
2481  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
2482  // This case can be done with TBL1.
2483  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
2484  if (!Concat) {
2485  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
2486  return false;
2487  }
2488 
2489  // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
2490  IndexLoad =
2491  emitScalarToVector(64, &AArch64::FPR128RegClass,
2492  IndexLoad->getOperand(0).getReg(), MIRBuilder);
2493 
2494  auto TBL1 = MIRBuilder.buildInstr(
2495  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
2496  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
2497  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
2498 
2499  auto Copy =
2500  MIRBuilder
2501  .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2502  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
2503  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
2504  I.eraseFromParent();
2505  return true;
2506  }
2507 
2508  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
2509  // Q registers for regalloc.
2510  auto RegSeq = MIRBuilder
2511  .buildInstr(TargetOpcode::REG_SEQUENCE,
2512  {&AArch64::QQRegClass}, {Src1Reg})
2513  .addImm(AArch64::qsub0)
2514  .addUse(Src2Reg)
2515  .addImm(AArch64::qsub1);
2516 
2517  auto TBL2 =
2518  MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
2519  {RegSeq, IndexLoad->getOperand(0).getReg()});
2520  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
2521  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
2522  I.eraseFromParent();
2523  return true;
2524 }
2525 
2526 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
2527  Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
2528  unsigned LaneIdx, const RegisterBank &RB,
2529  MachineIRBuilder &MIRBuilder) const {
2530  MachineInstr *InsElt = nullptr;
2531  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
2532  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2533 
2534  // Create a register to define with the insert if one wasn't passed in.
2535  if (!DstReg)
2536  DstReg = MRI.createVirtualRegister(DstRC);
2537 
2538  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
2539  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
2540 
2541  if (RB.getID() == AArch64::FPRRegBankID) {
2542  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
2543  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2544  .addImm(LaneIdx)
2545  .addUse(InsSub->getOperand(0).getReg())
2546  .addImm(0);
2547  } else {
2548  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2549  .addImm(LaneIdx)
2550  .addUse(EltReg);
2551  }
2552 
2553  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2554  return InsElt;
2555 }
2556 
2557 bool AArch64InstructionSelector::selectInsertElt(
2558  MachineInstr &I, MachineRegisterInfo &MRI) const {
2559  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
2560 
2561  // Get information on the destination.
2562  unsigned DstReg = I.getOperand(0).getReg();
2563  const LLT DstTy = MRI.getType(DstReg);
2564  if (DstTy.getSizeInBits() < 128) {
2565  // TODO: Handle unpacked vectors.
2566  LLVM_DEBUG(dbgs() << "Unpacked vectors not supported yet!");
2567  return false;
2568  }
2569 
2570  // Get information on the element we want to insert into the destination.
2571  unsigned EltReg = I.getOperand(2).getReg();
2572  const LLT EltTy = MRI.getType(EltReg);
2573  unsigned EltSize = EltTy.getSizeInBits();
2574  if (EltSize < 16 || EltSize > 64)
2575  return false; // Don't support all element types yet.
2576 
2577  // Find the definition of the index. Bail out if it's not defined by a
2578  // G_CONSTANT.
2579  unsigned IdxReg = I.getOperand(3).getReg();
2580  unsigned LaneIdx = 0;
2581  if (!getConstantValueForReg(IdxReg, MRI, LaneIdx))
2582  return false;
2583 
2584  // Perform the lane insert.
2585  unsigned SrcReg = I.getOperand(1).getReg();
2586  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
2587  MachineIRBuilder MIRBuilder(I);
2588  emitLaneInsert(DstReg, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
2589  I.eraseFromParent();
2590  return true;
2591 }
2592 
2593 bool AArch64InstructionSelector::selectBuildVector(
2594  MachineInstr &I, MachineRegisterInfo &MRI) const {
2595  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
2596  // Until we port more of the optimized selections, for now just use a vector
2597  // insert sequence.
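  // For example, a G_BUILD_VECTOR of four s32 operands becomes one
  // emitScalarToVector for element 0 followed by three emitLaneInsert calls
  // (lanes 1-3), as driven by the loop below.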
2598  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2599  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
2600  unsigned EltSize = EltTy.getSizeInBits();
2601  if (EltSize < 16 || EltSize > 64)
2602  return false; // Don't support all element types yet.
2603  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2604  MachineIRBuilder MIRBuilder(I);
2605 
2606  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
2607  MachineInstr *ScalarToVec =
2608  emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
2609  I.getOperand(1).getReg(), MIRBuilder);
2610  if (!ScalarToVec)
2611  return false;
2612 
2613  unsigned DstVec = ScalarToVec->getOperand(0).getReg();
2614  unsigned DstSize = DstTy.getSizeInBits();
2615 
2616  // Keep track of the last MI we inserted. Later on, we might be able to save
2617  // a copy using it.
2618  MachineInstr *PrevMI = nullptr;
2619  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
2620  // Note that if we don't do a subregister copy, we can end up making an
2621  // extra register.
2622  PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
2623  MIRBuilder);
2624  DstVec = PrevMI->getOperand(0).getReg();
2625  }
2626 
2627  // If DstTy's size in bits is less than 128, then emit a subregister copy
2628  // from DstVec to the last register we've defined.
2629  if (DstSize < 128) {
2630  // Force this to be FPR using the destination vector.
2631  const TargetRegisterClass *RC =
2632  getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
2633  if (!RC)
2634  return false;
2635  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
2636  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
2637  return false;
2638  }
2639 
2640  unsigned SubReg = 0;
2641  if (!getSubRegForClass(RC, TRI, SubReg))
2642  return false;
2643  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
2644  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
2645  << ")\n");
2646  return false;
2647  }
2648 
2649  unsigned Reg = MRI.createVirtualRegister(RC);
2650  unsigned DstReg = I.getOperand(0).getReg();
2651 
2652  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2653  .addReg(DstVec, 0, SubReg);
2654  MachineOperand &RegOp = I.getOperand(1);
2655  RegOp.setReg(Reg);
2656  RBI.constrainGenericRegister(DstReg, *RC, MRI);
2657  } else {
2658  // We don't need a subregister copy. Save a copy by re-using the
2659  // destination register on the final insert.
2660  assert(PrevMI && "PrevMI was null?");
2661  PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
2662  constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
2663  }
2664 
2665  I.eraseFromParent();
2666  return true;
2667 }
2668 
2669 /// SelectArithImmed - Select an immediate value that can be represented as
2670 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
2671 /// Val set to the 12-bit value and Shift set to the shifter operand.
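// For example (illustrative): 0xabc yields {imm = 0xabc, shift = LSL #0},
// 0x456000 yields {imm = 0x456, shift = LSL #12}, and 0x1234 is rejected
// because it cannot be expressed as a 12-bit value shifted by 0 or 12.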
2672 InstructionSelector::ComplexRendererFns
2673 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
2674  MachineInstr &MI = *Root.getParent();
2675  MachineBasicBlock &MBB = *MI.getParent();
2676  MachineFunction &MF = *MBB.getParent();
2677  MachineRegisterInfo &MRI = MF.getRegInfo();
2678 
2679  // This function is called from the addsub_shifted_imm ComplexPattern,
2680  // which lists [imm] as the list of opcode it's interested in, however
2681  // we still need to check whether the operand is actually an immediate
2682  // here because the ComplexPattern opcode list is only used in
2683  // root-level opcode matching.
2684  uint64_t Immed;
2685  if (Root.isImm())
2686  Immed = Root.getImm();
2687  else if (Root.isCImm())
2688  Immed = Root.getCImm()->getZExtValue();
2689  else if (Root.isReg()) {
2690  MachineInstr *Def = MRI.getVRegDef(Root.getReg());
2691  if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
2692  return None;
2693  MachineOperand &Op1 = Def->getOperand(1);
2694  if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
2695  return None;
2696  Immed = Op1.getCImm()->getZExtValue();
2697  } else
2698  return None;
2699 
2700  unsigned ShiftAmt;
2701 
2702  if (Immed >> 12 == 0) {
2703  ShiftAmt = 0;
2704  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
2705  ShiftAmt = 12;
2706  Immed = Immed >> 12;
2707  } else
2708  return None;
2709 
2710  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
2711  return {{
2712  [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
2713  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
2714  }};
2715 }
2716 
2717 /// Select a "register plus unscaled signed 9-bit immediate" address. This
2718 /// should only match when there is an offset that is not valid for a scaled
2719 /// immediate addressing mode. The "Size" argument is the size in bytes of the
2720 /// memory reference, which is needed here to know what is valid for a scaled
2721 /// immediate.
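// For example (illustrative), with Size = 4: an offset of 3 is not valid for
// the scaled form (not a multiple of 4) but fits in [-256, 256), so it is
// matched here; an offset of 8 is left to selectAddrModeIndexed instead.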
2722 InstructionSelector::ComplexRendererFns
2723 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
2724  unsigned Size) const {
2725  MachineRegisterInfo &MRI =
2726  Root.getParent()->getParent()->getParent()->getRegInfo();
2727 
2728  if (!Root.isReg())
2729  return None;
2730 
2731  if (!isBaseWithConstantOffset(Root, MRI))
2732  return None;
2733 
2734  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
2735  if (!RootDef)
2736  return None;
2737 
2738  MachineOperand &OffImm = RootDef->getOperand(2);
2739  if (!OffImm.isReg())
2740  return None;
2741  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
2742  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
2743  return None;
2744  int64_t RHSC;
2745  MachineOperand &RHSOp1 = RHS->getOperand(1);
2746  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
2747  return None;
2748  RHSC = RHSOp1.getCImm()->getSExtValue();
2749 
2750  // If the offset is valid as a scaled immediate, don't match here.
2751  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
2752  return None;
2753  if (RHSC >= -256 && RHSC < 256) {
2754  MachineOperand &Base = RootDef->getOperand(1);
2755  return {{
2756  [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
2757  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
2758  }};
2759  }
2760  return None;
2761 }
2762 
2763 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
2764 /// "Size" argument is the size in bytes of the memory reference, which
2765 /// determines the scale.
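// For example (illustrative), a 4-byte access at base + 8 renders as the base
// register plus the scaled immediate 2 (8 >> Log2_32(4)).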
2766 InstructionSelector::ComplexRendererFns
2767 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
2768  unsigned Size) const {
2769  MachineRegisterInfo &MRI =
2770  Root.getParent()->getParent()->getParent()->getRegInfo();
2771 
2772  if (!Root.isReg())
2773  return None;
2774 
2775  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
2776  if (!RootDef)
2777  return None;
2778 
2779  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
2780  return {{
2781  [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
2782  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
2783  }};
2784  }
2785 
2786  if (isBaseWithConstantOffset(Root, MRI)) {
2787  MachineOperand &LHS = RootDef->getOperand(1);
2788  MachineOperand &RHS = RootDef->getOperand(2);
2789  MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
2790  MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
2791  if (LHSDef && RHSDef) {
2792  int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
2793  unsigned Scale = Log2_32(Size);
2794  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
2795  if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
2796  return {{
2797  [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
2798  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
2799  }};
2800 
2801  return {{
2802  [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
2803  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
2804  }};
2805  }
2806  }
2807  }
2808 
2809  // Before falling back to our general case, check if the unscaled
2810  // instructions can handle this. If so, that's preferable.
2811  if (selectAddrModeUnscaled(Root, Size).hasValue())
2812  return None;
2813 
2814  return {{
2815  [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
2816  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
2817  }};
2818 }
2819 
2820 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
2821  const MachineInstr &MI) const {
2822  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2823  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
2824  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
2825  assert(CstVal && "Expected constant value");
2826  MIB.addImm(CstVal.getValue());
2827 }
2828 
2829 namespace llvm {
2830 InstructionSelector *
2831 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
2832  AArch64Subtarget &Subtarget,
2833  AArch64RegisterBankInfo &RBI) {
2834  return new AArch64InstructionSelector(TM, Subtarget, RBI);
2835 }
2836 }
const NoneType None
Definition: None.h:23
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(unsigned Reg) const
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:645
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address...
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
MachineBasicBlock * getMBB() const
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB...
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
void setTargetFlags(unsigned F)
void push_back(const T &Elt)
Definition: SmallVector.h:211
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
bool isScalar() const
static CondCode getInvertedCondCode(CondCode Code)
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
static uint64_t selectImpl(uint64_t CandidateMask, uint64_t &NextInSequenceMask)
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
static uint32_t Concat[]
unsigned const TargetRegisterInfo * TRI
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:458
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
unsigned getBitWidth() const
getBitWidth - Return the bitwidth of this constant.
Definition: Constants.h:142
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address...
This file declares the targeting of the RegisterBankInfo class for AArch64.
bool isIntrinsicID() const
bool isVector() const
Holds all the information related to register banks.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
const HexagonInstrInfo * TII
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:651
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned SubReg
static StringRef getName(Value *V)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P)
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
static int getID(struct InternalInstruction *insn, const void *miiArg)
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
const RegClassOrRegBank & getRegClassOrRegBank(unsigned Reg) const
Return the register bank or register class of Reg.
static bool isStore(int Opcode)
MachineFunction & getMF()
Getter for the function we currently build.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:968
#define EQ(a, b)
Definition: regexec.c:112
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
TargetInstrInfo - Interface to description of machine instruction set.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV)...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
#define P(N)
MachineRegisterInfo * getMRI()
Getter for MRI.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:630
const TargetRegisterInfo * getTargetRegisterInfo() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI, unsigned &Val)
Given a register Reg, find the value of a constant defining Reg.
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address...
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
unsigned const MachineRegisterInfo * MRI
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
This is an important base class in LLVM.
Definition: Constant.h:41
const GlobalValue * getGlobal() const
Helper class to build MachineInstr.
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:759
bool isExactlyValue(double V) const
We don&#39;t rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1129
bool isValid() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:646
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
self_iterator getIterator()
Definition: ilist_node.h:81
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineInstrBuilder & addFrameIndex(int Idx) const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
auto find_if_not(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Definition: STLExtras.h:1218
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwises returns null...
Definition: PointerUnion.h:141
bool isCopy() const
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:50
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
#define GET_GLOBALISEL_TEMPORARIES_INIT
const APFloat & getValueAPF() const
Definition: Constants.h:302
unsigned createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:250
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
BlockVerifier::State From
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
Intrinsic::ID getIntrinsicID() const
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:87
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
This file declares the MachineIRBuilder class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function that verifies that we have a valid copy at the end of selectCopy. ...
Optional< int64_t > getConstantVRegVal(unsigned VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
Definition: Utils.cpp:184
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:461
Provides the logic to select generic machine instructions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, unsigned SrcReg, const TargetRegisterClass *From, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class provides the information for the target register banks.
static reg_instr_nodbg_iterator reg_instr_nodbg_end()
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
ConstantMatch m_ICst(int64_t &Cst)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
int64_t getOffset() const
Return the offset from the symbol in this operand.
const BlockAddress * getBlockAddress() const
void setReg(unsigned Reg)
Change the register this operand corresponds to.
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
void setSubReg(unsigned subReg)
static const TargetRegisterClass * constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
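A sketch of the common pattern for plain COPYs, whose MCInstrDesc carries no register class information, so the generic destination must be constrained by hand; the GPR64 class here is only an example.
  static bool constrainCopyDest(MachineInstr &Copy, MachineRegisterInfo &MRI) {
    unsigned DstReg = Copy.getOperand(0).getReg();
    // Returns null (leaving the copy unselectable) on a class mismatch.
    return RegisterBankInfo::constrainGenericRegister(
               DstReg, AArch64::GPR64RegClass, MRI) != nullptr;
  }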
#define GET_GLOBALISEL_PREDICATES_INIT
const DataLayout & getDataLayout() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
T get() const
Returns the value of the specified pointer type.
Definition: PointerUnion.h:134
const TargetRegisterClass * getRegClassOrNull(unsigned Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet...
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel...
Definition: TargetOpcodes.h:30
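This predicate is what the top of a GlobalISel select() typically keys on; a hedged sketch of that dispatch shape (class name assumed, TII/TRI/RBI assumed to be selector members, not verbatim from this file): already-selected target instructions are skipped, except that bare COPYs may still need their operands constrained.
  bool MyTargetInstructionSelector::select(MachineInstr &I,
                                           CodeGenCoverage &CoverageInfo) const {
    MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
    if (!isPreISelGenericOpcode(I.getOpcode())) {
      // Already a target instruction; only bare copies need extra work.
      if (I.isCopy())
        return selectCopy(I, TII, MRI, TRI, RBI);
      return true;
    }
    // Generic G_* opcodes fall through to the tblgen-erated selector.
    return selectImpl(I, CoverageInfo);
  }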
LLVM Value Representation.
Definition: Value.h:72
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
void ChangeToFrameIndex(int Idx)
Replace this operand with a frame index.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
APInt bitcastToAPInt() const
Definition: APFloat.h:1093
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:156
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
const ConstantInt * getCImm() const
#define DEBUG_TYPE
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
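A sketch of how a selector might create (or reuse) a constant-pool slot for a Constant before emitting the instructions that load it; the alignment choice below is an assumption, not necessarily what this file does.
  #include "llvm/CodeGen/MachineConstantPool.h"
  #include "llvm/IR/Constants.h"
  static unsigned getOrCreateCPEntry(Constant *CPVal, MachineFunction &MF) {
    const DataLayout &DL = MF.getDataLayout();
    unsigned Align = DL.getPrefTypeAlignment(CPVal->getType());
    if (Align == 0)
      Align = DL.getTypeAllocSize(CPVal->getType());
    // De-duplicates: an identical existing entry returns the same index.
    return MF.getConstantPool()->getConstantPoolIndex(CPVal, Align);
  }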
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
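Sketch of pairing createVirtualRegister with MachineIRBuilder to build a vector lane insert; the INSvi32gpr opcode and FPR128 class are examples of the kind of target-specific choices a selector makes, and the helper name is assumed.
  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  static unsigned emitExampleLaneInsert(MachineIRBuilder &MIRBuilder,
                                        MachineRegisterInfo &MRI,
                                        unsigned SrcVec, unsigned EltReg,
                                        unsigned LaneIdx) {
    // The destination lives in the 128-bit FP/SIMD register class.
    unsigned DstReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
    // INS Vd.S[LaneIdx], Wn : insert a 32-bit GPR element into a vector lane.
    MIRBuilder.buildInstr(AArch64::INSvi32gpr)
        .addDef(DstReg)
        .addUse(SrcVec)
        .addImm(LaneIdx)
        .addUse(EltReg);
    return DstReg;
  }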
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
A discriminated union of two pointer types, with the discriminator in the low bit of the pointer...
Definition: PointerUnion.h:86
unsigned getPredicate() const